Commit | Line | Data |
---|---|---|
b5eafe92 JF |
1 | /* |
2 | * Xen leaves the responsibility for maintaining p2m mappings to the | |
3 | * guests themselves, but it must also access and update the p2m array | |
4 | * during suspend/resume when all the pages are reallocated. | |
5 | * | |
054954eb JG |
6 | * The logical flat p2m table is mapped to a linear kernel memory area. |
7 | * For accesses by Xen a three-level tree linked via mfns only is set up to | |
8 | * allow the address space to be sparse. | |
b5eafe92 | 9 | * |
054954eb JG |
10 | * Xen |
11 | * | | |
12 | * p2m_top_mfn | |
13 | * / \ | |
14 | * p2m_mid_mfn p2m_mid_mfn | |
15 | * / / | |
16 | * p2m p2m p2m ... | |
b5eafe92 JF |
17 | * |
18 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | |
19 | * | |
054954eb JG |
20 | * The p2m_top_mfn level is limited to 1 page, so the maximum representable |
21 | * pseudo-physical address space is: | |
b5eafe92 JF |
22 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages |
23 | * | |
24 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | |
25 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | |
a3118beb | 26 | * 512 and 1024 entries respectively. |
f4cec35b KRW |
27 | * |
28 | * In short, these structures contain the Machine Frame Number (MFN) of the PFN. | |
29 | * | |
30 | * However not all entries are filled with MFNs. Specifically for all other | |
31 | * leaf entries, or for the top root, or middle one, for which there is a void | |
32 | * entry, we assume it is "missing". So (for example) | |
33 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. | |
054954eb JG |
34 | * We have a dedicated page p2m_missing with all entries being |
35 | * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m | |
36 | * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. | |
f4cec35b KRW |
37 | * |
38 | * We also have the possibility of setting 1-1 mappings on certain regions, so | |
39 | * that: | |
40 | * pfn_to_mfn(0xc0000)=0xc0000 | |
41 | * | |
42 | * The benefit of this is, that we can assume for non-RAM regions (think | |
3cb83e46 | 43 | * PCI BARs, or ACPI spaces), we can create mappings easily because we |
f4cec35b KRW |
44 | * get the PFN value to match the MFN. |
45 | * | |
054954eb JG |
46 | * For this to work efficiently we have one new page p2m_identity. All entries |
47 | * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only | |
48 | * recognizes that and MFNs, no other fancy value). | |
f4cec35b KRW |
49 | * |
50 | * On lookup we spot that the entry points to p2m_identity and return the | |
51 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. | |
52 | * If the entry points to an allocated page, we just proceed as before and | |
054954eb | 53 | * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in |
f4cec35b KRW |
54 | * appropriate functions (pfn_to_mfn). |
55 | * | |
56 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the | |
57 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a | |
58 | * non-identity pfn. To protect ourselves against that we elect to set (and get) the | |
59 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. | |
b5eafe92 JF |
60 | */ |
61 | ||
62 | #include <linux/init.h> | |
63 | #include <linux/module.h> | |
448f2831 JF |
64 | #include <linux/list.h> |
65 | #include <linux/hash.h> | |
87f1d40a | 66 | #include <linux/sched.h> |
2222e71b | 67 | #include <linux/seq_file.h> |
2c185687 | 68 | #include <linux/bootmem.h> |
7108c9ce | 69 | #include <linux/slab.h> |
b5eafe92 JF |
70 | |
71 | #include <asm/cache.h> | |
72 | #include <asm/setup.h> | |
2e917175 | 73 | #include <asm/uaccess.h> |
b5eafe92 JF |
74 | |
75 | #include <asm/xen/page.h> | |
76 | #include <asm/xen/hypercall.h> | |
77 | #include <asm/xen/hypervisor.h> | |
ee072640 | 78 | #include <xen/balloon.h> |
0930bba6 | 79 | #include <xen/grant_table.h> |
b5eafe92 | 80 | |
4fbb67e3 | 81 | #include "p2m.h" |
0930bba6 | 82 | #include "multicalls.h" |
b5eafe92 JF |
83 | #include "xen-ops.h" |
84 | ||
054954eb JG |
85 | #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) |
86 | ||
5b8e7d80 JG |
/*
 * Linear p2m list, indexed by pfn. Initially the domain-builder supplied
 * mfn_list, later replaced by the virtually mapped area set up in
 * xen_vmalloc_p2m_tree().
 */
unsigned long *xen_p2m_addr __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_addr);
/* Number of entries reachable through xen_p2m_addr. */
unsigned long xen_p2m_size __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_size);
/* Upper pfn bound reported to Xen via shared_info->arch.max_pfn. */
unsigned long xen_max_p2m_pfn __read_mostly;
EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);

/* Taken while swapping shared pmd/pte entries of the p2m area for private ones. */
static DEFINE_SPINLOCK(p2m_update_lock);

/* Mid-level mfn page whose entries all reference p2m_missing. */
static unsigned long *p2m_mid_missing_mfn;
/* Top-level page of the mfn tree (holds mfns of the mid pages). */
static unsigned long *p2m_top_mfn;
/* Virtual addresses of the mid pages, parallel to p2m_top_mfn. */
static unsigned long **p2m_top_mfn_p;
/* Shared leaf pages; both are filled with INVALID_P2M_ENTRY. */
static unsigned long *p2m_missing;
static unsigned long *p2m_identity;
/* Shared pte pages whose entries all map p2m_missing resp. p2m_identity. */
static pte_t *p2m_missing_pte;
static pte_t *p2m_identity_pte;
7108c9ce | 103 | |
b5eafe92 JF |
104 | static inline unsigned p2m_top_index(unsigned long pfn) |
105 | { | |
106 | BUG_ON(pfn >= MAX_P2M_PFN); | |
107 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | |
108 | } | |
109 | ||
110 | static inline unsigned p2m_mid_index(unsigned long pfn) | |
111 | { | |
112 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | |
113 | } | |
114 | ||
115 | static inline unsigned p2m_index(unsigned long pfn) | |
116 | { | |
117 | return pfn % P2M_PER_PAGE; | |
118 | } | |
119 | ||
b5eafe92 JF |
120 | static void p2m_top_mfn_init(unsigned long *top) |
121 | { | |
122 | unsigned i; | |
123 | ||
124 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | |
125 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | |
126 | } | |
127 | ||
128 | static void p2m_top_mfn_p_init(unsigned long **top) | |
129 | { | |
130 | unsigned i; | |
131 | ||
132 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | |
133 | top[i] = p2m_mid_missing_mfn; | |
134 | } | |
135 | ||
054954eb | 136 | static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) |
b5eafe92 JF |
137 | { |
138 | unsigned i; | |
139 | ||
140 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | |
054954eb | 141 | mid[i] = virt_to_mfn(leaf); |
b5eafe92 JF |
142 | } |
143 | ||
054954eb | 144 | static void p2m_init(unsigned long *p2m) |
b5eafe92 JF |
145 | { |
146 | unsigned i; | |
147 | ||
054954eb JG |
148 | for (i = 0; i < P2M_PER_PAGE; i++) |
149 | p2m[i] = INVALID_P2M_ENTRY; | |
b5eafe92 JF |
150 | } |
151 | ||
054954eb | 152 | static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) |
b5eafe92 JF |
153 | { |
154 | unsigned i; | |
155 | ||
054954eb JG |
156 | for (i = 0; i < P2M_PER_PAGE; i++) |
157 | p2m[i] = IDENTITY_FRAME(pfn + i); | |
b5eafe92 JF |
158 | } |
159 | ||
7108c9ce JG |
160 | static void * __ref alloc_p2m_page(void) |
161 | { | |
7108c9ce JG |
162 | if (unlikely(!slab_is_available())) |
163 | return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | |
164 | ||
165 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | |
166 | } | |
167 | ||
701a261a | 168 | static void __ref free_p2m_page(void *p) |
7108c9ce | 169 | { |
701a261a BO |
170 | if (unlikely(!slab_is_available())) { |
171 | free_bootmem((unsigned long)p, PAGE_SIZE); | |
172 | return; | |
173 | } | |
174 | ||
7108c9ce JG |
175 | free_page((unsigned long)p); |
176 | } | |
177 | ||
b5eafe92 JF |
/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called rather early, and must use alloc_bootmem*()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 *
 * Does nothing for auto-translated guests.
 */
void __ref xen_build_mfn_list_list(void)
{
	unsigned long pfn, mfn;
	pte_t *ptep;
	unsigned int level, topidx, mididx;
	unsigned long *mid_mfn_p;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return;

	/* Pre-initialize p2m_top_mfn to be completely missing */
	if (p2m_top_mfn == NULL) {
		p2m_mid_missing_mfn = alloc_p2m_page();
		p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);

		p2m_top_mfn_p = alloc_p2m_page();
		p2m_top_mfn_p_init(p2m_top_mfn_p);

		p2m_top_mfn = alloc_p2m_page();
		p2m_top_mfn_init(p2m_top_mfn);
	} else {
		/* Reinitialise, mfn's all change after migration */
		p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
	}

	/* Walk the linear p2m area one leaf page (P2M_PER_PAGE pfns) at a time. */
	for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
	     pfn += P2M_PER_PAGE) {
		topidx = p2m_top_index(pfn);
		mididx = p2m_mid_index(pfn);

		mid_mfn_p = p2m_top_mfn_p[topidx];
		ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
				      &level);
		BUG_ON(!ptep || level != PG_LEVEL_4K);
		/* mfn of the page backing this leaf of the linear p2m list */
		mfn = pte_mfn(*ptep);
		/* Round down to the start of the pte page holding the entry. */
		ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));

		/* Don't bother allocating any mfn mid levels if
		 * they're just missing, just update the stored mfn,
		 * since all could have changed over a migrate.
		 */
		if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
			BUG_ON(mididx);
			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
			/* Skip the rest of this mid page; the loop adds the last leaf. */
			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
			continue;
		}

		/* First real leaf under this top entry: give it a private mid page. */
		if (mid_mfn_p == p2m_mid_missing_mfn) {
			mid_mfn_p = alloc_p2m_page();
			p2m_mid_mfn_init(mid_mfn_p, p2m_missing);

			p2m_top_mfn_p[topidx] = mid_mfn_p;
		}

		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
		mid_mfn_p[mididx] = mfn;
	}
}
248 | ||
249 | void xen_setup_mfn_list_list(void) | |
250 | { | |
4dd322bc MR |
251 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
252 | return; | |
253 | ||
b5eafe92 JF |
254 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
255 | ||
256 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | |
257 | virt_to_mfn(p2m_top_mfn); | |
258 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; | |
259 | } | |
260 | ||
261 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | |
262 | void __init xen_build_dynamic_phys_to_machine(void) | |
263 | { | |
b5eafe92 JF |
264 | unsigned long pfn; |
265 | ||
696fd7c5 KRW |
266 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
267 | return; | |
268 | ||
5b8e7d80 | 269 | xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; |
054954eb | 270 | xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE); |
b5eafe92 | 271 | |
054954eb JG |
272 | for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++) |
273 | xen_p2m_addr[pfn] = INVALID_P2M_ENTRY; | |
b5eafe92 | 274 | |
054954eb JG |
275 | xen_max_p2m_pfn = xen_p2m_size; |
276 | } | |
b5eafe92 | 277 | |
054954eb JG |
278 | #define P2M_TYPE_IDENTITY 0 |
279 | #define P2M_TYPE_MISSING 1 | |
280 | #define P2M_TYPE_PFN 2 | |
281 | #define P2M_TYPE_UNKNOWN 3 | |
b5eafe92 | 282 | |
054954eb JG |
283 | static int xen_p2m_elem_type(unsigned long pfn) |
284 | { | |
285 | unsigned long mfn; | |
b5eafe92 | 286 | |
054954eb JG |
287 | if (pfn >= xen_p2m_size) |
288 | return P2M_TYPE_IDENTITY; | |
b5eafe92 | 289 | |
054954eb | 290 | mfn = xen_p2m_addr[pfn]; |
b5eafe92 | 291 | |
054954eb JG |
292 | if (mfn == INVALID_P2M_ENTRY) |
293 | return P2M_TYPE_MISSING; | |
cf04d120 | 294 | |
054954eb JG |
295 | if (mfn & IDENTITY_FRAME_BIT) |
296 | return P2M_TYPE_IDENTITY; | |
297 | ||
298 | return P2M_TYPE_PFN; | |
b5eafe92 | 299 | } |
054954eb JG |
300 | |
/*
 * Map the p2m list into the virtual area starting at @p2m.
 *
 * Allocates the shared p2m_missing/p2m_identity leaf pages and the pte
 * pages referencing them, then walks all pfns below xen_max_p2m_pfn and
 * maps each chunk either to (a copy of) the initial p2m contents, to a
 * shared missing/identity leaf page, or to a shared missing/identity pte
 * page covering a whole mid-page area.
 */
static void __init xen_rebuild_p2m_list(unsigned long *p2m)
{
	unsigned int i, chunk;
	unsigned long pfn;
	unsigned long *mfns;
	pte_t *ptep;
	pmd_t *pmdp;
	int type;

	/* Both shared leaf pages hold nothing but INVALID_P2M_ENTRY. */
	p2m_missing = alloc_p2m_page();
	p2m_init(p2m_missing);
	p2m_identity = alloc_p2m_page();
	p2m_init(p2m_identity);

	/* Shared pte pages: every pte maps the respective leaf page read-only. */
	p2m_missing_pte = alloc_p2m_page();
	paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
	p2m_identity_pte = alloc_p2m_page();
	paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte(p2m_missing_pte + i,
			pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
		set_pte(p2m_identity_pte + i,
			pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
	}

	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
		/*
		 * Try to map missing/identity PMDs or p2m-pages if possible.
		 * We have to respect the structure of the mfn_list_list
		 * which will be built just afterwards.
		 * Chunk size to test is one p2m page if we are in the middle
		 * of a mfn_list_list mid page and the complete mid page area
		 * if we are at index 0 of the mid page. Please note that a
		 * mid page might cover more than one PMD, e.g. on 32 bit PAE
		 * kernels.
		 */
		chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
			P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;

		/*
		 * For non-pfn types count how many leading entries of the
		 * chunk share the type of the first one (i stays 0 for pfn).
		 */
		type = xen_p2m_elem_type(pfn);
		i = 0;
		if (type != P2M_TYPE_PFN)
			for (i = 1; i < chunk; i++)
				if (xen_p2m_elem_type(pfn + i) != type)
					break;
		if (i < chunk)
			/* Reset to minimal chunk size. */
			chunk = P2M_PER_PAGE;

		/* Mixed or real-pfn leaf page: needs its own backing page. */
		if (type == P2M_TYPE_PFN || i < chunk) {
			/* Use initial p2m page contents. */
#ifdef CONFIG_X86_64
			mfns = alloc_p2m_page();
			copy_page(mfns, xen_p2m_addr + pfn);
#else
			mfns = xen_p2m_addr + pfn;
#endif
			ptep = populate_extra_pte((unsigned long)(p2m + pfn));
			set_pte(ptep,
				pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
			continue;
		}

		if (chunk == P2M_PER_PAGE) {
			/* Map complete missing or identity p2m-page. */
			mfns = (type == P2M_TYPE_MISSING) ?
				p2m_missing : p2m_identity;
			ptep = populate_extra_pte((unsigned long)(p2m + pfn));
			set_pte(ptep,
				pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
			continue;
		}

		/* Complete missing or identity PMD(s) can be mapped. */
		ptep = (type == P2M_TYPE_MISSING) ?
			p2m_missing_pte : p2m_identity_pte;
		for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
			pmdp = populate_extra_pmd(
				(unsigned long)(p2m + pfn) + i * PMD_SIZE);
			set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
		}
	}
}
357a3cfb | 384 | |
054954eb JG |
385 | void __init xen_vmalloc_p2m_tree(void) |
386 | { | |
387 | static struct vm_struct vm; | |
357a3cfb | 388 | |
054954eb JG |
389 | vm.flags = VM_ALLOC; |
390 | vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, | |
391 | PMD_SIZE * PMDS_PER_MID_PAGE); | |
392 | vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); | |
393 | pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); | |
3fc509fc | 394 | |
054954eb | 395 | xen_max_p2m_pfn = vm.size / sizeof(unsigned long); |
357a3cfb | 396 | |
054954eb | 397 | xen_rebuild_p2m_list(vm.addr); |
357a3cfb | 398 | |
054954eb | 399 | xen_p2m_addr = vm.addr; |
5b8e7d80 | 400 | xen_p2m_size = xen_max_p2m_pfn; |
5b8e7d80 JG |
401 | |
402 | xen_inv_extra_mem(); | |
357a3cfb | 403 | } |
054954eb | 404 | |
b5eafe92 JF |
405 | unsigned long get_phys_to_machine(unsigned long pfn) |
406 | { | |
054954eb JG |
407 | pte_t *ptep; |
408 | unsigned int level; | |
b5eafe92 | 409 | |
5b8e7d80 JG |
410 | if (unlikely(pfn >= xen_p2m_size)) { |
411 | if (pfn < xen_max_p2m_pfn) | |
412 | return xen_chk_extra_mem(pfn); | |
413 | ||
25b884a8 | 414 | return IDENTITY_FRAME(pfn); |
5b8e7d80 | 415 | } |
b5eafe92 | 416 | |
054954eb JG |
417 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
418 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
b5eafe92 | 419 | |
f4cec35b KRW |
420 | /* |
421 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity | |
422 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY | |
423 | * would be wrong. | |
424 | */ | |
054954eb | 425 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
f4cec35b KRW |
426 | return IDENTITY_FRAME(pfn); |
427 | ||
054954eb | 428 | return xen_p2m_addr[pfn]; |
b5eafe92 JF |
429 | } |
430 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | |
431 | ||
054954eb JG |
/*
 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
 * If not, nothing is changed. This is okay as the only reason for allocating
 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an individual
 * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
 *
 * @addr:   virtual address inside the p2m area whose pte page is replaced
 * @pte_pg: the shared pte page currently expected to be mapped there
 *
 * Returns the pte for @addr after the update, or NULL if a page allocation
 * failed (in which case nothing was modified).
 */
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
{
	pte_t *ptechk;
	pte_t *pte_newpg[PMDS_PER_MID_PAGE];
	pmd_t *pmdp;
	unsigned int level;
	unsigned long flags;
	unsigned long vaddr;
	int i;

	/* Do all allocations first to bail out in error case. */
	for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
		pte_newpg[i] = alloc_p2m_page();
		if (!pte_newpg[i]) {
			/* Undo the allocations made so far. */
			for (i--; i >= 0; i--)
				free_p2m_page(pte_newpg[i]);

			return NULL;
		}
	}

	/* Start of the mid-page area containing addr. */
	vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);

	for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
		/* New pte page starts out as a copy of the shared one. */
		copy_page(pte_newpg[i], pte_pg);
		paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);

		pmdp = lookup_pmd_address(vaddr);
		BUG_ON(!pmdp);

		spin_lock_irqsave(&p2m_update_lock, flags);

		/* Only install our page if the shared one is still mapped. */
		ptechk = lookup_address(vaddr, &level);
		if (ptechk == pte_pg) {
			set_pmd(pmdp,
				__pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
			pte_newpg[i] = NULL;
		}

		spin_unlock_irqrestore(&p2m_update_lock, flags);

		/* Not installed (pmd already replaced): give the page back. */
		if (pte_newpg[i]) {
			paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
			free_p2m_page(pte_newpg[i]);
		}

		vaddr += PMD_SIZE;
	}

	return lookup_address(addr, &level);
}
489 | ||
a3118beb | 490 | /* |
b5eafe92 JF |
491 | * Fully allocate the p2m structure for a given pfn. We need to check |
492 | * that both the top and mid levels are allocated, and make sure the | |
493 | * parallel mfn tree is kept in sync. We may race with other cpus, so | |
494 | * the new pages are installed with cmpxchg; if we lose the race then | |
495 | * simply free the page we allocated and use the one that's there. | |
496 | */ | |
497 | static bool alloc_p2m(unsigned long pfn) | |
498 | { | |
499 | unsigned topidx, mididx; | |
b5eafe92 | 500 | unsigned long *top_mfn_p, *mid_mfn; |
054954eb JG |
501 | pte_t *ptep, *pte_pg; |
502 | unsigned int level; | |
503 | unsigned long flags; | |
504 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); | |
505 | unsigned long p2m_pfn; | |
b5eafe92 JF |
506 | |
507 | topidx = p2m_top_index(pfn); | |
508 | mididx = p2m_mid_index(pfn); | |
509 | ||
054954eb JG |
510 | ptep = lookup_address(addr, &level); |
511 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
512 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | |
b5eafe92 | 513 | |
054954eb JG |
514 | if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { |
515 | /* PMD level is missing, allocate a new one */ | |
f241b0b8 | 516 | ptep = alloc_p2m_pmd(addr, pte_pg); |
054954eb | 517 | if (!ptep) |
b5eafe92 | 518 | return false; |
b5eafe92 JF |
519 | } |
520 | ||
054954eb JG |
521 | if (p2m_top_mfn) { |
522 | top_mfn_p = &p2m_top_mfn[topidx]; | |
523 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | |
b5eafe92 | 524 | |
054954eb | 525 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); |
b5eafe92 | 526 | |
054954eb JG |
527 | if (mid_mfn == p2m_mid_missing_mfn) { |
528 | /* Separately check the mid mfn level */ | |
529 | unsigned long missing_mfn; | |
530 | unsigned long mid_mfn_mfn; | |
531 | unsigned long old_mfn; | |
b5eafe92 | 532 | |
054954eb JG |
533 | mid_mfn = alloc_p2m_page(); |
534 | if (!mid_mfn) | |
535 | return false; | |
b5eafe92 | 536 | |
054954eb | 537 | p2m_mid_mfn_init(mid_mfn, p2m_missing); |
b5eafe92 | 538 | |
054954eb JG |
539 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); |
540 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | |
541 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); | |
542 | if (old_mfn != missing_mfn) { | |
543 | free_p2m_page(mid_mfn); | |
544 | mid_mfn = mfn_to_virt(old_mfn); | |
545 | } else { | |
546 | p2m_top_mfn_p[topidx] = mid_mfn; | |
547 | } | |
239af7c7 | 548 | } |
054954eb JG |
549 | } else { |
550 | mid_mfn = NULL; | |
b5eafe92 JF |
551 | } |
552 | ||
1760f1eb | 553 | p2m_pfn = pte_pfn(READ_ONCE(*ptep)); |
054954eb JG |
554 | if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || |
555 | p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { | |
b5eafe92 JF |
556 | /* p2m leaf page is missing */ |
557 | unsigned long *p2m; | |
558 | ||
559 | p2m = alloc_p2m_page(); | |
560 | if (!p2m) | |
561 | return false; | |
562 | ||
054954eb JG |
563 | if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) |
564 | p2m_init(p2m); | |
565 | else | |
566 | p2m_init_identity(p2m, pfn); | |
567 | ||
568 | spin_lock_irqsave(&p2m_update_lock, flags); | |
569 | ||
570 | if (pte_pfn(*ptep) == p2m_pfn) { | |
571 | set_pte(ptep, | |
572 | pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); | |
573 | if (mid_mfn) | |
574 | mid_mfn[mididx] = virt_to_mfn(p2m); | |
575 | p2m = NULL; | |
576 | } | |
577 | ||
578 | spin_unlock_irqrestore(&p2m_update_lock, flags); | |
b5eafe92 | 579 | |
054954eb | 580 | if (p2m) |
b5eafe92 | 581 | free_p2m_page(p2m); |
b5eafe92 JF |
582 | } |
583 | ||
584 | return true; | |
585 | } | |
586 | ||
b83c6e55 | 587 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
f4cec35b KRW |
588 | unsigned long pfn_e) |
589 | { | |
590 | unsigned long pfn; | |
591 | ||
5b8e7d80 | 592 | if (unlikely(pfn_s >= xen_p2m_size)) |
f4cec35b KRW |
593 | return 0; |
594 | ||
595 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | |
596 | return pfn_e - pfn_s; | |
597 | ||
598 | if (pfn_s > pfn_e) | |
599 | return 0; | |
600 | ||
5b8e7d80 JG |
601 | if (pfn_e > xen_p2m_size) |
602 | pfn_e = xen_p2m_size; | |
f4cec35b | 603 | |
5b8e7d80 JG |
604 | for (pfn = pfn_s; pfn < pfn_e; pfn++) |
605 | xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn); | |
f4cec35b KRW |
606 | |
607 | return pfn - pfn_s; | |
608 | } | |
609 | ||
b5eafe92 JF |
610 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
611 | { | |
054954eb JG |
612 | pte_t *ptep; |
613 | unsigned int level; | |
b5eafe92 | 614 | |
2f558d40 SS |
615 | /* don't track P2M changes in autotranslate guests */ |
616 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | |
6eaa412f | 617 | return true; |
2f558d40 | 618 | |
5b8e7d80 | 619 | if (unlikely(pfn >= xen_p2m_size)) { |
b5eafe92 JF |
620 | BUG_ON(mfn != INVALID_P2M_ENTRY); |
621 | return true; | |
622 | } | |
623 | ||
90fff3ea | 624 | if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) |
2e917175 JG |
625 | return true; |
626 | ||
054954eb JG |
627 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
628 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
f4cec35b | 629 | |
054954eb | 630 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) |
b5eafe92 JF |
631 | return mfn == INVALID_P2M_ENTRY; |
632 | ||
054954eb JG |
633 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
634 | return mfn == IDENTITY_FRAME(pfn); | |
635 | ||
2e917175 | 636 | return false; |
b5eafe92 JF |
637 | } |
638 | ||
639 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |
640 | { | |
054954eb | 641 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
b5eafe92 JF |
642 | if (!alloc_p2m(pfn)) |
643 | return false; | |
644 | ||
054954eb | 645 | return __set_phys_to_machine(pfn, mfn); |
b5eafe92 JF |
646 | } |
647 | ||
648 | return true; | |
649 | } | |
448f2831 | 650 | |
820c4db2 JG |
651 | int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, |
652 | struct gnttab_map_grant_ref *kmap_ops, | |
653 | struct page **pages, unsigned int count) | |
1429d46d ZK |
654 | { |
655 | int i, ret = 0; | |
820c4db2 | 656 | pte_t *pte; |
1429d46d ZK |
657 | |
658 | if (xen_feature(XENFEAT_auto_translated_physmap)) | |
659 | return 0; | |
660 | ||
0bb599fd DV |
661 | if (kmap_ops) { |
662 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, | |
663 | kmap_ops, count); | |
664 | if (ret) | |
665 | goto out; | |
1429d46d ZK |
666 | } |
667 | ||
668 | for (i = 0; i < count; i++) { | |
820c4db2 | 669 | unsigned long mfn, pfn; |
1429d46d | 670 | |
820c4db2 JG |
671 | /* Do not add to override if the map failed. */ |
672 | if (map_ops[i].status) | |
673 | continue; | |
674 | ||
675 | if (map_ops[i].flags & GNTMAP_contains_pte) { | |
676 | pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | |
677 | (map_ops[i].host_addr & ~PAGE_MASK)); | |
678 | mfn = pte_mfn(*pte); | |
679 | } else { | |
680 | mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | |
1429d46d | 681 | } |
820c4db2 | 682 | pfn = page_to_pfn(pages[i]); |
1429d46d | 683 | |
0ae65f49 JH |
684 | WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); |
685 | ||
820c4db2 JG |
686 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { |
687 | ret = -ENOMEM; | |
1429d46d | 688 | goto out; |
820c4db2 | 689 | } |
1429d46d ZK |
690 | } |
691 | ||
692 | out: | |
1429d46d ZK |
693 | return ret; |
694 | } | |
820c4db2 | 695 | EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); |
1429d46d | 696 | |
820c4db2 | 697 | int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, |
853d0289 | 698 | struct gnttab_unmap_grant_ref *kunmap_ops, |
820c4db2 | 699 | struct page **pages, unsigned int count) |
448f2831 | 700 | { |
820c4db2 | 701 | int i, ret = 0; |
448f2831 | 702 | |
820c4db2 JG |
703 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
704 | return 0; | |
448f2831 | 705 | |
820c4db2 | 706 | for (i = 0; i < count; i++) { |
0aad5689 | 707 | unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); |
820c4db2 JG |
708 | unsigned long pfn = page_to_pfn(pages[i]); |
709 | ||
710 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { | |
711 | ret = -EINVAL; | |
712 | goto out; | |
448f2831 | 713 | } |
448f2831 | 714 | |
0ae65f49 | 715 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
820c4db2 | 716 | } |
0bb599fd DV |
717 | if (kunmap_ops) |
718 | ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, | |
719 | kunmap_ops, count); | |
820c4db2 | 720 | out: |
448f2831 JF |
721 | return ret; |
722 | } | |
820c4db2 | 723 | EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); |
448f2831 | 724 | |
2222e71b | 725 | #ifdef CONFIG_XEN_DEBUG_FS |
a867db10 KRW |
726 | #include <linux/debugfs.h> |
727 | #include "debugfs.h" | |
728 | static int p2m_dump_show(struct seq_file *m, void *v) | |
2222e71b | 729 | { |
a491dbef | 730 | static const char * const type_name[] = { |
054954eb JG |
731 | [P2M_TYPE_IDENTITY] = "identity", |
732 | [P2M_TYPE_MISSING] = "missing", | |
733 | [P2M_TYPE_PFN] = "pfn", | |
734 | [P2M_TYPE_UNKNOWN] = "abnormal"}; | |
735 | unsigned long pfn, first_pfn; | |
736 | int type, prev_type; | |
737 | ||
738 | prev_type = xen_p2m_elem_type(0); | |
739 | first_pfn = 0; | |
740 | ||
741 | for (pfn = 0; pfn < xen_p2m_size; pfn++) { | |
742 | type = xen_p2m_elem_type(pfn); | |
743 | if (type != prev_type) { | |
744 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, | |
745 | type_name[prev_type]); | |
2222e71b | 746 | prev_type = type; |
054954eb | 747 | first_pfn = pfn; |
2222e71b KRW |
748 | } |
749 | } | |
054954eb JG |
750 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, |
751 | type_name[prev_type]); | |
2222e71b | 752 | return 0; |
2222e71b | 753 | } |
a867db10 KRW |
754 | |
755 | static int p2m_dump_open(struct inode *inode, struct file *filp) | |
756 | { | |
757 | return single_open(filp, p2m_dump_show, NULL); | |
758 | } | |
759 | ||
760 | static const struct file_operations p2m_dump_fops = { | |
761 | .open = p2m_dump_open, | |
762 | .read = seq_read, | |
763 | .llseek = seq_lseek, | |
764 | .release = single_release, | |
765 | }; | |
766 | ||
767 | static struct dentry *d_mmu_debug; | |
768 | ||
769 | static int __init xen_p2m_debugfs(void) | |
770 | { | |
771 | struct dentry *d_xen = xen_init_debugfs(); | |
772 | ||
773 | if (d_xen == NULL) | |
774 | return -ENOMEM; | |
775 | ||
776 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); | |
777 | ||
778 | debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); | |
779 | return 0; | |
780 | } | |
781 | fs_initcall(xen_p2m_debugfs); | |
782 | #endif /* CONFIG_XEN_DEBUG_FS */ |