xen: add configurable max domain size
arch/x86/xen/mmu.c
/*
 * Xen mmu operations
 *
 * This file contains the various mmu fetch and update operations.
 * The most important job they must perform is the mapping between the
 * domain's pfn and the overall machine mfns.
 *
 * Xen allows guests to directly update the pagetable, in a controlled
 * fashion. In other words, the guest modifies the same pagetable
 * that the CPU actually uses, which eliminates the overhead of having
 * a separate shadow pagetable.
 *
 * In order to allow this, it falls on the guest domain to map its
 * notion of a "physical" pfn - which is just a domain-local linear
 * address - into a real "machine address" which the CPU's MMU can
 * use.
 *
 * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
 * inserted directly into the pagetable. When creating a new
 * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely,
 * when reading the content back with __(pgd|pmd|pte)_val, it converts
 * the mfn back into a pfn.
 *
 * The other constraint is that all pages which make up a pagetable
 * must be mapped read-only in the guest. This prevents uncontrolled
 * guest updates to the pagetable. Xen strictly enforces this, and
 * will disallow any pagetable update which will end up mapping a
 * pagetable page RW, and will disallow using any writable page as a
 * pagetable.
 *
 * Naively, when loading %cr3 with the base of a new pagetable, Xen
 * would need to validate the whole pagetable before going on.
 * Naturally, this is quite slow. The solution is to "pin" a
 * pagetable, which enforces all the constraints on the pagetable even
 * when it is not actively in use. This means that Xen can be assured
 * that it is still valid when you do load it into %cr3, and doesn't
 * need to revalidate it.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
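
/*
 * For example, installing a mapping for the domain-physical page "pfn"
 * means translating it through the p2m table defined below -- a minimal
 * sketch, using only helpers that appear later in this file:
 *
 *	mfn = pfn_to_mfn(pfn);				(p2m lookup)
 *	xen_set_pte(ptep, mfn_pte(mfn, PAGE_KERNEL));	(mfn goes in the pte)
 *
 * Reading the entry back through xen_pte_val() applies the reverse
 * machine-to-phys translation, so the rest of the kernel only ever
 * sees pfns.
 */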
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/bug.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/paravirt.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/page.h>
#include <xen/interface/xen.h>

#include "multicalls.h"
#include "mmu.h"

#define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))

static unsigned long *p2m_top[MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE];

static inline unsigned p2m_top_index(unsigned long pfn)
{
        BUG_ON(pfn >= MAX_DOMAIN_PAGES);
        return pfn / P2M_ENTRIES_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
        return pfn % P2M_ENTRIES_PER_PAGE;
}
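
/*
 * Worked example: with 4 KiB pages and 4-byte entries (32-bit),
 * P2M_ENTRIES_PER_PAGE is 1024, so pfn 0x12345 lives at
 * p2m_top_index(0x12345) == 0x48 and p2m_index(0x12345) == 0x345,
 * i.e. slot 0x345 of the 0x48th leaf page.
 */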

void __init xen_build_dynamic_phys_to_machine(void)
{
        unsigned pfn;
        unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
        unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);

        for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
                unsigned topidx = p2m_top_index(pfn);

                p2m_top[topidx] = &mfn_list[pfn];
        }
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
        unsigned topidx, idx;

        if (unlikely(pfn >= MAX_DOMAIN_PAGES))
                return INVALID_P2M_ENTRY;

        topidx = p2m_top_index(pfn);
        if (p2m_top[topidx] == NULL)
                return INVALID_P2M_ENTRY;

        idx = p2m_index(pfn);
        return p2m_top[topidx][idx];
}
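
/*
 * Allocate a leaf page of the p2m table, with every entry initially
 * invalid.  Two CPUs may race to fill the same slot; the cmpxchg()
 * ensures only one page is installed, and the loser frees its copy.
 */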
static void alloc_p2m(unsigned long **pp)
{
        unsigned long *p;
        unsigned i;

        p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
        BUG_ON(p == NULL);

        for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
                p[i] = INVALID_P2M_ENTRY;

        if (cmpxchg(pp, NULL, p) != NULL)
                free_page((unsigned long)p);
}

void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
        unsigned topidx, idx;

        if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
                BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
                return;
        }

        if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
                BUG_ON(mfn != INVALID_P2M_ENTRY);
                return;
        }

        topidx = p2m_top_index(pfn);
        if (p2m_top[topidx] == NULL) {
                /* no need to allocate a page to store an invalid entry */
                if (mfn == INVALID_P2M_ENTRY)
                        return;
                alloc_p2m(&p2m_top[topidx]);
        }

        idx = p2m_index(pfn);
        p2m_top[topidx][idx] = mfn;
}

xmaddr_t arbitrary_virt_to_machine(unsigned long address)
{
        unsigned int level;
        pte_t *pte = lookup_address(address, &level);
        unsigned offset = address & ~PAGE_MASK;

        BUG_ON(pte == NULL);

        return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
}
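
/*
 * Xen requires that pages used as pagetables be mapped read-only in the
 * guest (see the comment at the top of this file).  These helpers flip a
 * lowmem kernel mapping between read-only and read-write using the
 * update_va_mapping hypercall.
 */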
void make_lowmem_page_readonly(void *vaddr)
{
        pte_t *pte, ptev;
        unsigned long address = (unsigned long)vaddr;
        unsigned int level;

        pte = lookup_address(address, &level);
        BUG_ON(pte == NULL);

        ptev = pte_wrprotect(*pte);

        if (HYPERVISOR_update_va_mapping(address, ptev, 0))
                BUG();
}

void make_lowmem_page_readwrite(void *vaddr)
{
        pte_t *pte, ptev;
        unsigned long address = (unsigned long)vaddr;
        unsigned int level;

        pte = lookup_address(address, &level);
        BUG_ON(pte == NULL);

        ptev = pte_mkwrite(*pte);

        if (HYPERVISOR_update_va_mapping(address, ptev, 0))
                BUG();
}
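
/*
 * Because the pagetable is read-only under Xen, a pmd entry cannot be
 * written directly; the update is queued as an mmu_update hypercall in
 * the per-cpu multicall buffer.  In lazy MMU mode xen_mc_issue() leaves
 * it batched with other pending updates; otherwise it is flushed
 * immediately.  Preemption is disabled so the batch stays on one CPU.
 */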
void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
        struct multicall_space mcs;
        struct mmu_update *u;

        preempt_disable();

        mcs = xen_mc_entry(sizeof(*u));
        u = mcs.args;
        u->ptr = virt_to_machine(ptr).maddr;
        u->val = pmd_val_ma(val);
        MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);

        xen_mc_issue(PARAVIRT_LAZY_MMU);

        preempt_enable();
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                BUG();
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                BUG();
                return;
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                BUG();
                return;
        }
        pte = pte_offset_kernel(pmd, vaddr);
        /* <mfn,flags> stored as-is, to permit clearing entries */
        xen_set_pte(pte, mfn_pte(mfn, flags));

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
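
/*
 * Usage sketch (hypothetical values): a caller that already holds a
 * machine frame number, rather than a pfn, can map it at a kernel
 * virtual address with
 *
 *	set_pte_mfn(vaddr, mfn, PAGE_KERNEL);
 *
 * since the mfn is stored as-is and bypasses the p2m translation.
 */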

void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, pte_t pteval)
{
        /* updates to init_mm may be done without lock */
        if (mm == &init_mm)
                preempt_disable();

        if (mm == current->mm || mm == &init_mm) {
                if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
                        struct multicall_space mcs;
                        mcs = xen_mc_entry(0);

                        MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
                        xen_mc_issue(PARAVIRT_LAZY_MMU);
                        goto out;
                } else
                        if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
                                goto out;
        }
        xen_set_pte(ptep, pteval);

out:
        if (mm == &init_mm)
                preempt_enable();
}

pteval_t xen_pte_val(pte_t pte)
{
        pteval_t ret = pte.pte;

        if (ret & _PAGE_PRESENT)
                ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;

        return ret;
}

pgdval_t xen_pgd_val(pgd_t pgd)
{
        pgdval_t ret = pgd.pgd;
        if (ret & _PAGE_PRESENT)
                ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
        return ret;
}

pte_t xen_make_pte(pteval_t pte)
{
        if (pte & _PAGE_PRESENT) {
                pte = phys_to_machine(XPADDR(pte)).maddr;
                pte &= ~(_PAGE_PCD | _PAGE_PWT);
        }

        return (pte_t){ .pte = pte };
}

pgd_t xen_make_pgd(pgdval_t pgd)
{
        if (pgd & _PAGE_PRESENT)
                pgd = phys_to_machine(XPADDR(pgd)).maddr;

        return (pgd_t){ pgd };
}

pmdval_t xen_pmd_val(pmd_t pmd)
{
        pmdval_t ret = native_pmd_val(pmd);
        if (ret & _PAGE_PRESENT)
                ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
        return ret;
}

void xen_set_pud(pud_t *ptr, pud_t val)
{
        struct multicall_space mcs;
        struct mmu_update *u;

        preempt_disable();

        mcs = xen_mc_entry(sizeof(*u));
        u = mcs.args;
        u->ptr = virt_to_machine(ptr).maddr;
        u->val = pud_val_ma(val);
        MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);

        xen_mc_issue(PARAVIRT_LAZY_MMU);

        preempt_enable();
}

void xen_set_pte(pte_t *ptep, pte_t pte)
{
        ptep->pte_high = pte.pte_high;
        smp_wmb();
        ptep->pte_low = pte.pte_low;
}

void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
        set_64bit((u64 *)ptep, pte_val_ma(pte));
}

void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        ptep->pte_low = 0;
        smp_wmb();              /* make sure low gets written first */
        ptep->pte_high = 0;
}

void xen_pmd_clear(pmd_t *pmdp)
{
        xen_set_pmd(pmdp, __pmd(0));
}

pmd_t xen_make_pmd(pmdval_t pmd)
{
        if (pmd & _PAGE_PRESENT)
                pmd = phys_to_machine(XPADDR(pmd)).maddr;

        return native_make_pmd(pmd);
}

/*
  (Yet another) pagetable walker.  This one is intended for pinning a
  pagetable.  This means that it walks a pagetable and calls the
  callback function on each page it finds making up the page table,
  at every level.  It walks the entire pagetable, but it only bothers
  pinning pte pages which are below the limit argument.  In the normal
  case this will be TASK_SIZE, but at boot we need to pin up to
  FIXADDR_TOP.  But the important bit is that we don't pin beyond
  there, because then we start getting into Xen's ptes.
*/
static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
                    unsigned long limit)
{
        pgd_t *pgd = pgd_base;
        int flush = 0;
        unsigned long addr = 0;
        unsigned long pgd_next;

        BUG_ON(limit > FIXADDR_TOP);

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
                pud_t *pud;
                unsigned long pud_limit, pud_next;

                pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);

                if (!pgd_val(*pgd))
                        continue;

                pud = pud_offset(pgd, 0);

                if (PTRS_PER_PUD > 1) /* not folded */
                        flush |= (*func)(virt_to_page(pud), PT_PUD);

                for (; addr != pud_limit; pud++, addr = pud_next) {
                        pmd_t *pmd;
                        unsigned long pmd_limit;

                        pud_next = pud_addr_end(addr, pud_limit);

                        if (pud_next < limit)
                                pmd_limit = pud_next;
                        else
                                pmd_limit = limit;

                        if (pud_none(*pud))
                                continue;

                        pmd = pmd_offset(pud, 0);

                        if (PTRS_PER_PMD > 1) /* not folded */
                                flush |= (*func)(virt_to_page(pmd), PT_PMD);

                        for (; addr != pmd_limit; pmd++) {
                                addr += (PAGE_SIZE * PTRS_PER_PTE);
                                if ((pmd_limit-1) < (addr-1)) {
                                        addr = pmd_limit;
                                        break;
                                }

                                if (pmd_none(*pmd))
                                        continue;

                                flush |= (*func)(pmd_page(*pmd), PT_PTE);
                        }
                }
        }

        flush |= (*func)(virt_to_page(pgd_base), PT_PGD);

        return flush;
}
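
/*
 * Typical use (see xen_pgd_pin() below): pgd_walk(pgd, pin_page, TASK_SIZE)
 * calls pin_page() on every page making up the pagetable, and the
 * accumulated return value reports whether a kmap flush is needed.
 */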

static spinlock_t *lock_pte(struct page *page)
{
        spinlock_t *ptl = NULL;

#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
        ptl = __pte_lockptr(page);
        spin_lock(ptl);
#endif

        return ptl;
}

static void do_unlock(void *v)
{
        spinlock_t *ptl = v;
        spin_unlock(ptl);
}

static void xen_do_pin(unsigned level, unsigned long pfn)
{
        struct mmuext_op *op;
        struct multicall_space mcs;

        mcs = __xen_mc_entry(sizeof(*op));
        op = mcs.args;
        op->cmd = level;
        op->arg1.mfn = pfn_to_mfn(pfn);
        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
}
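
/*
 * Pin a single pagetable page: remap it read-only (flushing the TLB only
 * for the top-level page), and for pte pages also take the split pte lock
 * and queue an L1 pin.  Returns nonzero if an unpinned highmem page was
 * found, meaning the caller must flush kmaps.
 */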
static int pin_page(struct page *page, enum pt_level level)
{
        unsigned pgfl = TestSetPagePinned(page);
        int flush;

        if (pgfl)
                flush = 0;              /* already pinned */
        else if (PageHighMem(page))
                /* kmaps need flushing if we found an unpinned
                   highpage */
                flush = 1;
        else {
                void *pt = lowmem_page_address(page);
                unsigned long pfn = page_to_pfn(page);
                struct multicall_space mcs = __xen_mc_entry(0);
                spinlock_t *ptl;

                flush = 0;

                ptl = NULL;
                if (level == PT_PTE)
                        ptl = lock_pte(page);

                MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
                                        pfn_pte(pfn, PAGE_KERNEL_RO),
                                        level == PT_PGD ? UVMF_TLB_FLUSH : 0);

                if (level == PT_PTE)
                        xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);

                if (ptl) {
                        /* Queue a deferred unlock for when this batch
                           is completed. */
                        xen_mc_callback(do_unlock, ptl);
                }
        }

        return flush;
}

/* This is called just after a mm has been created, but it has not
   been used yet.  We need to make sure that its pagetable is all
   read-only, and can be pinned. */
void xen_pgd_pin(pgd_t *pgd)
{
        xen_mc_batch();

        if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
                /* re-enable interrupts for kmap_flush_unused */
                xen_mc_issue(0);
                kmap_flush_unused();
                xen_mc_batch();
        }

        xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
        xen_mc_issue(0);
}

/* The init_mm pagetable is really pinned as soon as it's created, but
   that's before we have page structures to store the bits.  So do all
   the book-keeping now. */
static __init int mark_pinned(struct page *page, enum pt_level level)
{
        SetPagePinned(page);
        return 0;
}

void __init xen_mark_init_mm_pinned(void)
{
        pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
}

static int unpin_page(struct page *page, enum pt_level level)
{
        unsigned pgfl = TestClearPagePinned(page);

        if (pgfl && !PageHighMem(page)) {
                void *pt = lowmem_page_address(page);
                unsigned long pfn = page_to_pfn(page);
                spinlock_t *ptl = NULL;
                struct multicall_space mcs;

                if (level == PT_PTE) {
                        ptl = lock_pte(page);

                        xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
                }

                mcs = __xen_mc_entry(0);

                MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
                                        pfn_pte(pfn, PAGE_KERNEL),
                                        level == PT_PGD ? UVMF_TLB_FLUSH : 0);

                if (ptl) {
                        /* unlock when batch completed */
                        xen_mc_callback(do_unlock, ptl);
                }
        }

        return 0;               /* never need to flush on unpin */
}

/* Release a pagetable's pages back as normal RW */
static void xen_pgd_unpin(pgd_t *pgd)
{
        xen_mc_batch();

        xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));

        pgd_walk(pgd, unpin_page, TASK_SIZE);

        xen_mc_issue(0);
}

void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
        spin_lock(&next->page_table_lock);
        xen_pgd_pin(next->pgd);
        spin_unlock(&next->page_table_lock);
}

void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
        spin_lock(&mm->page_table_lock);
        xen_pgd_pin(mm->pgd);
        spin_unlock(&mm->page_table_lock);
}

#ifdef CONFIG_SMP
/* Another cpu may still have its %cr3 pointing at the pagetable, so
   we need to repoint it somewhere else before we can unpin it. */
static void drop_other_mm_ref(void *info)
{
        struct mm_struct *mm = info;

        if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
                leave_mm(smp_processor_id());

        /* If this cpu still has a stale cr3 reference, then make sure
           it has been flushed. */
        if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
                load_cr3(swapper_pg_dir);
                arch_flush_lazy_cpu_mode();
        }
}

static void drop_mm_ref(struct mm_struct *mm)
{
        cpumask_t mask;
        unsigned cpu;

        if (current->active_mm == mm) {
                if (current->mm == mm)
                        load_cr3(swapper_pg_dir);
                else
                        leave_mm(smp_processor_id());
                arch_flush_lazy_cpu_mode();
        }

        /* Get the "official" set of cpus referring to our pagetable. */
        mask = mm->cpu_vm_mask;

        /* It's possible that a vcpu may have a stale reference to our
           cr3, because it's in lazy mode and hasn't yet flushed its set
           of pending hypercalls.  In this case, we can look at its
           actual current cr3 value, and force it to flush if
           needed. */
        for_each_online_cpu(cpu) {
                if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
                        cpu_set(cpu, mask);
        }

        if (!cpus_empty(mask))
                xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
}
#else
static void drop_mm_ref(struct mm_struct *mm)
{
        if (current->active_mm == mm)
                load_cr3(swapper_pg_dir);
}
#endif

/*
 * While a process runs, Xen pins its pagetables, which means that the
 * hypervisor forces it to be read-only, and it controls all updates
 * to it.  This means that all pagetable updates have to go via the
 * hypervisor, which is moderately expensive.
 *
 * Since we're pulling the pagetable down, we switch to use init_mm,
 * unpin the old process's pagetable and mark it all read-write, which
 * allows further operations on it to be simple memory accesses.
 *
 * The only subtle point is that another CPU may still be using the
 * pagetable because of lazy tlb flushing.  This means we need to
 * switch all CPUs off this pagetable before we can unpin it.
 */
void xen_exit_mmap(struct mm_struct *mm)
{
        get_cpu();              /* make sure we don't move around */
        drop_mm_ref(mm);
        put_cpu();

        spin_lock(&mm->page_table_lock);

        /* pgd may not be pinned in the error exit path of execve */
        if (PagePinned(virt_to_page(mm->pgd)))
                xen_pgd_unpin(mm->pgd);

        spin_unlock(&mm->page_table_lock);
}