diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:16:51 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:16:51 -0400 |
commit | 9702785a747aa27baf46ff504beab6528f21f2dd (patch) | |
tree | ab69d6f802f5b680c33999dc089e44982c74595d /arch/x86/xen/mmu.c | |
parent | 334e621a01f86d5bc25e4f742e1eaae6e2d2a97a (diff) |
i386: move xen
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r-- | arch/x86/xen/mmu.c | 567 |
1 files changed, 567 insertions, 0 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c new file mode 100644 index 000000000000..874db0cd1d2a --- /dev/null +++ b/arch/x86/xen/mmu.c | |||
@@ -0,0 +1,567 @@ | |||
1 | /* | ||
2 | * Xen mmu operations | ||
3 | * | ||
4 | * This file contains the various mmu fetch and update operations. | ||
5 | * The most important job they must perform is the mapping between the | ||
6 | * domain's pfn and the overall machine mfns. | ||
7 | * | ||
8 | * Xen allows guests to directly update the pagetable, in a controlled | ||
9 | * fashion. In other words, the guest modifies the same pagetable | ||
10 | * that the CPU actually uses, which eliminates the overhead of having | ||
11 | * a separate shadow pagetable. | ||
12 | * | ||
13 | * In order to allow this, it falls on the guest domain to map its | ||
14 | * notion of a "physical" pfn - which is just a domain-local linear | ||
15 | * address - into a real "machine address" which the CPU's MMU can | ||
16 | * use. | ||
17 | * | ||
18 | * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be | ||
19 | * inserted directly into the pagetable. When creating a new | ||
20 | * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely, | ||
21 | * when reading the content back with __(pgd|pmd|pte)_val, it converts | ||
22 | * the mfn back into a pfn. | ||
23 | * | ||
24 | * The other constraint is that all pages which make up a pagetable | ||
25 | * must be mapped read-only in the guest. This prevents uncontrolled | ||
26 | * guest updates to the pagetable. Xen strictly enforces this, and | ||
27 | * will disallow any pagetable update which will end up mapping a | ||
28 | * pagetable page RW, and will disallow using any writable page as a | ||
29 | * pagetable. | ||
30 | * | ||
31 | * Naively, when loading %cr3 with the base of a new pagetable, Xen | ||
32 | * would need to validate the whole pagetable before going on. | ||
33 | * Naturally, this is quite slow. The solution is to "pin" a | ||
34 | * pagetable, which enforces all the constraints on the pagetable even | ||
35 | * when it is not actively in use. This menas that Xen can be assured | ||
36 | * that it is still valid when you do load it into %cr3, and doesn't | ||
37 | * need to revalidate it. | ||
38 | * | ||
39 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | ||
40 | */ | ||
41 | #include <linux/sched.h> | ||
42 | #include <linux/highmem.h> | ||
43 | #include <linux/bug.h> | ||
44 | #include <linux/sched.h> | ||
45 | |||
46 | #include <asm/pgtable.h> | ||
47 | #include <asm/tlbflush.h> | ||
48 | #include <asm/mmu_context.h> | ||
49 | #include <asm/paravirt.h> | ||
50 | |||
51 | #include <asm/xen/hypercall.h> | ||
52 | #include <asm/xen/hypervisor.h> | ||
53 | |||
54 | #include <xen/page.h> | ||
55 | #include <xen/interface/xen.h> | ||
56 | |||
57 | #include "multicalls.h" | ||
58 | #include "mmu.h" | ||
59 | |||
60 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) | ||
61 | { | ||
62 | pte_t *pte = lookup_address(address); | ||
63 | unsigned offset = address & PAGE_MASK; | ||
64 | |||
65 | BUG_ON(pte == NULL); | ||
66 | |||
67 | return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset); | ||
68 | } | ||
69 | |||
70 | void make_lowmem_page_readonly(void *vaddr) | ||
71 | { | ||
72 | pte_t *pte, ptev; | ||
73 | unsigned long address = (unsigned long)vaddr; | ||
74 | |||
75 | pte = lookup_address(address); | ||
76 | BUG_ON(pte == NULL); | ||
77 | |||
78 | ptev = pte_wrprotect(*pte); | ||
79 | |||
80 | if (HYPERVISOR_update_va_mapping(address, ptev, 0)) | ||
81 | BUG(); | ||
82 | } | ||
83 | |||
84 | void make_lowmem_page_readwrite(void *vaddr) | ||
85 | { | ||
86 | pte_t *pte, ptev; | ||
87 | unsigned long address = (unsigned long)vaddr; | ||
88 | |||
89 | pte = lookup_address(address); | ||
90 | BUG_ON(pte == NULL); | ||
91 | |||
92 | ptev = pte_mkwrite(*pte); | ||
93 | |||
94 | if (HYPERVISOR_update_va_mapping(address, ptev, 0)) | ||
95 | BUG(); | ||
96 | } | ||
97 | |||
98 | |||
99 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | ||
100 | { | ||
101 | struct multicall_space mcs; | ||
102 | struct mmu_update *u; | ||
103 | |||
104 | preempt_disable(); | ||
105 | |||
106 | mcs = xen_mc_entry(sizeof(*u)); | ||
107 | u = mcs.args; | ||
108 | u->ptr = virt_to_machine(ptr).maddr; | ||
109 | u->val = pmd_val_ma(val); | ||
110 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | ||
111 | |||
112 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
113 | |||
114 | preempt_enable(); | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * Associate a virtual page frame with a given physical page frame | ||
119 | * and protection flags for that frame. | ||
120 | */ | ||
121 | void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) | ||
122 | { | ||
123 | pgd_t *pgd; | ||
124 | pud_t *pud; | ||
125 | pmd_t *pmd; | ||
126 | pte_t *pte; | ||
127 | |||
128 | pgd = swapper_pg_dir + pgd_index(vaddr); | ||
129 | if (pgd_none(*pgd)) { | ||
130 | BUG(); | ||
131 | return; | ||
132 | } | ||
133 | pud = pud_offset(pgd, vaddr); | ||
134 | if (pud_none(*pud)) { | ||
135 | BUG(); | ||
136 | return; | ||
137 | } | ||
138 | pmd = pmd_offset(pud, vaddr); | ||
139 | if (pmd_none(*pmd)) { | ||
140 | BUG(); | ||
141 | return; | ||
142 | } | ||
143 | pte = pte_offset_kernel(pmd, vaddr); | ||
144 | /* <mfn,flags> stored as-is, to permit clearing entries */ | ||
145 | xen_set_pte(pte, mfn_pte(mfn, flags)); | ||
146 | |||
147 | /* | ||
148 | * It's enough to flush this one mapping. | ||
149 | * (PGE mappings get flushed as well) | ||
150 | */ | ||
151 | __flush_tlb_one(vaddr); | ||
152 | } | ||
153 | |||
154 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
155 | pte_t *ptep, pte_t pteval) | ||
156 | { | ||
157 | if (mm == current->mm || mm == &init_mm) { | ||
158 | if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | ||
159 | struct multicall_space mcs; | ||
160 | mcs = xen_mc_entry(0); | ||
161 | |||
162 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); | ||
163 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
164 | return; | ||
165 | } else | ||
166 | if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) | ||
167 | return; | ||
168 | } | ||
169 | xen_set_pte(ptep, pteval); | ||
170 | } | ||
171 | |||
172 | #ifdef CONFIG_X86_PAE | ||
173 | void xen_set_pud(pud_t *ptr, pud_t val) | ||
174 | { | ||
175 | struct multicall_space mcs; | ||
176 | struct mmu_update *u; | ||
177 | |||
178 | preempt_disable(); | ||
179 | |||
180 | mcs = xen_mc_entry(sizeof(*u)); | ||
181 | u = mcs.args; | ||
182 | u->ptr = virt_to_machine(ptr).maddr; | ||
183 | u->val = pud_val_ma(val); | ||
184 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | ||
185 | |||
186 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
187 | |||
188 | preempt_enable(); | ||
189 | } | ||
190 | |||
191 | void xen_set_pte(pte_t *ptep, pte_t pte) | ||
192 | { | ||
193 | ptep->pte_high = pte.pte_high; | ||
194 | smp_wmb(); | ||
195 | ptep->pte_low = pte.pte_low; | ||
196 | } | ||
197 | |||
198 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
199 | { | ||
200 | set_64bit((u64 *)ptep, pte_val_ma(pte)); | ||
201 | } | ||
202 | |||
203 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
204 | { | ||
205 | ptep->pte_low = 0; | ||
206 | smp_wmb(); /* make sure low gets written first */ | ||
207 | ptep->pte_high = 0; | ||
208 | } | ||
209 | |||
210 | void xen_pmd_clear(pmd_t *pmdp) | ||
211 | { | ||
212 | xen_set_pmd(pmdp, __pmd(0)); | ||
213 | } | ||
214 | |||
215 | unsigned long long xen_pte_val(pte_t pte) | ||
216 | { | ||
217 | unsigned long long ret = 0; | ||
218 | |||
219 | if (pte.pte_low) { | ||
220 | ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low; | ||
221 | ret = machine_to_phys(XMADDR(ret)).paddr | 1; | ||
222 | } | ||
223 | |||
224 | return ret; | ||
225 | } | ||
226 | |||
227 | unsigned long long xen_pmd_val(pmd_t pmd) | ||
228 | { | ||
229 | unsigned long long ret = pmd.pmd; | ||
230 | if (ret) | ||
231 | ret = machine_to_phys(XMADDR(ret)).paddr | 1; | ||
232 | return ret; | ||
233 | } | ||
234 | |||
235 | unsigned long long xen_pgd_val(pgd_t pgd) | ||
236 | { | ||
237 | unsigned long long ret = pgd.pgd; | ||
238 | if (ret) | ||
239 | ret = machine_to_phys(XMADDR(ret)).paddr | 1; | ||
240 | return ret; | ||
241 | } | ||
242 | |||
243 | pte_t xen_make_pte(unsigned long long pte) | ||
244 | { | ||
245 | if (pte & 1) | ||
246 | pte = phys_to_machine(XPADDR(pte)).maddr; | ||
247 | |||
248 | return (pte_t){ pte, pte >> 32 }; | ||
249 | } | ||
250 | |||
251 | pmd_t xen_make_pmd(unsigned long long pmd) | ||
252 | { | ||
253 | if (pmd & 1) | ||
254 | pmd = phys_to_machine(XPADDR(pmd)).maddr; | ||
255 | |||
256 | return (pmd_t){ pmd }; | ||
257 | } | ||
258 | |||
259 | pgd_t xen_make_pgd(unsigned long long pgd) | ||
260 | { | ||
261 | if (pgd & _PAGE_PRESENT) | ||
262 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | ||
263 | |||
264 | return (pgd_t){ pgd }; | ||
265 | } | ||
266 | #else /* !PAE */ | ||
267 | void xen_set_pte(pte_t *ptep, pte_t pte) | ||
268 | { | ||
269 | *ptep = pte; | ||
270 | } | ||
271 | |||
272 | unsigned long xen_pte_val(pte_t pte) | ||
273 | { | ||
274 | unsigned long ret = pte.pte_low; | ||
275 | |||
276 | if (ret & _PAGE_PRESENT) | ||
277 | ret = machine_to_phys(XMADDR(ret)).paddr; | ||
278 | |||
279 | return ret; | ||
280 | } | ||
281 | |||
282 | unsigned long xen_pgd_val(pgd_t pgd) | ||
283 | { | ||
284 | unsigned long ret = pgd.pgd; | ||
285 | if (ret) | ||
286 | ret = machine_to_phys(XMADDR(ret)).paddr | 1; | ||
287 | return ret; | ||
288 | } | ||
289 | |||
290 | pte_t xen_make_pte(unsigned long pte) | ||
291 | { | ||
292 | if (pte & _PAGE_PRESENT) | ||
293 | pte = phys_to_machine(XPADDR(pte)).maddr; | ||
294 | |||
295 | return (pte_t){ pte }; | ||
296 | } | ||
297 | |||
298 | pgd_t xen_make_pgd(unsigned long pgd) | ||
299 | { | ||
300 | if (pgd & _PAGE_PRESENT) | ||
301 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | ||
302 | |||
303 | return (pgd_t){ pgd }; | ||
304 | } | ||
305 | #endif /* CONFIG_X86_PAE */ | ||
306 | |||
307 | |||
308 | |||
309 | /* | ||
310 | (Yet another) pagetable walker. This one is intended for pinning a | ||
311 | pagetable. This means that it walks a pagetable and calls the | ||
312 | callback function on each page it finds making up the page table, | ||
313 | at every level. It walks the entire pagetable, but it only bothers | ||
314 | pinning pte pages which are below pte_limit. In the normal case | ||
315 | this will be TASK_SIZE, but at boot we need to pin up to | ||
316 | FIXADDR_TOP. But the important bit is that we don't pin beyond | ||
317 | there, because then we start getting into Xen's ptes. | ||
318 | */ | ||
319 | static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned), | ||
320 | unsigned long limit) | ||
321 | { | ||
322 | pgd_t *pgd = pgd_base; | ||
323 | int flush = 0; | ||
324 | unsigned long addr = 0; | ||
325 | unsigned long pgd_next; | ||
326 | |||
327 | BUG_ON(limit > FIXADDR_TOP); | ||
328 | |||
329 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
330 | return 0; | ||
331 | |||
332 | for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) { | ||
333 | pud_t *pud; | ||
334 | unsigned long pud_limit, pud_next; | ||
335 | |||
336 | pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP); | ||
337 | |||
338 | if (!pgd_val(*pgd)) | ||
339 | continue; | ||
340 | |||
341 | pud = pud_offset(pgd, 0); | ||
342 | |||
343 | if (PTRS_PER_PUD > 1) /* not folded */ | ||
344 | flush |= (*func)(virt_to_page(pud), 0); | ||
345 | |||
346 | for (; addr != pud_limit; pud++, addr = pud_next) { | ||
347 | pmd_t *pmd; | ||
348 | unsigned long pmd_limit; | ||
349 | |||
350 | pud_next = pud_addr_end(addr, pud_limit); | ||
351 | |||
352 | if (pud_next < limit) | ||
353 | pmd_limit = pud_next; | ||
354 | else | ||
355 | pmd_limit = limit; | ||
356 | |||
357 | if (pud_none(*pud)) | ||
358 | continue; | ||
359 | |||
360 | pmd = pmd_offset(pud, 0); | ||
361 | |||
362 | if (PTRS_PER_PMD > 1) /* not folded */ | ||
363 | flush |= (*func)(virt_to_page(pmd), 0); | ||
364 | |||
365 | for (; addr != pmd_limit; pmd++) { | ||
366 | addr += (PAGE_SIZE * PTRS_PER_PTE); | ||
367 | if ((pmd_limit-1) < (addr-1)) { | ||
368 | addr = pmd_limit; | ||
369 | break; | ||
370 | } | ||
371 | |||
372 | if (pmd_none(*pmd)) | ||
373 | continue; | ||
374 | |||
375 | flush |= (*func)(pmd_page(*pmd), 0); | ||
376 | } | ||
377 | } | ||
378 | } | ||
379 | |||
380 | flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH); | ||
381 | |||
382 | return flush; | ||
383 | } | ||
384 | |||
385 | static int pin_page(struct page *page, unsigned flags) | ||
386 | { | ||
387 | unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags); | ||
388 | int flush; | ||
389 | |||
390 | if (pgfl) | ||
391 | flush = 0; /* already pinned */ | ||
392 | else if (PageHighMem(page)) | ||
393 | /* kmaps need flushing if we found an unpinned | ||
394 | highpage */ | ||
395 | flush = 1; | ||
396 | else { | ||
397 | void *pt = lowmem_page_address(page); | ||
398 | unsigned long pfn = page_to_pfn(page); | ||
399 | struct multicall_space mcs = __xen_mc_entry(0); | ||
400 | |||
401 | flush = 0; | ||
402 | |||
403 | MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, | ||
404 | pfn_pte(pfn, PAGE_KERNEL_RO), | ||
405 | flags); | ||
406 | } | ||
407 | |||
408 | return flush; | ||
409 | } | ||
410 | |||
411 | /* This is called just after a mm has been created, but it has not | ||
412 | been used yet. We need to make sure that its pagetable is all | ||
413 | read-only, and can be pinned. */ | ||
414 | void xen_pgd_pin(pgd_t *pgd) | ||
415 | { | ||
416 | struct multicall_space mcs; | ||
417 | struct mmuext_op *op; | ||
418 | |||
419 | xen_mc_batch(); | ||
420 | |||
421 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { | ||
422 | /* re-enable interrupts for kmap_flush_unused */ | ||
423 | xen_mc_issue(0); | ||
424 | kmap_flush_unused(); | ||
425 | xen_mc_batch(); | ||
426 | } | ||
427 | |||
428 | mcs = __xen_mc_entry(sizeof(*op)); | ||
429 | op = mcs.args; | ||
430 | |||
431 | #ifdef CONFIG_X86_PAE | ||
432 | op->cmd = MMUEXT_PIN_L3_TABLE; | ||
433 | #else | ||
434 | op->cmd = MMUEXT_PIN_L2_TABLE; | ||
435 | #endif | ||
436 | op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); | ||
437 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | ||
438 | |||
439 | xen_mc_issue(0); | ||
440 | } | ||
441 | |||
442 | /* The init_mm pagetable is really pinned as soon as its created, but | ||
443 | that's before we have page structures to store the bits. So do all | ||
444 | the book-keeping now. */ | ||
445 | static __init int mark_pinned(struct page *page, unsigned flags) | ||
446 | { | ||
447 | SetPagePinned(page); | ||
448 | return 0; | ||
449 | } | ||
450 | |||
451 | void __init xen_mark_init_mm_pinned(void) | ||
452 | { | ||
453 | pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP); | ||
454 | } | ||
455 | |||
456 | static int unpin_page(struct page *page, unsigned flags) | ||
457 | { | ||
458 | unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags); | ||
459 | |||
460 | if (pgfl && !PageHighMem(page)) { | ||
461 | void *pt = lowmem_page_address(page); | ||
462 | unsigned long pfn = page_to_pfn(page); | ||
463 | struct multicall_space mcs = __xen_mc_entry(0); | ||
464 | |||
465 | MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, | ||
466 | pfn_pte(pfn, PAGE_KERNEL), | ||
467 | flags); | ||
468 | } | ||
469 | |||
470 | return 0; /* never need to flush on unpin */ | ||
471 | } | ||
472 | |||
473 | /* Release a pagetables pages back as normal RW */ | ||
474 | static void xen_pgd_unpin(pgd_t *pgd) | ||
475 | { | ||
476 | struct mmuext_op *op; | ||
477 | struct multicall_space mcs; | ||
478 | |||
479 | xen_mc_batch(); | ||
480 | |||
481 | mcs = __xen_mc_entry(sizeof(*op)); | ||
482 | |||
483 | op = mcs.args; | ||
484 | op->cmd = MMUEXT_UNPIN_TABLE; | ||
485 | op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); | ||
486 | |||
487 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | ||
488 | |||
489 | pgd_walk(pgd, unpin_page, TASK_SIZE); | ||
490 | |||
491 | xen_mc_issue(0); | ||
492 | } | ||
493 | |||
494 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | ||
495 | { | ||
496 | spin_lock(&next->page_table_lock); | ||
497 | xen_pgd_pin(next->pgd); | ||
498 | spin_unlock(&next->page_table_lock); | ||
499 | } | ||
500 | |||
501 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | ||
502 | { | ||
503 | spin_lock(&mm->page_table_lock); | ||
504 | xen_pgd_pin(mm->pgd); | ||
505 | spin_unlock(&mm->page_table_lock); | ||
506 | } | ||
507 | |||
508 | |||
509 | #ifdef CONFIG_SMP | ||
510 | /* Another cpu may still have their %cr3 pointing at the pagetable, so | ||
511 | we need to repoint it somewhere else before we can unpin it. */ | ||
512 | static void drop_other_mm_ref(void *info) | ||
513 | { | ||
514 | struct mm_struct *mm = info; | ||
515 | |||
516 | if (__get_cpu_var(cpu_tlbstate).active_mm == mm) | ||
517 | leave_mm(smp_processor_id()); | ||
518 | } | ||
519 | |||
520 | static void drop_mm_ref(struct mm_struct *mm) | ||
521 | { | ||
522 | if (current->active_mm == mm) { | ||
523 | if (current->mm == mm) | ||
524 | load_cr3(swapper_pg_dir); | ||
525 | else | ||
526 | leave_mm(smp_processor_id()); | ||
527 | } | ||
528 | |||
529 | if (!cpus_empty(mm->cpu_vm_mask)) | ||
530 | xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, | ||
531 | mm, 1); | ||
532 | } | ||
533 | #else | ||
534 | static void drop_mm_ref(struct mm_struct *mm) | ||
535 | { | ||
536 | if (current->active_mm == mm) | ||
537 | load_cr3(swapper_pg_dir); | ||
538 | } | ||
539 | #endif | ||
540 | |||
541 | /* | ||
542 | * While a process runs, Xen pins its pagetables, which means that the | ||
543 | * hypervisor forces it to be read-only, and it controls all updates | ||
544 | * to it. This means that all pagetable updates have to go via the | ||
545 | * hypervisor, which is moderately expensive. | ||
546 | * | ||
547 | * Since we're pulling the pagetable down, we switch to use init_mm, | ||
548 | * unpin old process pagetable and mark it all read-write, which | ||
549 | * allows further operations on it to be simple memory accesses. | ||
550 | * | ||
551 | * The only subtle point is that another CPU may be still using the | ||
552 | * pagetable because of lazy tlb flushing. This means we need need to | ||
553 | * switch all CPUs off this pagetable before we can unpin it. | ||
554 | */ | ||
555 | void xen_exit_mmap(struct mm_struct *mm) | ||
556 | { | ||
557 | get_cpu(); /* make sure we don't move around */ | ||
558 | drop_mm_ref(mm); | ||
559 | put_cpu(); | ||
560 | |||
561 | spin_lock(&mm->page_table_lock); | ||
562 | |||
563 | /* pgd may not be pinned in the error exit path of execve */ | ||
564 | if (PagePinned(virt_to_page(mm->pgd))) | ||
565 | xen_pgd_unpin(mm->pgd); | ||
566 | spin_unlock(&mm->page_table_lock); | ||
567 | } | ||