Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--	arch/x86/xen/mmu.c	316
1 files changed, 227 insertions, 89 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ff0aa74afaa..a44d56e38bd 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -44,8 +44,10 @@
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/fixmap.h>
 #include <asm/mmu_context.h>
 #include <asm/paravirt.h>
+#include <asm/linkage.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -56,26 +58,29 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+/*
+ * Just beyond the highest usermode address. STACK_TOP_MAX has a
+ * redzone above it, so round it up to a PGD boundary.
+ */
+#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
+
+
 #define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
 /* Placeholder for holes in the address space */
-static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
-	__attribute__((section(".data.page_aligned"))) =
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
 	{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
 
 /* Array of pointers to pages containing p2m entries */
-static unsigned long *p2m_top[TOP_ENTRIES]
-	__attribute__((section(".data.page_aligned"))) =
+static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
 	{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
 
 /* Arrays of p2m arrays expressed in mfns used for save/restore */
-static unsigned long p2m_top_mfn[TOP_ENTRIES]
-	__attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
 
-static unsigned long p2m_top_mfn_list[
-	PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
-	__attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
+	__page_aligned_bss;
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
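
Note on the hunk above: it only switches the p2m arrays to the __page_aligned_data/__page_aligned_bss helpers; the lookup scheme itself is unchanged. p2m_top is a page-sized directory whose unpopulated slots all point at the shared p2m_missing page, so a pfn-to-mfn lookup is always two dereferences. The standalone sketch below models that two-level lookup; it is illustrative only, and the PAGE_SIZE/MAX_DOMAIN_PAGES values are assumptions, not this kernel's configuration.

/* Standalone model of the two-level p2m lookup (not kernel code). */
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define MAX_DOMAIN_PAGES	(128UL * 1024)	/* assumed: 512MB of 4K pages */
#define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
#define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
#define INVALID_P2M_ENTRY	(~0UL)

/* one page's worth of "missing" entries, shared by every unpopulated slot */
static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] =
	{ [0 ... P2M_ENTRIES_PER_PAGE - 1] = INVALID_P2M_ENTRY };

static unsigned long *p2m_top[TOP_ENTRIES] =
	{ [0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };

static unsigned long get_phys_to_machine(unsigned long pfn)
{
	unsigned topidx = pfn / P2M_ENTRIES_PER_PAGE;
	unsigned idx = pfn % P2M_ENTRIES_PER_PAGE;

	if (pfn >= MAX_DOMAIN_PAGES)
		return INVALID_P2M_ENTRY;
	return p2m_top[topidx][idx];	/* hits p2m_missing for holes */
}

int main(void)
{
	printf("pfn 42 -> mfn %#lx\n", get_phys_to_machine(42));
	return 0;
}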
@@ -181,15 +186,16 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 	p2m_top[topidx][idx] = mfn;
 }
 
-xmaddr_t arbitrary_virt_to_machine(unsigned long address)
+xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 {
+	unsigned long address = (unsigned long)vaddr;
 	unsigned int level;
 	pte_t *pte = lookup_address(address, &level);
 	unsigned offset = address & ~PAGE_MASK;
 
 	BUG_ON(pte == NULL);
 
-	return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
+	return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
 }
 
 void make_lowmem_page_readonly(void *vaddr)
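
The change to arbitrary_virt_to_machine() above does two things: it now takes a void * so callers stop casting, and it widens pte_mfn(*pte) to phys_addr_t before the shift so that a machine frame above the 4GB mark is not truncated on a 32-bit build. A minimal sketch of the truncation that the cast avoids, assuming a 32-bit unsigned long:

/* Illustrative only: the value below is an arbitrary mfn, not real data. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint32_t mfn = 0x00150000;	/* frame whose machine address exceeds 4GB */

	uint32_t truncated = mfn << PAGE_SHIFT;		  /* high bits shifted away */
	uint64_t widened   = (uint64_t)mfn << PAGE_SHIFT; /* what the cast preserves */

	printf("32-bit shift: %#x\n", truncated);
	printf("64-bit shift: %#llx\n", (unsigned long long)widened);
	return 0;
}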
@@ -256,7 +262,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptr).maddr;
+	/* ptr may be ioremapped for 64-bit pagetable setup */
+	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pmd_val_ma(val);
 	extend_mmu_update(&u);
 
@@ -283,35 +290,7 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
  */
 void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	pgd = swapper_pg_dir + pgd_index(vaddr);
-	if (pgd_none(*pgd)) {
-		BUG();
-		return;
-	}
-	pud = pud_offset(pgd, vaddr);
-	if (pud_none(*pud)) {
-		BUG();
-		return;
-	}
-	pmd = pmd_offset(pud, vaddr);
-	if (pmd_none(*pmd)) {
-		BUG();
-		return;
-	}
-	pte = pte_offset_kernel(pmd, vaddr);
-	/* <mfn,flags> stored as-is, to permit clearing entries */
-	xen_set_pte(pte, mfn_pte(mfn, flags));
-
-	/*
-	 * It's enough to flush this one mapping.
-	 * (PGE mappings get flushed as well)
-	 */
-	__flush_tlb_one(vaddr);
+	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
 }
 
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -418,7 +397,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptr).maddr;
+	/* ptr may be ioremapped for 64-bit pagetable setup */
+	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pud_val_ma(val);
 	extend_mmu_update(&u);
 
@@ -441,14 +421,19 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+#ifdef CONFIG_X86_PAE
 	ptep->pte_high = pte.pte_high;
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
+#else
+	*ptep = pte;
+#endif
 }
 
+#ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
-	set_64bit((u64 *)ptep, pte_val_ma(pte));
+	set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
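
For reference, the ordering preserved in xen_set_pte()'s PAE branch above: a PAE pte is 64 bits wide but stored as two 32-bit halves, and the present bit lives in the low word, so the high word must become visible before the low word is written. The standalone model below shows that store ordering; it is illustrative only, with __sync_synchronize() standing in for the kernel's smp_wmb().

/* Toy model of the ordered PAE pte store (not kernel code). */
#include <stdio.h>
#include <stdint.h>

typedef struct { uint32_t pte_low, pte_high; } pae_pte_t;

#define PTE_PRESENT 0x1u

static void set_pte_ordered(volatile pae_pte_t *ptep, pae_pte_t pte)
{
	/* Publish the high word (upper frame-number bits) first... */
	ptep->pte_high = pte.pte_high;
	__sync_synchronize();		/* ...fence, like smp_wmb()... */
	/* ...then the low word, which carries the present bit, so a
	 * concurrent walker never sees a half-written present pte. */
	ptep->pte_low = pte.pte_low;
}

int main(void)
{
	volatile pae_pte_t slot = { 0, 0 };
	pae_pte_t val = { .pte_low = 0x1000 | PTE_PRESENT, .pte_high = 0x5 };

	set_pte_ordered(&slot, val);
	printf("pte = %#x:%#x\n", slot.pte_high, slot.pte_low);
	return 0;
}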
@@ -462,6 +447,7 @@ void xen_pmd_clear(pmd_t *pmdp)
 {
 	set_pmd(pmdp, __pmd(0));
 }
+#endif	/* CONFIG_X86_PAE */
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
@@ -469,78 +455,189 @@ pmd_t xen_make_pmd(pmdval_t pmd)
 	return native_make_pmd(pmd);
 }
 
+#if PAGETABLE_LEVELS == 4
+pudval_t xen_pud_val(pud_t pud)
+{
+	return pte_mfn_to_pfn(pud.pud);
+}
+
+pud_t xen_make_pud(pudval_t pud)
+{
+	pud = pte_pfn_to_mfn(pud);
+
+	return native_make_pud(pud);
+}
+
+pgd_t *xen_get_user_pgd(pgd_t *pgd)
+{
+	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
+	unsigned offset = pgd - pgd_page;
+	pgd_t *user_ptr = NULL;
+
+	if (offset < pgd_index(USER_LIMIT)) {
+		struct page *page = virt_to_page(pgd_page);
+		user_ptr = (pgd_t *)page->private;
+		if (user_ptr)
+			user_ptr += offset;
+	}
+
+	return user_ptr;
+}
+
+static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	struct mmu_update u;
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pgd_val_ma(val);
+	extend_mmu_update(&u);
+}
+
+/*
+ * Raw hypercall-based set_pgd, intended for in early boot before
+ * there's a page structure.  This implies:
+ *  1. The only existing pagetable is the kernel's
+ *  2. It is always pinned
+ *  3. It has no user pagetable attached to it
+ */
+void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	preempt_disable();
+
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
+}
+
+void xen_set_pgd(pgd_t *ptr, pgd_t val)
+{
+	pgd_t *user_ptr = xen_get_user_pgd(ptr);
+
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		if (user_ptr) {
+			WARN_ON(page_pinned(user_ptr));
+			*user_ptr = val;
+		}
+		return;
+	}
+
+	/* If it's pinned, then we can at least batch the kernel and
+	   user updates together. */
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
+	if (user_ptr)
+		__xen_set_pgd_hyper(user_ptr, val);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+#endif	/* PAGETABLE_LEVELS == 4 */
+
 /*
-  (Yet another) pagetable walker.  This one is intended for pinning a
-  pagetable.  This means that it walks a pagetable and calls the
-  callback function on each page it finds making up the page table,
-  at every level.  It walks the entire pagetable, but it only bothers
-  pinning pte pages which are below pte_limit.  In the normal case
-  this will be TASK_SIZE, but at boot we need to pin up to
-  FIXADDR_TOP.  But the important bit is that we don't pin beyond
-  there, because then we start getting into Xen's ptes.
-*/
-static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
+ * (Yet another) pagetable walker.  This one is intended for pinning a
+ * pagetable.  This means that it walks a pagetable and calls the
+ * callback function on each page it finds making up the page table,
+ * at every level.  It walks the entire pagetable, but it only bothers
+ * pinning pte pages which are below limit.  In the normal case this
+ * will be STACK_TOP_MAX, but at boot we need to pin up to
+ * FIXADDR_TOP.
+ *
+ * For 32-bit the important bit is that we don't pin beyond there,
+ * because then we start getting into Xen's ptes.
+ *
+ * For 64-bit, we must skip the Xen hole in the middle of the address
+ * space, just after the big x86-64 virtual hole.
+ */
+static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		    unsigned long limit)
 {
-	pgd_t *pgd = pgd_base;
 	int flush = 0;
-	unsigned long addr = 0;
-	unsigned long pgd_next;
+	unsigned hole_low, hole_high;
+	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
+	unsigned pgdidx, pudidx, pmdidx;
 
-	BUG_ON(limit > FIXADDR_TOP);
+	/* The limit is the last byte to be touched */
+	limit--;
+	BUG_ON(limit >= FIXADDR_TOP);
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
 
-	for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
+	/*
+	 * 64-bit has a great big hole in the middle of the address
+	 * space, which contains the Xen mappings.  On 32-bit these
+	 * will end up making a zero-sized hole and so is a no-op.
+	 */
+	hole_low = pgd_index(USER_LIMIT);
+	hole_high = pgd_index(PAGE_OFFSET);
+
+	pgdidx_limit = pgd_index(limit);
+#if PTRS_PER_PUD > 1
+	pudidx_limit = pud_index(limit);
+#else
+	pudidx_limit = 0;
+#endif
+#if PTRS_PER_PMD > 1
+	pmdidx_limit = pmd_index(limit);
+#else
+	pmdidx_limit = 0;
+#endif
+
+	flush |= (*func)(virt_to_page(pgd), PT_PGD);
+
+	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
 		pud_t *pud;
-		unsigned long pud_limit, pud_next;
 
-		pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
+		if (pgdidx >= hole_low && pgdidx < hole_high)
+			continue;
 
-		if (!pgd_val(*pgd))
+		if (!pgd_val(pgd[pgdidx]))
 			continue;
 
-		pud = pud_offset(pgd, 0);
+		pud = pud_offset(&pgd[pgdidx], 0);
 
 		if (PTRS_PER_PUD > 1) /* not folded */
 			flush |= (*func)(virt_to_page(pud), PT_PUD);
 
-		for (; addr != pud_limit; pud++, addr = pud_next) {
+		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
 			pmd_t *pmd;
-			unsigned long pmd_limit;
 
-			pud_next = pud_addr_end(addr, pud_limit);
-
-			if (pud_next < limit)
-				pmd_limit = pud_next;
-			else
-				pmd_limit = limit;
+			if (pgdidx == pgdidx_limit &&
+			    pudidx > pudidx_limit)
+				goto out;
 
-			if (pud_none(*pud))
+			if (pud_none(pud[pudidx]))
 				continue;
 
-			pmd = pmd_offset(pud, 0);
+			pmd = pmd_offset(&pud[pudidx], 0);
 
 			if (PTRS_PER_PMD > 1) /* not folded */
 				flush |= (*func)(virt_to_page(pmd), PT_PMD);
 
-			for (; addr != pmd_limit; pmd++) {
-				addr += (PAGE_SIZE * PTRS_PER_PTE);
-				if ((pmd_limit-1) < (addr-1)) {
-					addr = pmd_limit;
-					break;
-				}
+			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
+				struct page *pte;
+
+				if (pgdidx == pgdidx_limit &&
+				    pudidx == pudidx_limit &&
+				    pmdidx > pmdidx_limit)
+					goto out;
 
-				if (pmd_none(*pmd))
+				if (pmd_none(pmd[pmdidx]))
 					continue;
 
-				flush |= (*func)(pmd_page(*pmd), PT_PTE);
+				pte = pmd_page(pmd[pmdidx]);
+				flush |= (*func)(pte, PT_PTE);
 			}
 		}
 	}
-
-	flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
+out:
 
 	return flush;
 }
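
To make the new hole-skipping in pgd_walk() concrete: hole_low and hole_high bracket the pgd slots between the end of user space (USER_LIMIT) and the start of the kernel mappings (PAGE_OFFSET), which is where the Xen-reserved region sits on x86-64; on 32-bit the two indices coincide and nothing is skipped. The sketch below just evaluates that range using the usual x86-64 constants of the era, which are assumptions here rather than values taken from this file.

/* Illustrative only: assumed x86-64 layout constants. */
#include <stdio.h>

#define PGDIR_SHIFT	39
#define PTRS_PER_PGD	512
#define pgd_index(a)	((unsigned)(((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))

#define USER_LIMIT	0x0000800000000000ULL	/* assumed: STACK_TOP_MAX rounded to a PGD boundary */
#define PAGE_OFFSET	0xffff880000000000ULL	/* assumed: start of the kernel direct map */

int main(void)
{
	unsigned hole_low = pgd_index(USER_LIMIT);
	unsigned hole_high = pgd_index(PAGE_OFFSET);
	unsigned pgdidx, skipped = 0;

	for (pgdidx = 0; pgdidx < PTRS_PER_PGD; pgdidx++)
		if (pgdidx >= hole_low && pgdidx < hole_high)
			skipped++;	/* these slots belong to the hole/Xen */

	printf("hole: pgd slots [%u, %u) -> %u of %u skipped\n",
	       hole_low, hole_high, skipped, PTRS_PER_PGD);
	return 0;
}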
@@ -622,14 +719,31 @@ void xen_pgd_pin(pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
+	if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
+
+		if (user_pgd) {
+			pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+		}
+	}
+#else /* CONFIG_X86_32 */
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is pinnable */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
+#endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
@@ -656,9 +770,11 @@ void xen_mm_pin_all(void)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-/* The init_mm pagetable is really pinned as soon as its created, but
-   that's before we have page structures to store the bits.  So do all
-   the book-keeping now. */
+/*
+ * The init_mm pagetable is really pinned as soon as its created, but
+ * that's before we have page structures to store the bits.  So do all
+ * the book-keeping now.
+ */
 static __init int mark_pinned(struct page *page, enum pt_level level)
 {
 	SetPagePinned(page);
@@ -708,7 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-	pgd_walk(pgd, unpin_page, TASK_SIZE);
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		if (user_pgd) {
+			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+			unpin_page(virt_to_page(user_pgd), PT_PGD);
+		}
+	}
+#endif
+
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is unpinned */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
+
+	pgd_walk(pgd, unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
@@ -727,7 +859,6 @@ void xen_mm_unpin_all(void)
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
 			BUG_ON(!PagePinned(page));
-			printk("unpinning pinned %p\n", page_address(page));
 			xen_pgd_unpin((pgd_t *)page_address(page));
 			ClearPageSavePinned(page);
 		}
@@ -757,8 +888,15 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 static void drop_other_mm_ref(void *info)
 {
 	struct mm_struct *mm = info;
+	struct mm_struct *active_mm;
+
+#ifdef CONFIG_X86_64
+	active_mm = read_pda(active_mm);
+#else
+	active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
+#endif
 
-	if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
+	if (active_mm == mm)
 		leave_mm(smp_processor_id());
 
 	/* If this cpu still has a stale cr3 reference, then make sure
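
The last hunk makes drop_other_mm_ref() read the current CPU's active_mm from the PDA on 64-bit (which did not use cpu_tlbstate at the time) and from cpu_tlbstate on 32-bit, then performs the same comparison as before. The toy model below only shows the shape of that per-CPU check; it is purely illustrative, and read_pda()/leave_mm() are referenced in comments rather than called.

/* Toy model of the "drop lazy mm reference" check (not kernel code). */
#include <stdio.h>

struct mm { int id; };

#define NR_CPUS 4
static struct mm *percpu_active_mm[NR_CPUS];

static void drop_other_mm_ref_model(int cpu, struct mm *dying)
{
	if (percpu_active_mm[cpu] == dying) {
		/* leave_mm(cpu) in the real kernel: drop the lazy mm */
		percpu_active_mm[cpu] = NULL;
		printf("cpu%d: dropped lazy reference to mm %d\n", cpu, dying->id);
	}
}

int main(void)
{
	struct mm a = { 1 }, b = { 2 };
	int cpu;

	percpu_active_mm[0] = &a;
	percpu_active_mm[1] = &b;
	percpu_active_mm[2] = &a;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		drop_other_mm_ref_model(cpu, &a);
	return 0;
}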