Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--	arch/x86/xen/mmu.c	91
1 file changed, 82 insertions, 9 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 046c1f23dd6e..a44d56e38bd1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,6 +58,13 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+/*
+ * Just beyond the highest usermode address. STACK_TOP_MAX has a
+ * redzone above it, so round it up to a PGD boundary.
+ */
+#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
+
+
 #define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
@@ -461,17 +468,45 @@ pud_t xen_make_pud(pudval_t pud)
 	return native_make_pud(pud);
 }
 
-void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+pgd_t *xen_get_user_pgd(pgd_t *pgd)
 {
-	struct mmu_update u;
+	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
+	unsigned offset = pgd - pgd_page;
+	pgd_t *user_ptr = NULL;
 
-	preempt_disable();
+	if (offset < pgd_index(USER_LIMIT)) {
+		struct page *page = virt_to_page(pgd_page);
+		user_ptr = (pgd_t *)page->private;
+		if (user_ptr)
+			user_ptr += offset;
+	}
 
-	xen_mc_batch();
+	return user_ptr;
+}
+
+static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	struct mmu_update u;
 
 	u.ptr = virt_to_machine(ptr).maddr;
 	u.val = pgd_val_ma(val);
 	extend_mmu_update(&u);
+}
+
+/*
+ * Raw hypercall-based set_pgd, intended for in early boot before
+ * there's a page structure.  This implies:
+ *  1. The only existing pagetable is the kernel's
+ *  2. It is always pinned
+ *  3. It has no user pagetable attached to it
+ */
+void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	preempt_disable();
+
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
@@ -480,14 +515,28 @@ void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
 
 void xen_set_pgd(pgd_t *ptr, pgd_t val)
 {
+	pgd_t *user_ptr = xen_get_user_pgd(ptr);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!page_pinned(ptr)) {
 		*ptr = val;
+		if (user_ptr) {
+			WARN_ON(page_pinned(user_ptr));
+			*user_ptr = val;
+		}
 		return;
 	}
 
-	xen_set_pgd_hyper(ptr, val);
+	/* If it's pinned, then we can at least batch the kernel and
+	   user updates together. */
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
+	if (user_ptr)
+		__xen_set_pgd_hyper(user_ptr, val);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 #endif	/* PAGETABLE_LEVELS == 4 */
 
@@ -526,7 +575,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 	 * space, which contains the Xen mappings.  On 32-bit these
 	 * will end up making a zero-sized hole and so is a no-op.
 	 */
-	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_low = pgd_index(USER_LIMIT);
 	hole_high = pgd_index(PAGE_OFFSET);
 
 	pgdidx_limit = pgd_index(limit);
@@ -670,19 +719,31 @@ void xen_pgd_pin(pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
+	if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
+
+		if (user_pgd) {
+			pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+		}
+	}
+#else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
+#endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
@@ -763,11 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		if (user_pgd) {
+			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+			unpin_page(virt_to_page(user_pgd), PT_PGD);
+		}
+	}
+#endif
+
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-	pgd_walk(pgd, unpin_page, TASK_SIZE);
+
+	pgd_walk(pgd, unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }