Diffstat (limited to 'arch/x86/xen')
-rw-r--r--  arch/x86/xen/enlighten.c |  99
-rw-r--r--  arch/x86/xen/mmu.c       |  91
-rw-r--r--  arch/x86/xen/mmu.h       |   2
3 files changed, 168 insertions, 24 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c13698faae54..48f1a7eca8b9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -46,7 +46,6 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
-#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -711,29 +710,57 @@ static void set_current_cr3(void *v)
 	x86_write_percpu(xen_current_cr3, (unsigned long)v);
 }
 
-static void xen_write_cr3(unsigned long cr3)
+static void __xen_write_cr3(bool kernel, unsigned long cr3)
 {
 	struct mmuext_op *op;
 	struct multicall_space mcs;
-	unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	unsigned long mfn;
 
-	BUG_ON(preemptible());
+	if (cr3)
+		mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	else
+		mfn = 0;
 
-	mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
+	WARN_ON(mfn == 0 && kernel);
 
-	/* Update while interrupts are disabled, so its atomic with
-	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
+	mcs = __xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
-	op->cmd = MMUEXT_NEW_BASEPTR;
+	op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
 	op->arg1.mfn = mfn;
 
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
-	/* Update xen_update_cr3 once the batch has actually
-	   been submitted. */
-	xen_mc_callback(set_current_cr3, (void *)cr3);
+	if (kernel) {
+		x86_write_percpu(xen_cr3, cr3);
+
+		/* Update xen_current_cr3 once the batch has actually
+		   been submitted. */
+		xen_mc_callback(set_current_cr3, (void *)cr3);
+	}
+}
+
+static void xen_write_cr3(unsigned long cr3)
+{
+	BUG_ON(preemptible());
+
+	xen_mc_batch();  /* disables interrupts */
+
+	/* Update while interrupts are disabled, so its atomic with
+	   respect to ipis */
+	x86_write_percpu(xen_cr3, cr3);
+
+	__xen_write_cr3(true, cr3);
+
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+		if (user_pgd)
+			__xen_write_cr3(false, __pa(user_pgd));
+		else
+			__xen_write_cr3(false, 0);
+	}
+#endif
 
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
@@ -794,6 +821,40 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
 
+static int xen_pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = mm->pgd;
+	int ret = 0;
+
+	BUG_ON(PagePinned(virt_to_page(pgd)));
+
+#ifdef CONFIG_X86_64
+	{
+		struct page *page = virt_to_page(pgd);
+
+		BUG_ON(page->private != 0);
+
+		page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+		if (page->private == 0)
+			ret = -ENOMEM;
+
+		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+	}
+#endif
+
+	return ret;
+}
+
+static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+	pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+	if (user_pgd)
+		free_page((unsigned long)user_pgd);
+#endif
+}
+
 /* This should never happen until we're OK to use struct page */
 static void xen_release_ptpage(u32 pfn, unsigned level)
 {
@@ -1168,8 +1229,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.pgd_alloc = __paravirt_pgd_alloc,
-	.pgd_free = paravirt_nop,
+	.pgd_alloc = xen_pgd_alloc,
+	.pgd_free = xen_pgd_free,
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
@@ -1480,7 +1541,15 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
 
 	/* Switch over */
 	pgd = init_level4_pgt;
-	xen_write_cr3(__pa(pgd));
+
+	/*
+	 * At this stage there can be no user pgd, and no page
+	 * structure to attach it to, so make sure we just set kernel
+	 * pgd.
+	 */
+	xen_mc_batch();
+	__xen_write_cr3(true, __pa(pgd));
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
 	reserve_early(__pa(xen_start_info->pt_base),
 		      __pa(xen_start_info->pt_base +
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 046c1f23dd6e..a44d56e38bd1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,6 +58,13 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+/*
+ * Just beyond the highest usermode address.  STACK_TOP_MAX has a
+ * redzone above it, so round it up to a PGD boundary.
+ */
+#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
+
+
 #define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
@@ -461,17 +468,45 @@ pud_t xen_make_pud(pudval_t pud)
 	return native_make_pud(pud);
 }
 
-void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+pgd_t *xen_get_user_pgd(pgd_t *pgd)
 {
-	struct mmu_update u;
+	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
+	unsigned offset = pgd - pgd_page;
+	pgd_t *user_ptr = NULL;
 
-	preempt_disable();
+	if (offset < pgd_index(USER_LIMIT)) {
+		struct page *page = virt_to_page(pgd_page);
+		user_ptr = (pgd_t *)page->private;
+		if (user_ptr)
+			user_ptr += offset;
+	}
 
-	xen_mc_batch();
+	return user_ptr;
+}
+
+static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	struct mmu_update u;
 
 	u.ptr = virt_to_machine(ptr).maddr;
 	u.val = pgd_val_ma(val);
 	extend_mmu_update(&u);
+}
+
+/*
+ * Raw hypercall-based set_pgd, intended for in early boot before
+ * there's a page structure.  This implies:
+ *  1. The only existing pagetable is the kernel's
+ *  2. It is always pinned
+ *  3. It has no user pagetable attached to it
+ */
+void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	preempt_disable();
+
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
@@ -480,14 +515,28 @@ void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
 
 void xen_set_pgd(pgd_t *ptr, pgd_t val)
 {
+	pgd_t *user_ptr = xen_get_user_pgd(ptr);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!page_pinned(ptr)) {
 		*ptr = val;
+		if (user_ptr) {
+			WARN_ON(page_pinned(user_ptr));
+			*user_ptr = val;
+		}
 		return;
 	}
 
-	xen_set_pgd_hyper(ptr, val);
+	/* If it's pinned, then we can at least batch the kernel and
+	   user updates together. */
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
+	if (user_ptr)
+		__xen_set_pgd_hyper(user_ptr, val);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 #endif	/* PAGETABLE_LEVELS == 4 */
 
@@ -526,7 +575,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 	 * space, which contains the Xen mappings.  On 32-bit these
 	 * will end up making a zero-sized hole and so is a no-op.
 	 */
-	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_low = pgd_index(USER_LIMIT);
 	hole_high = pgd_index(PAGE_OFFSET);
 
 	pgdidx_limit = pgd_index(limit);
@@ -670,19 +719,31 @@ void xen_pgd_pin(pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
+	if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
+
+		if (user_pgd) {
+			pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+		}
+	}
+#else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
+#endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
@@ -763,11 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		if (user_pgd) {
+			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+			unpin_page(virt_to_page(user_pgd), PT_PGD);
+		}
+	}
+#endif
+
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-	pgd_walk(pgd, unpin_page, TASK_SIZE);
+
+	pgd_walk(pgd, unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 19d544b0b6c6..0f59bd03f9e3 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -51,6 +51,8 @@ void xen_set_pgd(pgd_t *pgdp, pgd_t pgd);
 void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
 #endif
 
+pgd_t *xen_get_user_pgd(pgd_t *pgd);
+
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep, pte_t pte);