 arch/x86/xen/enlighten.c | 99
 arch/x86/xen/mmu.c       | 91
 arch/x86/xen/mmu.h       |  2
 3 files changed, 168 insertions(+), 24 deletions(-)
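Taken together, the three files implement user pagetable management for 64-bit Xen guests: each kernel pgd gets a companion user pgd (a zeroed page whose address is stashed in the kernel pgd's struct page->private), pgd entry updates are mirrored into both tables, pinning and unpinning cover the pair, and every cr3 write loads the kernel and user base pointers as a unit. Short stand-alone sketches of the trickier pieces follow the relevant hunks below.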
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c13698faae54..48f1a7eca8b9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -46,7 +46,6 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
-#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -711,29 +710,57 @@ static void set_current_cr3(void *v)
 	x86_write_percpu(xen_current_cr3, (unsigned long)v);
 }
 
-static void xen_write_cr3(unsigned long cr3)
+static void __xen_write_cr3(bool kernel, unsigned long cr3)
 {
 	struct mmuext_op *op;
 	struct multicall_space mcs;
-	unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	unsigned long mfn;
 
-	BUG_ON(preemptible());
+	if (cr3)
+		mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	else
+		mfn = 0;
 
-	mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
+	WARN_ON(mfn == 0 && kernel);
 
-	/* Update while interrupts are disabled, so its atomic with
-	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
+	mcs = __xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
-	op->cmd = MMUEXT_NEW_BASEPTR;
+	op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
 	op->arg1.mfn = mfn;
 
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
-	/* Update xen_update_cr3 once the batch has actually
-	   been submitted. */
-	xen_mc_callback(set_current_cr3, (void *)cr3);
+	if (kernel) {
+		x86_write_percpu(xen_cr3, cr3);
+
+		/* Update xen_current_cr3 once the batch has actually
+		   been submitted. */
+		xen_mc_callback(set_current_cr3, (void *)cr3);
+	}
+}
+
+static void xen_write_cr3(unsigned long cr3)
+{
+	BUG_ON(preemptible());
+
+	xen_mc_batch();  /* disables interrupts */
+
+	/* Update while interrupts are disabled, so it's atomic with
+	   respect to IPIs */
+	x86_write_percpu(xen_cr3, cr3);
+
+	__xen_write_cr3(true, cr3);
+
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+		if (user_pgd)
+			__xen_write_cr3(false, __pa(user_pgd));
+		else
+			__xen_write_cr3(false, 0);
+	}
+#endif
 
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
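The split above funnels both flavours of base pointer through one helper: the kernel L4 goes in via MMUEXT_NEW_BASEPTR, the user L4 via MMUEXT_NEW_USER_BASEPTR, and cr3 == 0 encodes "no user pagetable". A minimal user-space model of that decision logic (the command values and the pfn_to_mfn stub are assumptions for illustration, not the real Xen ABI numbers):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define MMUEXT_NEW_BASEPTR       0   /* assumed values for illustration */
#define MMUEXT_NEW_USER_BASEPTR  1
#define PAGE_SHIFT               12

/* Stub: the real pfn_to_mfn consults the phys-to-machine table. */
static unsigned long pfn_to_mfn(unsigned long pfn) { return pfn + 0x1000; }

/* Model of __xen_write_cr3: cr3 == 0 means "no user pagetable",
 * which is only legal for the user slot, never the kernel one. */
static void model_write_cr3(bool kernel, unsigned long cr3)
{
    unsigned long mfn = cr3 ? pfn_to_mfn(cr3 >> PAGE_SHIFT) : 0;
    assert(!(mfn == 0 && kernel));            /* mirrors the WARN_ON */
    int cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
    printf("mmuext_op cmd=%d mfn=%#lx\n", cmd, mfn);
}

int main(void)
{
    model_write_cr3(true, 0x1234000);   /* kernel L4 */
    model_write_cr3(false, 0);          /* process with no user pgd */
    return 0;
}

The real function queues the op into a multicall batch rather than printing, so a kernel/user pair of base-pointer switches can reach Xen in a single hypercall.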
@@ -794,6 +821,40 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
 
+static int xen_pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = mm->pgd;
+	int ret = 0;
+
+	BUG_ON(PagePinned(virt_to_page(pgd)));
+
+#ifdef CONFIG_X86_64
+	{
+		struct page *page = virt_to_page(pgd);
+
+		BUG_ON(page->private != 0);
+
+		page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+		if (page->private == 0)
+			ret = -ENOMEM;
+
+		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+	}
+#endif
+
+	return ret;
+}
+
+static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+	pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+	if (user_pgd)
+		free_page((unsigned long)user_pgd);
+#endif
+}
+
 /* This should never happen until we're OK to use struct page */
 static void xen_release_ptpage(u32 pfn, unsigned level)
 {
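xen_pgd_alloc hangs the user pgd off the kernel pgd's struct page: a freshly zeroed page is allocated and its address parked in page->private, which is also where xen_get_user_pgd later looks. A stand-alone sketch of that ownership pattern (a plain struct and calloc stand in for struct page and __get_free_page; names are illustrative):

#include <stdlib.h>

#define PGD_BYTES 4096              /* assumed: one page per pgd */

struct fake_page {                  /* stands in for struct page */
    unsigned long private_;         /* holds the user pgd's address */
};

static int model_pgd_alloc(struct fake_page *kernel_pgd_page)
{
    /* __get_free_page(GFP_KERNEL | __GFP_ZERO) in the real code */
    void *user_pgd = calloc(1, PGD_BYTES);
    if (!user_pgd)
        return -1;                  /* -ENOMEM in the kernel */
    kernel_pgd_page->private_ = (unsigned long)user_pgd;
    return 0;
}

static void model_pgd_free(struct fake_page *kernel_pgd_page)
{
    /* free_page() on whatever was stashed at allocation time */
    free((void *)kernel_pgd_page->private_);
    kernel_pgd_page->private_ = 0;
}

int main(void)
{
    struct fake_page pg = { 0 };
    if (model_pgd_alloc(&pg) == 0)
        model_pgd_free(&pg);
    return 0;
}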
@@ -1168,8 +1229,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.pgd_alloc = __paravirt_pgd_alloc,
-	.pgd_free = paravirt_nop,
+	.pgd_alloc = xen_pgd_alloc,
+	.pgd_free = xen_pgd_free,
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
@@ -1480,7 +1541,15 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 
 	/* Switch over */
 	pgd = init_level4_pgt;
-	xen_write_cr3(__pa(pgd));
+
+	/*
+	 * At this stage there can be no user pgd, and no page
+	 * structure to attach it to, so make sure we just set the
+	 * kernel pgd.
+	 */
+	xen_mc_batch();
+	__xen_write_cr3(true, __pa(pgd));
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
 	reserve_early(__pa(xen_start_info->pt_base),
 		      __pa(xen_start_info->pt_base +
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 046c1f23dd6e..a44d56e38bd1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,6 +58,13 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+/*
+ * Just beyond the highest usermode address.  STACK_TOP_MAX has a
+ * redzone above it, so round it up to a PGD boundary.
+ */
+#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
+
+
 #define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
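USER_LIMIT rounds the top of user space up to the next pgd-entry boundary so that the red zone above STACK_TOP_MAX stays on the user side of the hole. The arithmetic is checkable in isolation (x86_64-style constants assumed for illustration; real values come from the kernel headers and depend on configuration):

#include <stdio.h>

#define PGDIR_SHIFT   39                      /* assumed sample values */
#define PGDIR_SIZE    (1UL << PGDIR_SHIFT)
#define PGDIR_MASK    (~(PGDIR_SIZE - 1))
#define STACK_TOP_MAX 0x00007ffffffff000UL

#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)

int main(void)
{
    /* Rounds up to the next 512GB pgd boundary: 0x0000800000000000. */
    printf("USER_LIMIT       = %#lx\n", USER_LIMIT);
    /* Simplified pgd_index (PTRS_PER_PGD masking omitted): entry 256,
     * which is where pgd_walk's hole_low now starts. */
    printf("pgd_index(LIMIT) = %lu\n", USER_LIMIT >> PGDIR_SHIFT);
    return 0;
}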
@@ -461,17 +468,45 @@ pud_t xen_make_pud(pudval_t pud)
 	return native_make_pud(pud);
 }
 
-void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+pgd_t *xen_get_user_pgd(pgd_t *pgd)
 {
-	struct mmu_update u;
+	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
+	unsigned offset = pgd - pgd_page;
+	pgd_t *user_ptr = NULL;
 
-	preempt_disable();
+	if (offset < pgd_index(USER_LIMIT)) {
+		struct page *page = virt_to_page(pgd_page);
+		user_ptr = (pgd_t *)page->private;
+		if (user_ptr)
+			user_ptr += offset;
+	}
 
-	xen_mc_batch();
+	return user_ptr;
+}
+
+static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	struct mmu_update u;
 
 	u.ptr = virt_to_machine(ptr).maddr;
 	u.val = pgd_val_ma(val);
 	extend_mmu_update(&u);
+}
+
+/*
+ * Raw hypercall-based set_pgd, intended for use in early boot before
+ * there's a page structure.  This implies:
+ *  1. The only existing pagetable is the kernel's
+ *  2. It is always pinned
+ *  3. It has no user pagetable attached to it
+ */
+void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	preempt_disable();
+
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
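xen_get_user_pgd turns a pointer to a kernel pgd entry into a pointer to the same slot in the user shadow: mask the entry pointer down to the start of its page, compute the entry offset, then apply that offset to the shadow table found through page->private. The same arithmetic as a self-contained model (fake types and an explicit shadow argument replace struct page; only the pointer math is the point here):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

typedef uint64_t pgd_t;             /* one 8-byte entry, as on x86_64 */

/* Mirror of the lookup: *entry lives somewhere inside a page-aligned
 * kernel pgd; find the matching slot in the shadow table, or NULL if
 * there is no shadow or the slot is outside the user range. */
static pgd_t *model_get_user_pgd(pgd_t *entry, pgd_t *shadow,
                                 unsigned user_entries)
{
    pgd_t *pgd_page = (pgd_t *)((uintptr_t)entry & PAGE_MASK);
    unsigned offset = (unsigned)(entry - pgd_page);   /* 0..511 */

    if (shadow && offset < user_entries)
        return shadow + offset;
    return NULL;
}

int main(void)
{
    pgd_t *kpgd = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
    pgd_t *upgd = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
    if (!kpgd || !upgd)
        return 1;
    pgd_t *mirror = model_get_user_pgd(&kpgd[42], upgd, 256);
    printf("kernel entry 42 mirrors to shadow entry %td\n", mirror - upgd);
    free(kpgd);
    free(upgd);
    return 0;
}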
@@ -480,14 +515,28 @@ void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
 
 void xen_set_pgd(pgd_t *ptr, pgd_t val)
 {
+	pgd_t *user_ptr = xen_get_user_pgd(ptr);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!page_pinned(ptr)) {
 		*ptr = val;
+		if (user_ptr) {
+			WARN_ON(page_pinned(user_ptr));
+			*user_ptr = val;
+		}
 		return;
 	}
 
-	xen_set_pgd_hyper(ptr, val);
+	/* If it's pinned, then we can at least batch the kernel and
+	   user updates together. */
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
+	if (user_ptr)
+		__xen_set_pgd_hyper(user_ptr, val);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 #endif	/* PAGETABLE_LEVELS == 4 */
 
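In the pinned case above, xen_set_pgd can no longer write the entries directly, so it queues both the kernel and the user update into one multicall batch and enters the hypervisor once. A toy model of that batch-then-issue shape, with a fixed array standing in for Xen's real multicall machinery:

#include <stdio.h>

struct mmu_update { unsigned long ptr, val; };

static struct mmu_update batch[8];
static int batched;

static void model_extend_mmu_update(unsigned long ptr, unsigned long val)
{
    batch[batched++] = (struct mmu_update){ ptr, val };  /* queue only */
}

static void model_mc_issue(void)
{
    printf("hypercall: flushing %d queued update(s)\n", batched);
    batched = 0;        /* one hypervisor entry covers the whole batch */
}

/* Mirrors the pinned path of xen_set_pgd: queue the kernel entry,
 * queue the user shadow entry when present, then issue once. */
static void model_set_pgd(unsigned long kptr, unsigned long uptr,
                          unsigned long val)
{
    model_extend_mmu_update(kptr, val);
    if (uptr)
        model_extend_mmu_update(uptr, val);
    model_mc_issue();
}

int main(void)
{
    model_set_pgd(0x1000, 0x2000, 0xabc);  /* kernel + user, one batch */
    return 0;
}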
@@ -526,7 +575,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 	 * space, which contains the Xen mappings.  On 32-bit these
 	 * will end up making a zero-sized hole and so is a no-op.
 	 */
-	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_low = pgd_index(USER_LIMIT);
 	hole_high = pgd_index(PAGE_OFFSET);
 
 	pgdidx_limit = pgd_index(limit);
@@ -670,19 +719,31 @@ void xen_pgd_pin(pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
+	if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
+
+		if (user_pgd) {
+			pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+		}
+	}
+#else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
+#endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
@@ -763,11 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		if (user_pgd) {
+			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+			unpin_page(virt_to_page(user_pgd), PT_PGD);
+		}
+	}
+#endif
+
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-	pgd_walk(pgd, unpin_page, TASK_SIZE);
+
+	pgd_walk(pgd, unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
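Note the symmetry with the pin path above: when pinning, pin_page() first makes the user pgd page read-only and only then is MMUEXT_PIN_L4_TABLE issued; when unpinning, the hypervisor pin is dropped first and unpin_page() makes the page writable again afterwards. Xen refuses to pin a pagetable page that is still writably mapped anywhere, which is what forces this ordering.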
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 19d544b0b6c6..0f59bd03f9e3 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -51,6 +51,8 @@ void xen_set_pgd(pgd_t *pgdp, pgd_t pgd);
 void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
 #endif
 
+pgd_t *xen_get_user_pgd(pgd_t *pgd);
+
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep, pte_t pte);
