aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen/mmu.c
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2008-07-08 18:07:13 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-16 05:05:38 -0400
commitd6182fbf04164016cb6540db02eef3d6bdc967c3 (patch)
tree53bd4b9b764e9220b978a6506b46455930973f27 /arch/x86/xen/mmu.c
parentc24481e9da2c7bc8aafab46e0bc64821244a24a6 (diff)
xen64: allocate and manage user pagetables
Because the x86_64 architecture does not enforce segment limits, Xen cannot protect itself with them as it does in 32-bit mode. Therefore, to protect itself, it runs the guest kernel in ring 3. Since it also runs the guest userspace in ring 3, the guest kernel must maintain a second pagetable for its userspace, which does not map kernel space. Naturally, the guest kernel pagetables map both kernel and userspace. The userspace pagetable is attached to the corresponding kernel pagetable via the pgd's page->private field. It is allocated and freed at the same time as the kernel pgd via the paravirt_pgd_alloc/free hooks. Fortunately, the user pagetable is almost entirely shared with the kernel pagetable; the only difference is the pgd page itself. set_pgd will populate all entries in the kernel pagetable, and also set the corresponding user pgd entry if the address is less than STACK_TOP_MAX. The user pagetable must be pinned and unpinned with the kernel one, but because the pagetables are aliased, pgd_walk() only needs to be called on the kernel pagetable. The user pgd page is then pinned/unpinned along with the kernel pgd page. xen_write_cr3 must write both the kernel and user cr3s. The init_mm.pgd pagetable never has a user pagetable allocated for it, because it can never be used while running usermode. One awkward area is that early in boot the page structures are not available. No user pagetable can exist at that point, but it complicates the logic to avoid looking at the page structure. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: Stephen Tweedie <sct@redhat.com> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: Mark McLoughlin <markmc@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--arch/x86/xen/mmu.c91
1 files changed, 82 insertions, 9 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 046c1f23dd6..a44d56e38bd 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,6 +58,13 @@
58#include "multicalls.h" 58#include "multicalls.h"
59#include "mmu.h" 59#include "mmu.h"
60 60
61/*
62 * Just beyond the highest usermode address. STACK_TOP_MAX has a
63 * redzone above it, so round it up to a PGD boundary.
64 */
65#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
66
67
61#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) 68#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
62#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) 69#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
63 70
@@ -461,17 +468,45 @@ pud_t xen_make_pud(pudval_t pud)
461 return native_make_pud(pud); 468 return native_make_pud(pud);
462} 469}
463 470
464void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) 471pgd_t *xen_get_user_pgd(pgd_t *pgd)
465{ 472{
466 struct mmu_update u; 473 pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
474 unsigned offset = pgd - pgd_page;
475 pgd_t *user_ptr = NULL;
467 476
468 preempt_disable(); 477 if (offset < pgd_index(USER_LIMIT)) {
478 struct page *page = virt_to_page(pgd_page);
479 user_ptr = (pgd_t *)page->private;
480 if (user_ptr)
481 user_ptr += offset;
482 }
469 483
470 xen_mc_batch(); 484 return user_ptr;
485}
486
487static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
488{
489 struct mmu_update u;
471 490
472 u.ptr = virt_to_machine(ptr).maddr; 491 u.ptr = virt_to_machine(ptr).maddr;
473 u.val = pgd_val_ma(val); 492 u.val = pgd_val_ma(val);
474 extend_mmu_update(&u); 493 extend_mmu_update(&u);
494}
495
496/*
497 * Raw hypercall-based set_pgd, intended for in early boot before
498 * there's a page structure. This implies:
499 * 1. The only existing pagetable is the kernel's
500 * 2. It is always pinned
501 * 3. It has no user pagetable attached to it
502 */
503void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
504{
505 preempt_disable();
506
507 xen_mc_batch();
508
509 __xen_set_pgd_hyper(ptr, val);
475 510
476 xen_mc_issue(PARAVIRT_LAZY_MMU); 511 xen_mc_issue(PARAVIRT_LAZY_MMU);
477 512
@@ -480,14 +515,28 @@ void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
480 515
481void xen_set_pgd(pgd_t *ptr, pgd_t val) 516void xen_set_pgd(pgd_t *ptr, pgd_t val)
482{ 517{
518 pgd_t *user_ptr = xen_get_user_pgd(ptr);
519
483 /* If page is not pinned, we can just update the entry 520 /* If page is not pinned, we can just update the entry
484 directly */ 521 directly */
485 if (!page_pinned(ptr)) { 522 if (!page_pinned(ptr)) {
486 *ptr = val; 523 *ptr = val;
524 if (user_ptr) {
525 WARN_ON(page_pinned(user_ptr));
526 *user_ptr = val;
527 }
487 return; 528 return;
488 } 529 }
489 530
490 xen_set_pgd_hyper(ptr, val); 531 /* If it's pinned, then we can at least batch the kernel and
532 user updates together. */
533 xen_mc_batch();
534
535 __xen_set_pgd_hyper(ptr, val);
536 if (user_ptr)
537 __xen_set_pgd_hyper(user_ptr, val);
538
539 xen_mc_issue(PARAVIRT_LAZY_MMU);
491} 540}
492#endif /* PAGETABLE_LEVELS == 4 */ 541#endif /* PAGETABLE_LEVELS == 4 */
493 542
@@ -526,7 +575,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
526 * space, which contains the Xen mappings. On 32-bit these 575 * space, which contains the Xen mappings. On 32-bit these
527 * will end up making a zero-sized hole and so is a no-op. 576 * will end up making a zero-sized hole and so is a no-op.
528 */ 577 */
529 hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1); 578 hole_low = pgd_index(USER_LIMIT);
530 hole_high = pgd_index(PAGE_OFFSET); 579 hole_high = pgd_index(PAGE_OFFSET);
531 580
532 pgdidx_limit = pgd_index(limit); 581 pgdidx_limit = pgd_index(limit);
@@ -670,19 +719,31 @@ void xen_pgd_pin(pgd_t *pgd)
670{ 719{
671 xen_mc_batch(); 720 xen_mc_batch();
672 721
673 if (pgd_walk(pgd, pin_page, TASK_SIZE)) { 722 if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
674 /* re-enable interrupts for kmap_flush_unused */ 723 /* re-enable interrupts for kmap_flush_unused */
675 xen_mc_issue(0); 724 xen_mc_issue(0);
676 kmap_flush_unused(); 725 kmap_flush_unused();
677 xen_mc_batch(); 726 xen_mc_batch();
678 } 727 }
679 728
729#ifdef CONFIG_X86_64
730 {
731 pgd_t *user_pgd = xen_get_user_pgd(pgd);
732
733 xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
734
735 if (user_pgd) {
736 pin_page(virt_to_page(user_pgd), PT_PGD);
737 xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
738 }
739 }
740#else /* CONFIG_X86_32 */
680#ifdef CONFIG_X86_PAE 741#ifdef CONFIG_X86_PAE
681 /* Need to make sure unshared kernel PMD is pinnable */ 742 /* Need to make sure unshared kernel PMD is pinnable */
682 pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); 743 pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
683#endif 744#endif
684
685 xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); 745 xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
746#endif /* CONFIG_X86_64 */
686 xen_mc_issue(0); 747 xen_mc_issue(0);
687} 748}
688 749
@@ -763,11 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd)
763 824
764 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 825 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
765 826
827#ifdef CONFIG_X86_64
828 {
829 pgd_t *user_pgd = xen_get_user_pgd(pgd);
830
831 if (user_pgd) {
832 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
833 unpin_page(virt_to_page(user_pgd), PT_PGD);
834 }
835 }
836#endif
837
766#ifdef CONFIG_X86_PAE 838#ifdef CONFIG_X86_PAE
767 /* Need to make sure unshared kernel PMD is unpinned */ 839 /* Need to make sure unshared kernel PMD is unpinned */
768 pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); 840 pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
769#endif 841#endif
770 pgd_walk(pgd, unpin_page, TASK_SIZE); 842
843 pgd_walk(pgd, unpin_page, USER_LIMIT);
771 844
772 xen_mc_issue(0); 845 xen_mc_issue(0);
773} 846}