author     Rusty Russell <rusty@rustcorp.com.au>     2011-07-22 01:09:48 -0400
committer  Rusty Russell <rusty@rustcorp.com.au>     2011-07-22 01:09:48 -0400
commit     5dea1c88ed11a1221581c4b202f053c4fc138704 (patch)
tree       59e15d3c696712e26ffb229ff987f33bcc72affe /drivers/lguest
parent     e0377e25206328998d036cafddcd00a7c3252e3e (diff)
lguest: use a special 1:1 linear pagetable mode until first switch.
The Host used to create some page tables for the Guest to use at the top of
Guest memory; it would then tell the Guest where this was.  In particular, it
created linear mappings for the 0 and 0xC0000000 addresses because lguest used
to switch to its real page tables quite late in boot.

However, since d50d8fe19 Linux has initialized boot page tables in head_32.S,
even before the "are we lguest?" boot jump.  So now we can simplify things:
the Host pagetable code assumes a 1:1 linear mapping until the Guest first
calls the LHCALL_NEW_PGTABLE hypercall, which we now do before we reach C code.

This also means that the Host doesn't need to know anything about the Guest's
PAGE_OFFSET.  (Non-Linux guests might not even have such a thing.)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
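As a rough sketch of the idea (not the kernel code itself; the helper names,
flag values and standalone main() below are made up for illustration), while
cpu->linear_pages is set the Host simply manufactures page-table entries on
demand, mapping every guest-virtual page onto the guest-physical page with the
same number:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_MASK       (~0xFFFULL)
    #define _PAGE_PRESENT   0x001ULL
    #define _PAGE_RW        0x002ULL

    /* demand_page() analogue: fake up a PTE that maps the page onto itself. */
    static uint64_t fake_linear_pte(uint64_t vaddr)
    {
            return (vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT;
    }

    /* guest_pa() analogue: before the first switch, virtual == physical. */
    static uint64_t linear_guest_pa(uint64_t vaddr)
    {
            return vaddr;
    }

    int main(void)
    {
            bool linear_pages = true;       /* set by init_guest_pagetable() */
            uint64_t vaddr = 0xC0001000ULL;

            if (linear_pages)
                    printf("pte %#llx, pa %#llx\n",
                           (unsigned long long)fake_linear_pte(vaddr),
                           (unsigned long long)linear_guest_pa(vaddr));
            return 0;
    }

Once the Guest issues its first LHCALL_NEW_PGTABLE hypercall, linear_pages is
cleared and the shadow page tables are rebuilt from the Guest's real ones, as
the diff below shows.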
Diffstat (limited to 'drivers/lguest')
-rw-r--r--   drivers/lguest/lg.h              2
-rw-r--r--   drivers/lguest/page_tables.c   278
2 files changed, 86 insertions(+), 194 deletions(-)
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 9136411fadd5..295df06e6590 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -59,6 +59,8 @@ struct lg_cpu {
 
 	struct lguest_pages *last_pages;
 
+	/* Initialization mode: linear map everything. */
+	bool linear_pages;
 	int cpu_pgd;		/* Which pgd this cpu is currently using */
 
 	/* If a hypercall was asked for, this points to the arguments. */
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index d21578ee95de..00026222bde8 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -17,7 +17,6 @@
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
-#include <asm/bootparam.h>
 #include "lg.h"
 
 /*M:008
@@ -325,10 +324,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 #endif
 
 	/* First step: get the top-level Guest page table entry. */
-	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
-	/* Toplevel not present?  We can't map it in. */
-	if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
-		return false;
+	if (unlikely(cpu->linear_pages)) {
+		/* Faking up a linear mapping. */
+		gpgd = __pgd(CHECK_GPGD_MASK);
+	} else {
+		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
+		/* Toplevel not present?  We can't map it in. */
+		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
+			return false;
+	}
 
 	/* Now look at the matching shadow entry. */
 	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
@@ -353,10 +357,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	}
 
 #ifdef CONFIG_X86_PAE
-	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
-	/* Middle level not present?  We can't map it in. */
-	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
-		return false;
+	if (unlikely(cpu->linear_pages)) {
+		/* Faking up a linear mapping. */
+		gpmd = __pmd(_PAGE_TABLE);
+	} else {
+		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+		/* Middle level not present?  We can't map it in. */
+		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+			return false;
+	}
 
 	/* Now look at the matching shadow entry. */
 	spmd = spmd_addr(cpu, *spgd, vaddr);
@@ -397,8 +406,13 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
 #endif
 
-	/* Read the actual PTE value. */
-	gpte = lgread(cpu, gpte_ptr, pte_t);
+	if (unlikely(cpu->linear_pages)) {
+		/* Linear?  Make up a PTE which points to same page. */
+		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
+	} else {
+		/* Read the actual PTE value. */
+		gpte = lgread(cpu, gpte_ptr, pte_t);
+	}
 
 	/* If this page isn't in the Guest page tables, we can't page it in. */
 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
@@ -454,7 +468,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	 * Finally, we write the Guest PTE entry back: we've set the
 	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
 	 */
-	lgwrite(cpu, gpte_ptr, pte_t, gpte);
+	if (likely(!cpu->linear_pages))
+		lgwrite(cpu, gpte_ptr, pte_t, gpte);
 
 	/*
 	 * The fault is fixed, the page table is populated, the mapping
@@ -612,6 +627,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 #ifdef CONFIG_X86_PAE
 	pmd_t gpmd;
 #endif
+
+	/* Still not set up?  Just map 1:1. */
+	if (unlikely(cpu->linear_pages))
+		return vaddr;
+
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present?  We can't map it in. */
@@ -708,32 +728,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 	return next;
 }
 
-/*H:430
- * (iv) Switching page tables
- *
- * Now we've seen all the page table setting and manipulation, let's see
- * what happens when the Guest changes page tables (ie. changes the top-level
- * pgdir).  This occurs on almost every context switch.
- */
-void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
-{
-	int newpgdir, repin = 0;
-
-	/* Look to see if we have this one already. */
-	newpgdir = find_pgdir(cpu->lg, pgtable);
-	/*
-	 * If not, we allocate or mug an existing one: if it's a fresh one,
-	 * repin gets set to 1.
-	 */
-	if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
-		newpgdir = new_pgdir(cpu, pgtable, &repin);
-	/* Change the current pgd index to the new one. */
-	cpu->cpu_pgd = newpgdir;
-	/* If it was completely blank, we map in the Guest kernel stack */
-	if (repin)
-		pin_stack_pages(cpu);
-}
-
 /*H:470
  * Finally, a routine which throws away everything: all PGD entries in all
  * the shadow page tables, including the Guest's kernel mappings.  This is used
@@ -780,6 +774,44 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu)
 	/* We need the Guest kernel stack mapped again. */
 	pin_stack_pages(cpu);
 }
+
+/*H:430
+ * (iv) Switching page tables
+ *
+ * Now we've seen all the page table setting and manipulation, let's see
+ * what happens when the Guest changes page tables (ie. changes the top-level
+ * pgdir).  This occurs on almost every context switch.
+ */
+void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
+{
+	int newpgdir, repin = 0;
+
+	/*
+	 * The very first time they call this, we're actually running without
+	 * any page tables; we've been making it up.  Throw them away now.
+	 */
+	if (unlikely(cpu->linear_pages)) {
+		release_all_pagetables(cpu->lg);
+		cpu->linear_pages = false;
+		/* Force allocation of a new pgdir. */
+		newpgdir = ARRAY_SIZE(cpu->lg->pgdirs);
+	} else {
+		/* Look to see if we have this one already. */
+		newpgdir = find_pgdir(cpu->lg, pgtable);
+	}
+
+	/*
+	 * If not, we allocate or mug an existing one: if it's a fresh one,
+	 * repin gets set to 1.
+	 */
+	if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
+		newpgdir = new_pgdir(cpu, pgtable, &repin);
+	/* Change the current pgd index to the new one. */
+	cpu->cpu_pgd = newpgdir;
+	/* If it was completely blank, we map in the Guest kernel stack */
+	if (repin)
+		pin_stack_pages(cpu);
+}
 /*:*/
 
 /*M:009
@@ -919,168 +951,26 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
 }
 #endif
 
-/*H:505
- * To get through boot, we construct simple identity page mappings (which
- * set virtual == physical) and linear mappings which will get the Guest far
- * enough into the boot to create its own.  The linear mapping means we
- * simplify the Guest boot, but it makes assumptions about their PAGE_OFFSET,
- * as you'll see.
- *
- * We lay them out of the way, just below the initrd (which is why we need to
- * know its size here).
- */
-static unsigned long setup_pagetables(struct lguest *lg,
-				      unsigned long mem,
-				      unsigned long initrd_size)
-{
-	pgd_t __user *pgdir;
-	pte_t __user *linear;
-	unsigned long mem_base = (unsigned long)lg->mem_base;
-	unsigned int mapped_pages, i, linear_pages;
-#ifdef CONFIG_X86_PAE
-	pmd_t __user *pmds;
-	unsigned int j;
-	pgd_t pgd;
-	pmd_t pmd;
-#else
-	unsigned int phys_linear;
-#endif
-
-	/*
-	 * We have mapped_pages frames to map, so we need linear_pages page
-	 * tables to map them.
-	 */
-	mapped_pages = mem / PAGE_SIZE;
-	linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;
-
-	/* We put the toplevel page directory page at the top of memory. */
-	pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE);
-
-	/* Now we use the next linear_pages pages as pte pages */
-	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
-
-#ifdef CONFIG_X86_PAE
-	/*
-	 * And the single mid page goes below that.  We only use one, but
-	 * that's enough to map 1G, which definitely gets us through boot.
-	 */
-	pmds = (void *)linear - PAGE_SIZE;
-#endif
-	/*
-	 * Linear mapping is easy: put every page's address into the
-	 * mapping in order.
-	 */
-	for (i = 0; i < mapped_pages; i++) {
-		pte_t pte;
-		pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER));
-		if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0)
-			return -EFAULT;
-	}
-
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Make the Guest PMD entries point to the corresponding place in the
-	 * linear mapping (up to one page worth of PMD).
-	 */
-	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
-	     i += PTRS_PER_PTE, j++) {
-		pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE,
-			      __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
-			return -EFAULT;
-	}
-
-	/* One PGD entry, pointing to that PMD page. */
-	pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT);
-	/* Copy it in as the first PGD entry (ie. addresses 0-1G). */
-	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
-		return -EFAULT;
-	/*
-	 * And the other PGD entry to make the linear mapping at PAGE_OFFSET
-	 */
-	if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd)))
-		return -EFAULT;
-#else
-	/*
-	 * The top level points to the linear page table pages above.
-	 * We setup the identity and linear mappings here.
-	 */
-	phys_linear = (unsigned long)linear - mem_base;
-	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
-		pgd_t pgd;
-		/*
-		 * Create a PGD entry which points to the right part of the
-		 * linear PTE pages.
-		 */
-		pgd = __pgd((phys_linear + i * sizeof(pte_t)) |
-			    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-		/*
-		 * Copy it into the PGD page at 0 and PAGE_OFFSET.
-		 */
-		if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd))
-		    || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET)
-					   + i / PTRS_PER_PTE],
-				    &pgd, sizeof(pgd)))
-			return -EFAULT;
-	}
-#endif
-
-	/*
-	 * We return the top level (guest-physical) address: we remember where
-	 * this is to write it into lguest_data when the Guest initializes.
-	 */
-	return (unsigned long)pgdir - mem_base;
-}
-
 /*H:500
  * (vii) Setting up the page tables initially.
  *
- * When a Guest is first created, the Launcher tells us where the toplevel of
- * its first page table is.  We set some things up here:
+ * When a Guest is first created, we initialize a shadow page table which
+ * we will populate on future faults.  The Guest doesn't have any actual
+ * pagetables yet, so we set linear_pages to tell demand_page() to fake it
+ * for the moment.
  */
 int init_guest_pagetable(struct lguest *lg)
 {
-	u64 mem;
-	u32 initrd_size;
-	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
-#ifdef CONFIG_X86_PAE
-	pgd_t *pgd;
-	pmd_t *pmd_table;
-#endif
-	/*
-	 * Get the Guest memory size and the ramdisk size from the boot header
-	 * located at lg->mem_base (Guest address 0).
-	 */
-	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
-	    || get_user(initrd_size, &boot->hdr.ramdisk_size))
-		return -EFAULT;
+	struct lg_cpu *cpu = &lg->cpus[0];
+	int allocated = 0;
 
-	/*
-	 * We start on the first shadow page table, and give it a blank PGD
-	 * page.
-	 */
-	lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size);
-	if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir))
-		return lg->pgdirs[0].gpgdir;
-	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-	if (!lg->pgdirs[0].pgdir)
+	/* lg (and lg->cpus[]) starts zeroed: this allocates a new pgdir */
+	cpu->cpu_pgd = new_pgdir(cpu, 0, &allocated);
+	if (!allocated)
 		return -ENOMEM;
 
-#ifdef CONFIG_X86_PAE
-	/* For PAE, we also create the initial mid-level. */
-	pgd = lg->pgdirs[0].pgdir;
-	pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
-	if (!pmd_table)
-		return -ENOMEM;
-
-	set_pgd(pgd + SWITCHER_PGD_INDEX,
-		__pgd(__pa(pmd_table) | _PAGE_PRESENT));
-#endif
-
-	/* This is the current page table. */
-	lg->cpus[0].cpu_pgd = 0;
+	/* We start with a linear mapping until the Guest initializes. */
+	cpu->linear_pages = true;
 	return 0;
 }
 
@@ -1095,10 +985,10 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
 	 * of virtual addresses used by the Switcher.
 	 */
 	    || put_user(RESERVE_MEM * 1024 * 1024,
-			&cpu->lg->lguest_data->reserve_mem)
-	    || put_user(cpu->lg->pgdirs[0].gpgdir,
-			&cpu->lg->lguest_data->pgdir))
+			&cpu->lg->lguest_data->reserve_mem)) {
 		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
+		return;
+	}
 
 	/*
 	 * In flush_user_mappings() we loop from 0 to