author		Rusty Russell <rusty@rustcorp.com.au>	2013-04-22 00:40:40 -0400
committer	Rusty Russell <rusty@rustcorp.com.au>	2013-04-22 02:15:01 -0400
commit		3412b6ae2924e068f9932f841bdea0f2d8424502 (patch)
tree		75b499710ee82715c864d5787383ad35a9cd47bd /drivers/lguest
parent		f1f394b1c33d93416c90f97e201d4d386c04af55 (diff)
lguest: don't share Switcher PTE pages between guests.
We currently use the whole top PGD entry for the Switcher, so we simply
share a fixed page of PTEs between all guests (actually, it's one page per
Host CPU, to ensure isolation between guests). This changes to a scheme
where every guest has its own mappings.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
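
The region being mapped: one Switcher code page at switcher_addr, followed
by two pages of "struct lguest_pages" for each Host CPU, all now reached
through per-Guest shadow PTEs. Below is a hedged userspace sketch of that
address arithmetic (the switcher_addr value, page size and CPU count are
illustrative assumptions, not values from the patch), matching the
"base = switcher_addr + PAGE_SIZE + i * sizeof(struct lguest_pages)"
calculation that map_switcher_in_guest() performs in the diff:

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define SWITCHER_ADDR		0xffc00000UL	/* assumed for illustration */
#define LGUEST_PAGES_SIZE	(2 * PAGE_SIZE)	/* regs page + R/O state page */
#define NR_HOST_CPUS		4		/* assumed CPU count */

int main(void)
{
	unsigned long i;

	/* Page 0: the Switcher code itself, mapped into every Guest. */
	printf("code page:       %#lx\n", SWITCHER_ADDR);

	/* Then two pages per Host CPU: Guest regs, then read-only state. */
	for (i = 0; i < NR_HOST_CPUS; i++) {
		unsigned long base = SWITCHER_ADDR + PAGE_SIZE
				     + i * LGUEST_PAGES_SIZE;
		printf("cpu %lu: regs page %#lx, R/O page %#lx\n",
		       i, base, base + PAGE_SIZE);
	}
	return 0;
}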
Diffstat (limited to 'drivers/lguest')
-rw-r--r--	drivers/lguest/core.c		|  10
-rw-r--r--	drivers/lguest/lg.h		|   3
-rw-r--r--	drivers/lguest/page_tables.c	| 260
3 files changed, 107 insertions(+), 166 deletions(-)
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index b6c71c32308c..7e1d7ee36478 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -333,15 +333,10 @@ static int __init init(void)
 	if (err)
 		goto out;
 
-	/* Now we set up the pagetable implementation for the Guests. */
-	err = init_pagetables(lg_switcher_pages);
-	if (err)
-		goto unmap;
-
 	/* We might need to reserve an interrupt vector. */
 	err = init_interrupts();
 	if (err)
-		goto free_pgtables;
+		goto unmap;
 
 	/* /dev/lguest needs to be registered. */
 	err = lguest_device_init();
@@ -356,8 +351,6 @@ static int __init init(void)
 
 free_interrupts:
 	free_interrupts();
-free_pgtables:
-	free_pagetables();
 unmap:
 	unmap_switcher();
 out:
@@ -369,7 +362,6 @@ static void __exit fini(void)
 {
 	lguest_device_remove();
 	free_interrupts();
-	free_pagetables();
 	unmap_switcher();
 
 	lguest_arch_host_fini();
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 9a345efa83e4..faac9fc6db22 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -14,9 +14,6 @@
 
 #include <asm/lguest.h>
 
-void free_pagetables(void);
-int init_pagetables(struct page **switcher_pages);
-
 struct pgdir {
 	unsigned long gpgdir;
 	pgd_t *pgdir;
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 009c717fda99..1f48f2712f3a 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -62,20 +62,11 @@
  * will need the last pmd entry of the last pmd page.
  */
 #ifdef CONFIG_X86_PAE
-#define SWITCHER_PMD_INDEX	(PTRS_PER_PMD - 1)
 #define CHECK_GPGD_MASK _PAGE_PRESENT
 #else
 #define CHECK_GPGD_MASK _PAGE_TABLE
 #endif
 
-/*
- * We actually need a separate PTE page for each CPU. Remember that after the
- * Switcher code itself comes two pages for each CPU, and we don't want this
- * CPU's guest to see the pages of any other CPU.
- */
-static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
-#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
-
 /*H:320
  * The page table code is curly enough to need helper functions to keep it
  * clear and clean. The kernel itself provides many of them; one advantage
@@ -714,9 +705,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 			      int *blank_pgdir)
 {
 	unsigned int next;
-#ifdef CONFIG_X86_PAE
-	pmd_t *pmd_table;
-#endif
 
 	/*
 	 * We pick one entry at random to throw out. Choosing the Least
@@ -731,29 +719,11 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 		if (!cpu->lg->pgdirs[next].pgdir)
 			next = cpu->cpu_pgd;
 		else {
-#ifdef CONFIG_X86_PAE
 			/*
-			 * In PAE mode, allocate a pmd page and populate the
-			 * last pgd entry.
+			 * This is a blank page, so there are no kernel
+			 * mappings: caller must map the stack!
 			 */
-			pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
-			if (!pmd_table) {
-				free_page((long)cpu->lg->pgdirs[next].pgdir);
-				set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0));
-				next = cpu->cpu_pgd;
-			} else {
-				set_pgd(cpu->lg->pgdirs[next].pgdir +
-					SWITCHER_PGD_INDEX,
-					__pgd(__pa(pmd_table) | _PAGE_PRESENT));
-				/*
-				 * This is a blank page, so there are no kernel
-				 * mappings: caller must map the stack!
-				 */
-				*blank_pgdir = 1;
-			}
-#else
 			*blank_pgdir = 1;
-#endif
 		}
 	}
 	/* Record which Guest toplevel this shadows. */
@@ -764,6 +734,23 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 	return next;
 }
 
+/*H:501
+ * We do need the Switcher code mapped at all times, so we allocate that
+ * part of the Guest page table here, and populate it when we're about to run
+ * the guest.
+ */
+static bool allocate_switcher_mapping(struct lg_cpu *cpu)
+{
+	int i;
+
+	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
+		if (!find_spte(cpu, switcher_addr + i * PAGE_SIZE, true,
+			       CHECK_GPGD_MASK, _PAGE_TABLE))
+			return false;
+	}
+	return true;
+}
+
 /*H:470
  * Finally, a routine which throws away everything: all PGD entries in all
  * the shadow page tables, including the Guest's kernel mappings. This is used
@@ -774,28 +761,14 @@ static void release_all_pagetables(struct lguest *lg)
 	unsigned int i, j;
 
 	/* Every shadow pagetable this Guest has */
-	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-		if (lg->pgdirs[i].pgdir) {
-#ifdef CONFIG_X86_PAE
-			pgd_t *spgd;
-			pmd_t *pmdpage;
-			unsigned int k;
+	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) {
+		if (!lg->pgdirs[i].pgdir)
+			continue;
 
-			/* Get the last pmd page. */
-			spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
-			pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
-
-			/*
-			 * And release the pmd entries of that pmd page,
-			 * except for the switcher pmd.
-			 */
-			for (k = 0; k < SWITCHER_PMD_INDEX; k++)
-				release_pmd(&pmdpage[k]);
-#endif
-			/* Every PGD entry except the Switcher at the top */
-			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
-				release_pgd(lg->pgdirs[i].pgdir + j);
-		}
+		/* Every PGD entry. */
+		for (j = 0; j < PTRS_PER_PGD; j++)
+			release_pgd(lg->pgdirs[i].pgdir + j);
+	}
 }
 
 /*
@@ -809,6 +782,9 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu)
 	release_all_pagetables(cpu->lg);
 	/* We need the Guest kernel stack mapped again. */
 	pin_stack_pages(cpu);
+	/* And we need Switcher allocated. */
+	if (!allocate_switcher_mapping(cpu))
+		kill_guest(cpu, "Cannot populate switcher mapping");
 }
 
 /*H:430
@@ -844,9 +820,15 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
 	newpgdir = new_pgdir(cpu, pgtable, &repin);
 	/* Change the current pgd index to the new one. */
 	cpu->cpu_pgd = newpgdir;
-	/* If it was completely blank, we map in the Guest kernel stack */
+	/*
+	 * If it was completely blank, we map in the Guest kernel stack and
+	 * the Switcher.
+	 */
 	if (repin)
 		pin_stack_pages(cpu);
+
+	if (!allocate_switcher_mapping(cpu))
+		kill_guest(cpu, "Cannot populate switcher mapping");
 }
 /*:*/
 
@@ -976,14 +958,23 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 {
 	int pgdir;
 
-	if (idx >= SWITCHER_PGD_INDEX)
+	if (idx > PTRS_PER_PGD) {
+		kill_guest(&lg->cpus[0], "Attempt to set pgd %u/%u",
+			   idx, PTRS_PER_PGD);
 		return;
+	}
 
 	/* If they're talking about a page table we have a shadow for... */
 	pgdir = find_pgdir(lg, gpgdir);
-	if (pgdir < ARRAY_SIZE(lg->pgdirs))
+	if (pgdir < ARRAY_SIZE(lg->pgdirs)) {
 		/* ... throw it away. */
 		release_pgd(lg->pgdirs[pgdir].pgdir + idx);
+		/* That might have been the Switcher mapping, remap it. */
+		if (!allocate_switcher_mapping(&lg->cpus[0])) {
+			kill_guest(&lg->cpus[0],
+				   "Cannot populate switcher mapping");
+		}
+	}
 }
 
 #ifdef CONFIG_X86_PAE
@@ -1001,6 +992,9 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
  * we will populate on future faults. The Guest doesn't have any actual
  * pagetables yet, so we set linear_pages to tell demand_page() to fake it
  * for the moment.
+ *
+ * We do need the Switcher to be mapped at all times, so we allocate that
+ * part of the Guest page table here.
  */
 int init_guest_pagetable(struct lguest *lg)
 {
@@ -1014,6 +1008,13 @@ int init_guest_pagetable(struct lguest *lg)
 
 	/* We start with a linear mapping until the initialize. */
 	cpu->linear_pages = true;
+
+	/* Allocate the page tables for the Switcher. */
+	if (!allocate_switcher_mapping(cpu)) {
+		release_all_pagetables(lg);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -1065,91 +1066,68 @@ void free_guest_pagetable(struct lguest *lg)
  * (vi) Mapping the Switcher when the Guest is about to run.
  *
  * The Switcher and the two pages for this CPU need to be visible in the
- * Guest (and not the pages for other CPUs). We have the appropriate PTE pages
- * for each CPU already set up, we just need to hook them in now we know which
- * Guest is about to run on this CPU.
+ * Guest (and not the pages for other CPUs).
+ *
+ * The pages have all been allocate
  */
 void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
-	pte_t *switcher_pte_page = __this_cpu_read(switcher_pte_pages);
-	pte_t regs_pte;
+	unsigned long base, i;
+	struct page *percpu_switcher_page, *regs_page;
+	pte_t *pte;
 
-#ifdef CONFIG_X86_PAE
-	pmd_t switcher_pmd;
-	pmd_t *pmd_table;
-
-	switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT,
-			       PAGE_KERNEL_EXEC);
-
-	/* Figure out where the pmd page is, by reading the PGD, and converting
-	 * it to a virtual address. */
-	pmd_table = __va(pgd_pfn(cpu->lg->
-			pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
-								<< PAGE_SHIFT);
-	/* Now write it into the shadow page table. */
-	set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
-#else
-	pgd_t switcher_pgd;
+	/* Code page should always be mapped, and executable. */
+	pte = find_spte(cpu, switcher_addr, false, 0, 0);
+	get_page(lg_switcher_pages[0]);
+	set_pte(pte, mk_pte(lg_switcher_pages[0], PAGE_KERNEL_RX));
 
-	/*
-	 * Make the last PGD entry for this Guest point to the Switcher's PTE
-	 * page for this CPU (with appropriate flags).
-	 */
-	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
+	/* Clear all the Switcher mappings for any other CPUs. */
+	/* FIXME: This is dumb: update only when Host CPU changes. */
+	for_each_possible_cpu(i) {
+		/* Get location of lguest_pages (indexed by Host CPU) */
+		base = switcher_addr + PAGE_SIZE
+			+ i * sizeof(struct lguest_pages);
 
-	cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
+		/* Get shadow PTE for first page (where we put guest regs). */
+		pte = find_spte(cpu, base, false, 0, 0);
+		set_pte(pte, __pte(0));
+
+		/* This is where we put R/O state. */
+		pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0);
+		set_pte(pte, __pte(0));
+	}
 
-#endif
 	/*
-	 * We also change the Switcher PTE page. When we're running the Guest,
-	 * we want the Guest's "regs" page to appear where the first Switcher
-	 * page for this CPU is. This is an optimization: when the Switcher
-	 * saves the Guest registers, it saves them into the first page of this
-	 * CPU's "struct lguest_pages": if we make sure the Guest's register
-	 * page is already mapped there, we don't have to copy them out
-	 * again.
+	 * When we're running the Guest, we want the Guest's "regs" page to
+	 * appear where the first Switcher page for this CPU is. This is an
+	 * optimization: when the Switcher saves the Guest registers, it saves
+	 * them into the first page of this CPU's "struct lguest_pages": if we
+	 * make sure the Guest's register page is already mapped there, we
+	 * don't have to copy them out again.
 	 */
-	regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL);
-	set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte);
-}
-/*:*/
-
-static void free_switcher_pte_pages(void)
-{
-	unsigned int i;
-
-	for_each_possible_cpu(i)
-		free_page((long)switcher_pte_page(i));
-}
-
-/*H:520
- * Setting up the Switcher PTE page for given CPU is fairly easy, given
- * the CPU number and the "struct page"s for the Switcher and per-cpu pages.
- */
-static __init void populate_switcher_pte_page(unsigned int cpu,
-					      struct page *switcher_pages[])
-{
-	pte_t *pte = switcher_pte_page(cpu);
-	int i;
-
-	/* The first entries maps the Switcher code. */
-	set_pte(&pte[0], mk_pte(switcher_pages[0],
-				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
-
-	/* The only other thing we map is this CPU's pair of pages. */
-	i = 1 + cpu*2;
-
-	/* First page (Guest registers) is writable from the Guest */
-	set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_pages[i]),
-			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
+	/* Find the shadow PTE for this regs page. */
+	base = switcher_addr + PAGE_SIZE
+		+ raw_smp_processor_id() * sizeof(struct lguest_pages);
+	pte = find_spte(cpu, base, false, 0, 0);
+	regs_page = pfn_to_page(__pa(cpu->regs_page) >> PAGE_SHIFT);
+	get_page(regs_page);
+	set_pte(pte, mk_pte(regs_page, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL)));
 
 	/*
-	 * The second page contains the "struct lguest_ro_state", and is
-	 * read-only.
+	 * We map the second page of the struct lguest_pages read-only in
+	 * the Guest: the IDT, GDT and other things it's not supposed to
+	 * change.
 	 */
-	set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_pages[i+1]),
-			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
+	base += PAGE_SIZE;
+	pte = find_spte(cpu, base, false, 0, 0);
+
+	percpu_switcher_page
+		= lg_switcher_pages[1 + raw_smp_processor_id()*2 + 1];
+	get_page(percpu_switcher_page);
+	set_pte(pte, mk_pte(percpu_switcher_page,
+			    __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)));
 }
+/*:*/
 
 /*
  * We've made it through the page table code. Perhaps our tired brains are
@@ -1163,29 +1141,3 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
  *
  * There is just one file remaining in the Host.
  */
-
-/*H:510
- * At boot or module load time, init_pagetables() allocates and populates
- * the Switcher PTE page for each CPU.
- */
-__init int init_pagetables(struct page **switcher_pages)
-{
-	unsigned int i;
-
-	for_each_possible_cpu(i) {
-		switcher_pte_page(i) = (pte_t *)get_zeroed_page(GFP_KERNEL);
-		if (!switcher_pte_page(i)) {
-			free_switcher_pte_pages();
-			return -ENOMEM;
-		}
-		populate_switcher_pte_page(i, switcher_pages);
-	}
-	return 0;
-}
-/*:*/
-
-/* Cleaning up simply involves freeing the PTE page for each CPU. */
-void free_pagetables(void)
-{
-	free_switcher_pte_pages();
-}
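
After this patch, the only Switcher page-table setup left is per-Guest. The
toy userspace model below (an illustration, not kernel code: find_spte_stub,
the base address and the page count are all made-up stand-ins) mirrors the
shape of the allocate_switcher_mapping() loop added above: walk the Switcher
region page by page, reserve a shadow PTE slot for each, and fail cleanly if
any reservation fails.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE		4096UL
#define TOTAL_SWITCHER_PAGES	9	/* assumed: 1 code page + 2 x 4 CPUs */

static const unsigned long switcher_addr = 0xffc00000UL;	/* assumed */

/* Stand-in for find_spte(..., allocate=true): reserve one PTE slot. */
static bool find_spte_stub(unsigned long vaddr)
{
	long *slot = malloc(sizeof(*slot));	/* models the PTE-page alloc */

	if (!slot)
		return false;
	printf("shadow PTE reserved for %#lx\n", vaddr);
	free(slot);
	return true;
}

int main(void)
{
	int i;

	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
		if (!find_spte_stub(switcher_addr + i * PAGE_SIZE)) {
			/* The kernel would kill_guest() or return -ENOMEM. */
			fprintf(stderr, "Cannot populate switcher mapping\n");
			return 1;
		}
	}
	return 0;
}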