Diffstat (limited to 'drivers/lguest/page_tables.c')
-rw-r--r--  drivers/lguest/page_tables.c | 260
1 file changed, 106 insertions(+), 154 deletions(-)
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 009c717fda99..1f48f2712f3a 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -62,20 +62,11 @@
  * will need the last pmd entry of the last pmd page.
  */
 #ifdef CONFIG_X86_PAE
-#define SWITCHER_PMD_INDEX      (PTRS_PER_PMD - 1)
 #define CHECK_GPGD_MASK _PAGE_PRESENT
 #else
 #define CHECK_GPGD_MASK _PAGE_TABLE
 #endif
 
-/*
- * We actually need a separate PTE page for each CPU.  Remember that after the
- * Switcher code itself comes two pages for each CPU, and we don't want this
- * CPU's guest to see the pages of any other CPU.
- */
-static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
-#define switcher_pte_page(cpu)  per_cpu(switcher_pte_pages, cpu)
-
 /*H:320
  * The page table code is curly enough to need helper functions to keep it
  * clear and clean.  The kernel itself provides many of them; one advantage
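
The per-CPU PTE pages removed above covered the Switcher code plus two pages per CPU, and that is exactly the region the per-Guest allocate_switcher_mapping() added further down must span. For orientation, the sizing comes from lg.h; the following is a paraphrase from memory of that header, not part of this diff, so check the tree for the authoritative definitions:

/* Paraphrased from drivers/lguest/lg.h (not part of this patch). */
#define SHARED_SWITCHER_PAGES \
        DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE)
/* Pages for the Switcher code itself, then two pages per CPU. */
#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)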
@@ -714,9 +705,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
                                int *blank_pgdir)
 {
        unsigned int next;
-#ifdef CONFIG_X86_PAE
-       pmd_t *pmd_table;
-#endif
 
        /*
         * We pick one entry at random to throw out.  Choosing the Least
@@ -731,29 +719,11 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
        if (!cpu->lg->pgdirs[next].pgdir)
                next = cpu->cpu_pgd;
        else {
-#ifdef CONFIG_X86_PAE
                /*
-                * In PAE mode, allocate a pmd page and populate the
-                * last pgd entry.
+                * This is a blank page, so there are no kernel
+                * mappings: caller must map the stack!
                 */
-               pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
-               if (!pmd_table) {
-                       free_page((long)cpu->lg->pgdirs[next].pgdir);
-                       set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0));
-                       next = cpu->cpu_pgd;
-               } else {
-                       set_pgd(cpu->lg->pgdirs[next].pgdir +
-                               SWITCHER_PGD_INDEX,
-                               __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-                       /*
-                        * This is a blank page, so there are no kernel
-                        * mappings: caller must map the stack!
-                        */
-                       *blank_pgdir = 1;
-               }
-#else
                *blank_pgdir = 1;
-#endif
        }
 }
 /* Record which Guest toplevel this shadows. */
@@ -764,6 +734,23 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
        return next;
 }
 
+/*H:501
+ * We do need the Switcher code mapped at all times, so we allocate that
+ * part of the Guest page table here, and populate it when we're about to run
+ * the guest.
+ */
+static bool allocate_switcher_mapping(struct lg_cpu *cpu)
+{
+       int i;
+
+       for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
+               if (!find_spte(cpu, switcher_addr + i * PAGE_SIZE, true,
+                              CHECK_GPGD_MASK, _PAGE_TABLE))
+                       return false;
+       }
+       return true;
+}
+
 /*H:470
  * Finally, a routine which throws away everything: all PGD entries in all
  * the shadow page tables, including the Guest's kernel mappings. This is used
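
find_spte() itself is added elsewhere in this series and is not shown here. Judging from the call sites in this patch (allocate=true with CHECK_GPGD_MASK/_PAGE_TABLE above; allocate=false with zero flags in map_switcher_in_guest() below), it walks the shadow page table for vaddr, optionally allocating missing intermediate pages with the given flags, and returns a pointer to the PTE slot, or NULL. A simplified non-PAE sketch of that contract; spgd_addr() and spte_addr() are lguest's existing shadow-walk helpers, but the parameter names and body here are illustrative, not the tree's code:

static pte_t *find_spte(struct lg_cpu *cpu, unsigned long vaddr, bool allocate,
                        int pgd_prot, int pmd_prot)
{
        pgd_t *spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);

        if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
                unsigned long ptepage;

                /* Caller only wanted an existing mapping? */
                if (!allocate)
                        return NULL;

                /* Hook a zeroed PTE page into the top-level entry. */
                ptepage = get_zeroed_page(GFP_KERNEL);
                if (!ptepage)
                        return NULL;
                set_pgd(spgd, __pgd(__pa(ptepage) | pgd_prot));
        }
        /* (A PAE build would also walk/allocate a pmd, using pmd_prot.) */
        return spte_addr(cpu, *spgd, vaddr);
}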
@@ -774,28 +761,14 @@ static void release_all_pagetables(struct lguest *lg)
        unsigned int i, j;
 
        /* Every shadow pagetable this Guest has */
-       for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-               if (lg->pgdirs[i].pgdir) {
-#ifdef CONFIG_X86_PAE
-                       pgd_t *spgd;
-                       pmd_t *pmdpage;
-                       unsigned int k;
+       for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) {
+               if (!lg->pgdirs[i].pgdir)
+                       continue;
 
-                       /* Get the last pmd page. */
-                       spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
-                       pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
-
-                       /*
-                        * And release the pmd entries of that pmd page,
-                        * except for the switcher pmd.
-                        */
-                       for (k = 0; k < SWITCHER_PMD_INDEX; k++)
-                               release_pmd(&pmdpage[k]);
-#endif
-                       /* Every PGD entry except the Switcher at the top */
-                       for (j = 0; j < SWITCHER_PGD_INDEX; j++)
-                               release_pgd(lg->pgdirs[i].pgdir + j);
-               }
+               /* Every PGD entry. */
+               for (j = 0; j < PTRS_PER_PGD; j++)
+                       release_pgd(lg->pgdirs[i].pgdir + j);
+       }
 }
 
 /*
@@ -809,6 +782,9 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu)
        release_all_pagetables(cpu->lg);
        /* We need the Guest kernel stack mapped again. */
        pin_stack_pages(cpu);
+       /* And we need Switcher allocated. */
+       if (!allocate_switcher_mapping(cpu))
+               kill_guest(cpu, "Cannot populate switcher mapping");
 }
 
 /*H:430
@@ -844,9 +820,15 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
        newpgdir = new_pgdir(cpu, pgtable, &repin);
        /* Change the current pgd index to the new one. */
        cpu->cpu_pgd = newpgdir;
-       /* If it was completely blank, we map in the Guest kernel stack */
+       /*
+        * If it was completely blank, we map in the Guest kernel stack and
+        * the Switcher.
+        */
        if (repin)
                pin_stack_pages(cpu);
+
+       if (!allocate_switcher_mapping(cpu))
+               kill_guest(cpu, "Cannot populate switcher mapping");
 }
 /*:*/
 
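Note that the allocate_switcher_mapping() call lands outside the if (repin): even a recycled, non-blank shadow may have lost its Switcher pages to guest_set_pgd() (see below), so the mapping is re-established on every top-level switch, which amounts to a cheap walk when it already exists. pin_stack_pages() is not in this diff; a sketch of its shape, modeled on the helper earlier in this file (details may differ from the tree):

void pin_stack_pages(struct lg_cpu *cpu)
{
        unsigned int i;

        /*
         * cpu->esp1 is the start of the page *after* the kernel stack;
         * walk back down over each stack page and fault it in.
         */
        for (i = 0; i < cpu->lg->stack_pages; i++)
                pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE);
}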
@@ -976,14 +958,23 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 {
        int pgdir;
 
-       if (idx >= SWITCHER_PGD_INDEX)
+       if (idx > PTRS_PER_PGD) {
+               kill_guest(&lg->cpus[0], "Attempt to set pgd %u/%u",
+                          idx, PTRS_PER_PGD);
                return;
+       }
 
        /* If they're talking about a page table we have a shadow for... */
        pgdir = find_pgdir(lg, gpgdir);
-       if (pgdir < ARRAY_SIZE(lg->pgdirs))
+       if (pgdir < ARRAY_SIZE(lg->pgdirs)) {
                /* ... throw it away. */
                release_pgd(lg->pgdirs[pgdir].pgdir + idx);
+               /* That might have been the Switcher mapping, remap it. */
+               if (!allocate_switcher_mapping(&lg->cpus[0])) {
+                       kill_guest(&lg->cpus[0],
+                                  "Cannot populate switcher mapping");
+               }
+       }
 }
 
 #ifdef CONFIG_X86_PAE
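
Two things are worth noting in this hunk. First, the new bounds check uses `>` rather than `>=`, so idx == PTRS_PER_PGD still slips through to release_pgd() one entry past the table; that looks like an off-by-one, preserved here exactly as the patch has it. Second, the unconditional remap exists because the released slot may have been the one covering switcher_addr. An illustrative predicate for that case (hypothetical, not in the patch; pgd_index() is the standard kernel macro):

/* Hypothetical check: did PGD slot idx map the Switcher region? */
static bool pgd_slot_covers_switcher(u32 idx)
{
        return idx == pgd_index(switcher_addr);
}

The patch skips any such test: re-running allocate_switcher_mapping() is idempotent and cheap when the mapping survived.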
@@ -1001,6 +992,9 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
  * we will populate on future faults.  The Guest doesn't have any actual
  * pagetables yet, so we set linear_pages to tell demand_page() to fake it
  * for the moment.
+ *
+ * We do need the Switcher to be mapped at all times, so we allocate that
+ * part of the Guest page table here.
  */
 int init_guest_pagetable(struct lguest *lg)
 {
@@ -1014,6 +1008,13 @@ int init_guest_pagetable(struct lguest *lg)
 
        /* We start with a linear mapping until the initialize. */
        cpu->linear_pages = true;
+
+       /* Allocate the page tables for the Switcher. */
+       if (!allocate_switcher_mapping(cpu)) {
+               release_all_pagetables(lg);
+               return -ENOMEM;
+       }
+
        return 0;
 }
 
@@ -1065,91 +1066,68 @@ void free_guest_pagetable(struct lguest *lg)
  * (vi) Mapping the Switcher when the Guest is about to run.
  *
  * The Switcher and the two pages for this CPU need to be visible in the
- * Guest (and not the pages for other CPUs). We have the appropriate PTE pages
- * for each CPU already set up, we just need to hook them in now we know which
- * Guest is about to run on this CPU.
+ * Guest (and not the pages for other CPUs).
+ *
+ * The pages have all been allocated.
  */
 void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
-       pte_t *switcher_pte_page = __this_cpu_read(switcher_pte_pages);
-       pte_t regs_pte;
+       unsigned long base, i;
+       struct page *percpu_switcher_page, *regs_page;
+       pte_t *pte;
 
-#ifdef CONFIG_X86_PAE
-       pmd_t switcher_pmd;
-       pmd_t *pmd_table;
-
-       switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT,
-                              PAGE_KERNEL_EXEC);
-
-       /* Figure out where the pmd page is, by reading the PGD, and converting
-        * it to a virtual address. */
-       pmd_table = __va(pgd_pfn(cpu->lg->
-                       pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
-                                                               << PAGE_SHIFT);
-       /* Now write it into the shadow page table. */
-       set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
-#else
-       pgd_t switcher_pgd;
+       /* Code page should always be mapped, and executable. */
+       pte = find_spte(cpu, switcher_addr, false, 0, 0);
+       get_page(lg_switcher_pages[0]);
+       set_pte(pte, mk_pte(lg_switcher_pages[0], PAGE_KERNEL_RX));
 
-       /*
-        * Make the last PGD entry for this Guest point to the Switcher's PTE
-        * page for this CPU (with appropriate flags).
-        */
-       switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
+       /* Clear all the Switcher mappings for any other CPUs. */
+       /* FIXME: This is dumb: update only when Host CPU changes. */
+       for_each_possible_cpu(i) {
+               /* Get location of lguest_pages (indexed by Host CPU) */
+               base = switcher_addr + PAGE_SIZE
+                       + i * sizeof(struct lguest_pages);
 
-       cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
+               /* Get shadow PTE for first page (where we put guest regs). */
+               pte = find_spte(cpu, base, false, 0, 0);
+               set_pte(pte, __pte(0));
+
+               /* This is where we put R/O state. */
+               pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0);
+               set_pte(pte, __pte(0));
+       }
 
-#endif
        /*
-        * We also change the Switcher PTE page.  When we're running the Guest,
-        * we want the Guest's "regs" page to appear where the first Switcher
-        * page for this CPU is. This is an optimization: when the Switcher
-        * saves the Guest registers, it saves them into the first page of this
-        * CPU's "struct lguest_pages": if we make sure the Guest's register
-        * page is already mapped there, we don't have to copy them out
-        * again.
+        * When we're running the Guest, we want the Guest's "regs" page to
+        * appear where the first Switcher page for this CPU is. This is an
+        * optimization: when the Switcher saves the Guest registers, it saves
+        * them into the first page of this CPU's "struct lguest_pages": if we
+        * make sure the Guest's register page is already mapped there, we
+        * don't have to copy them out again.
         */
-       regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL);
-       set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte);
-}
-/*:*/
-
-static void free_switcher_pte_pages(void)
-{
-       unsigned int i;
-
-       for_each_possible_cpu(i)
-               free_page((long)switcher_pte_page(i));
-}
-
-/*H:520
- * Setting up the Switcher PTE page for given CPU is fairly easy, given
- * the CPU number and the "struct page"s for the Switcher and per-cpu pages.
- */
-static __init void populate_switcher_pte_page(unsigned int cpu,
-                                             struct page *switcher_pages[])
-{
-       pte_t *pte = switcher_pte_page(cpu);
-       int i;
-
-       /* The first entries maps the Switcher code. */
-       set_pte(&pte[0], mk_pte(switcher_pages[0],
-                               __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
-
-       /* The only other thing we map is this CPU's pair of pages. */
-       i = 1 + cpu*2;
-
-       /* First page (Guest registers) is writable from the Guest */
-       set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_pages[i]),
-                        __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
+       /* Find the shadow PTE for this regs page. */
+       base = switcher_addr + PAGE_SIZE
+               + raw_smp_processor_id() * sizeof(struct lguest_pages);
+       pte = find_spte(cpu, base, false, 0, 0);
+       regs_page = pfn_to_page(__pa(cpu->regs_page) >> PAGE_SHIFT);
+       get_page(regs_page);
+       set_pte(pte, mk_pte(regs_page, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL)));
 
        /*
-        * The second page contains the "struct lguest_ro_state", and is
-        * read-only.
+        * We map the second page of the struct lguest_pages read-only in
+        * the Guest: the IDT, GDT and other things it's not supposed to
+        * change.
         */
-       set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_pages[i+1]),
-                          __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
+       base += PAGE_SIZE;
+       pte = find_spte(cpu, base, false, 0, 0);
+
+       percpu_switcher_page
+               = lg_switcher_pages[1 + raw_smp_processor_id()*2 + 1];
+       get_page(percpu_switcher_page);
+       set_pte(pte, mk_pte(percpu_switcher_page,
+                           __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)));
 }
+/*:*/
 
 /*
  * We've made it through the page table code. Perhaps our tired brains are
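
A reading aid for the address arithmetic above (the helper name is hypothetical, not in the tree): the Switcher region is the shared code page at switcher_addr followed by one struct lguest_pages per Host CPU, so the per-CPU base used for both the regs page and the read-only page is:

/* Hypothetical helper mirroring the 'base' computation above. */
static unsigned long lguest_pages_base(unsigned int hostcpu)
{
        /* Page 0 is the shared Switcher code; per-CPU pairs follow. */
        return switcher_addr + PAGE_SIZE
               + hostcpu * sizeof(struct lguest_pages);
}

Since sizeof(struct lguest_pages) is two pages (regs page plus read-only state page), this also explains the 1 + raw_smp_processor_id()*2 + 1 index into the flat lg_switcher_pages array: skip the code page, then two pages for each earlier CPU, then take the second page of this CPU's pair.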
@@ -1163,29 +1141,3 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
  *
  * There is just one file remaining in the Host.
  */
-
-/*H:510
- * At boot or module load time, init_pagetables() allocates and populates
- * the Switcher PTE page for each CPU.
- */
-__init int init_pagetables(struct page **switcher_pages)
-{
-       unsigned int i;
-
-       for_each_possible_cpu(i) {
-               switcher_pte_page(i) = (pte_t *)get_zeroed_page(GFP_KERNEL);
-               if (!switcher_pte_page(i)) {
-                       free_switcher_pte_pages();
-                       return -ENOMEM;
-               }
-               populate_switcher_pte_page(i, switcher_pages);
-       }
-       return 0;
-}
-/*:*/
-
-/* Cleaning up simply involves freeing the PTE page for each CPU. */
-void free_pagetables(void)
-{
-       free_switcher_pte_pages();
-}