Diffstat (limited to 'drivers/lguest/segments.c')
 -rw-r--r--   drivers/lguest/segments.c | 106
 1 file changed, 69 insertions, 37 deletions
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 482ed5a18750..951c57b0a7e0 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -1,4 +1,5 @@
-/*P:600 The x86 architecture has segments, which involve a table of descriptors
+/*P:600
+ * The x86 architecture has segments, which involve a table of descriptors
  * which can be used to do funky things with virtual address interpretation.
  * We originally used to use segments so the Guest couldn't alter the
  * Guest<->Host Switcher, and then we had to trim Guest segments, and restore
@@ -8,7 +9,8 @@
  *
  * In these modern times, the segment handling code consists of simple sanity
  * checks, and the worst you'll experience reading this code is butterfly-rash
- * from frolicking through its parklike serenity. :*/
+ * from frolicking through its parklike serenity.
+:*/
 #include "lg.h"

 /*H:600
@@ -41,10 +43,12 @@
  * begin.
  */

-/* There are several entries we don't let the Guest set. The TSS entry is the
+/*
+ * There are several entries we don't let the Guest set. The TSS entry is the
  * "Task State Segment" which controls all kinds of delicate things. The
  * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the
- * the Guest can't be trusted to deal with double faults. */
+ * the Guest can't be trusted to deal with double faults.
+ */
 static bool ignored_gdt(unsigned int num)
 {
 	return (num == GDT_ENTRY_TSS
@@ -53,42 +57,52 @@ static bool ignored_gdt(unsigned int num)
 	    || num == GDT_ENTRY_DOUBLEFAULT_TSS);
 }

-/*H:630 Once the Guest gave us new GDT entries, we fix them up a little. We
+/*H:630
+ * Once the Guest gave us new GDT entries, we fix them up a little. We
  * don't care if they're invalid: the worst that can happen is a General
  * Protection Fault in the Switcher when it restores a Guest segment register
  * which tries to use that entry. Then we kill the Guest for causing such a
- * mess: the message will be "unhandled trap 256". */
+ * mess: the message will be "unhandled trap 256".
+ */
 static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end)
 {
 	unsigned int i;

 	for (i = start; i < end; i++) {
-		/* We never copy these ones to real GDT, so we don't care what
-		 * they say */
+		/*
+		 * We never copy these ones to real GDT, so we don't care what
+		 * they say
+		 */
 		if (ignored_gdt(i))
 			continue;

-		/* Segment descriptors contain a privilege level: the Guest is
+		/*
+		 * Segment descriptors contain a privilege level: the Guest is
 		 * sometimes careless and leaves this as 0, even though it's
-		 * running at privilege level 1. If so, we fix it here. */
+		 * running at privilege level 1. If so, we fix it here.
+		 */
 		if ((cpu->arch.gdt[i].b & 0x00006000) == 0)
 			cpu->arch.gdt[i].b |= (GUEST_PL << 13);

-		/* Each descriptor has an "accessed" bit. If we don't set it
+		/*
+		 * Each descriptor has an "accessed" bit. If we don't set it
 		 * now, the CPU will try to set it when the Guest first loads
 		 * that entry into a segment register. But the GDT isn't
-		 * writable by the Guest, so bad things can happen. */
+		 * writable by the Guest, so bad things can happen.
+		 */
 		cpu->arch.gdt[i].b |= 0x00000100;
 	}
 }

-/*H:610 Like the IDT, we never simply use the GDT the Guest gives us. We keep
+/*H:610
+ * Like the IDT, we never simply use the GDT the Guest gives us. We keep
  * a GDT for each CPU, and copy across the Guest's entries each time we want to
  * run the Guest on that CPU.
  *
  * This routine is called at boot or modprobe time for each CPU to set up the
  * constant GDT entries: the ones which are the same no matter what Guest we're
- * running. */
+ * running.
+ */
 void setup_default_gdt_entries(struct lguest_ro_state *state)
 {
 	struct desc_struct *gdt = state->guest_gdt;
@@ -98,30 +112,37 @@ void setup_default_gdt_entries(struct lguest_ro_state *state)
 	gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
 	gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;

-	/* The TSS segment refers to the TSS entry for this particular CPU.
+	/*
+	 * The TSS segment refers to the TSS entry for this particular CPU.
 	 * Forgive the magic flags: the 0x8900 means the entry is Present, it's
 	 * privilege level 0 Available 386 TSS system segment, and the 0x67
-	 * means Saturn is eclipsed by Mercury in the twelfth house. */
+	 * means Saturn is eclipsed by Mercury in the twelfth house.
+	 */
 	gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16);
 	gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000)
 		| ((tss >> 16) & 0x000000FF);
 }

-/* This routine sets up the initial Guest GDT for booting. All entries start
- * as 0 (unusable). */
+/*
+ * This routine sets up the initial Guest GDT for booting. All entries start
+ * as 0 (unusable).
+ */
 void setup_guest_gdt(struct lg_cpu *cpu)
 {
-	/* Start with full 0-4G segments... */
+	/*
+	 * Start with full 0-4G segments...except the Guest is allowed to use
+	 * them, so set the privilege level appropriately in the flags.
+	 */
 	cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
 	cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
-	/* ...except the Guest is allowed to use them, so set the privilege
-	 * level appropriately in the flags. */
 	cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
 	cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
 }

-/*H:650 An optimization of copy_gdt(), for just the three "thead-local storage"
- * entries. */
+/*H:650
+ * An optimization of copy_gdt(), for just the three "thead-local storage"
+ * entries.
+ */
 void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt)
 {
 	unsigned int i;
@@ -130,26 +151,34 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt)
 		gdt[i] = cpu->arch.gdt[i];
 }

-/*H:640 When the Guest is run on a different CPU, or the GDT entries have
- * changed, copy_gdt() is called to copy the Guest's GDT entries across to this
- * CPU's GDT. */
+/*H:640
+ * When the Guest is run on a different CPU, or the GDT entries have changed,
+ * copy_gdt() is called to copy the Guest's GDT entries across to this CPU's
+ * GDT.
+ */
 void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt)
 {
 	unsigned int i;

-	/* The default entries from setup_default_gdt_entries() are not
-	 * replaced. See ignored_gdt() above. */
+	/*
+	 * The default entries from setup_default_gdt_entries() are not
+	 * replaced. See ignored_gdt() above.
+	 */
 	for (i = 0; i < GDT_ENTRIES; i++)
 		if (!ignored_gdt(i))
 			gdt[i] = cpu->arch.gdt[i];
 }

-/*H:620 This is where the Guest asks us to load a new GDT entry
- * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. */
+/*H:620
+ * This is where the Guest asks us to load a new GDT entry
+ * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in.
+ */
 void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi)
 {
-	/* We assume the Guest has the same number of GDT entries as the
-	 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
+	/*
+	 * We assume the Guest has the same number of GDT entries as the
+	 * Host, otherwise we'd have to dynamically allocate the Guest GDT.
+	 */
 	if (num >= ARRAY_SIZE(cpu->arch.gdt))
 		kill_guest(cpu, "too many gdt entries %i", num);

@@ -157,15 +186,19 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi)
 	cpu->arch.gdt[num].a = lo;
 	cpu->arch.gdt[num].b = hi;
 	fixup_gdt_table(cpu, num, num+1);
-	/* Mark that the GDT changed so the core knows it has to copy it again,
-	 * even if the Guest is run on the same CPU. */
+	/*
+	 * Mark that the GDT changed so the core knows it has to copy it again,
+	 * even if the Guest is run on the same CPU.
+	 */
 	cpu->changed |= CHANGED_GDT;
 }

-/* This is the fast-track version for just changing the three TLS entries.
+/*
+ * This is the fast-track version for just changing the three TLS entries.
  * Remember that this happens on every context switch, so it's worth
  * optimizing. But wouldn't it be neater to have a single hypercall to cover
- * both cases? */
+ * both cases?
+ */
 void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls)
 {
 	struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN];
@@ -175,7 +208,6 @@ void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls)
 	/* Note that just the TLS entries have changed. */
 	cpu->changed |= CHANGED_GDT_TLS;
 }
-/*:*/

 /*H:660
  * With this, we have finished the Host.
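A note on the descriptor fix-ups touched above: the comments in fixup_gdt_table() describe two bit operations, forcing a zero privilege level up to the Guest's ring and pre-setting the "accessed" bit. The sketch below is not lguest or kernel code; it is a minimal, self-contained user-space C program in which struct fake_desc, the sample descriptor value and the main() harness are illustrative assumptions. Only the masks (0x00006000 for the DPL bits, 0x00000100 for the accessed bit) and the GUEST_PL << 13 value are taken from the patch itself.

/*
 * Illustrative only: a user-space approximation of the two descriptor
 * fix-ups described in fixup_gdt_table(). struct fake_desc stands in for
 * the kernel's desc_struct; GUEST_PL is 1 because lguest Guests run at
 * privilege level 1.
 */
#include <stdint.h>
#include <stdio.h>

#define GUEST_PL 1

struct fake_desc {
	uint32_t a;	/* low dword: limit 15:0, base 15:0 */
	uint32_t b;	/* high dword: base, type, DPL, flags */
};

static void fixup_desc(struct fake_desc *d)
{
	/* DPL lives in bits 13-14 of the high dword; lift a careless DPL 0 to ring 1. */
	if ((d->b & 0x00006000) == 0)
		d->b |= (GUEST_PL << 13);

	/* Set the "accessed" bit (bit 8) so the CPU never has to write the GDT. */
	d->b |= 0x00000100;
}

int main(void)
{
	/* A flat 4G code segment left at DPL 0 (high dword 0x00cf9a00). */
	struct fake_desc d = { .a = 0x0000ffff, .b = 0x00cf9a00 };

	fixup_desc(&d);
	/* Prints 0x00cfbb00: DPL is now 1 and the accessed bit is set. */
	printf("high dword after fixup: 0x%08x\n", (unsigned)d.b);
	return 0;
}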
