diff options
Diffstat (limited to 'drivers/lguest/interrupts_and_traps.c')
-rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 125 |
1 files changed, 83 insertions, 42 deletions
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 39731232d82..82966982cb3 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
@@ -12,8 +12,14 @@ | |||
12 | * them first, so we also have a way of "reflecting" them into the Guest as if | 12 | * them first, so we also have a way of "reflecting" them into the Guest as if |
13 | * they had been delivered to it directly. :*/ | 13 | * they had been delivered to it directly. :*/ |
14 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
15 | #include <linux/interrupt.h> | ||
16 | #include <linux/module.h> | ||
15 | #include "lg.h" | 17 | #include "lg.h" |
16 | 18 | ||
19 | /* Allow Guests to use a non-128 (ie. non-Linux) syscall trap. */ | ||
20 | static unsigned int syscall_vector = SYSCALL_VECTOR; | ||
21 | module_param(syscall_vector, uint, 0444); | ||
22 | |||
17 | /* The address of the interrupt handler is split into two bits: */ | 23 | /* The address of the interrupt handler is split into two bits: */ |
18 | static unsigned long idt_address(u32 lo, u32 hi) | 24 | static unsigned long idt_address(u32 lo, u32 hi) |
19 | { | 25 | { |
@@ -39,7 +45,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) | |||
39 | { | 45 | { |
40 | /* Stack grows downwards: move stack then write value. */ | 46 | /* Stack grows downwards: move stack then write value. */ |
41 | *gstack -= 4; | 47 | *gstack -= 4; |
42 | lgwrite_u32(lg, *gstack, val); | 48 | lgwrite(lg, *gstack, u32, val); |
43 | } | 49 | } |
44 | 50 | ||
45 | /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or | 51 | /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or |
@@ -56,8 +62,9 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) | |||
56 | * it). */ | 62 | * it). */ |
57 | static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) | 63 | static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) |
58 | { | 64 | { |
59 | unsigned long gstack; | 65 | unsigned long gstack, origstack; |
60 | u32 eflags, ss, irq_enable; | 66 | u32 eflags, ss, irq_enable; |
67 | unsigned long virtstack; | ||
61 | 68 | ||
62 | /* There are two cases for interrupts: one where the Guest is already | 69 | /* There are two cases for interrupts: one where the Guest is already |
63 | * in the kernel, and a more complex one where the Guest is in | 70 | * in the kernel, and a more complex one where the Guest is in |
@@ -65,8 +72,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) | |||
65 | if ((lg->regs->ss&0x3) != GUEST_PL) { | 72 | if ((lg->regs->ss&0x3) != GUEST_PL) { |
66 | /* The Guest told us their kernel stack with the SET_STACK | 73 | /* The Guest told us their kernel stack with the SET_STACK |
67 | * hypercall: both the virtual address and the segment */ | 74 | * hypercall: both the virtual address and the segment */ |
68 | gstack = guest_pa(lg, lg->esp1); | 75 | virtstack = lg->esp1; |
69 | ss = lg->ss1; | 76 | ss = lg->ss1; |
77 | |||
78 | origstack = gstack = guest_pa(lg, virtstack); | ||
70 | /* We push the old stack segment and pointer onto the new | 79 | /* We push the old stack segment and pointer onto the new |
71 | * stack: when the Guest does an "iret" back from the interrupt | 80 | * stack: when the Guest does an "iret" back from the interrupt |
72 | * handler the CPU will notice they're dropping privilege | 81 | * handler the CPU will notice they're dropping privilege |
@@ -75,8 +84,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) | |||
75 | push_guest_stack(lg, &gstack, lg->regs->esp); | 84 | push_guest_stack(lg, &gstack, lg->regs->esp); |
76 | } else { | 85 | } else { |
77 | /* We're staying on the same Guest (kernel) stack. */ | 86 | /* We're staying on the same Guest (kernel) stack. */ |
78 | gstack = guest_pa(lg, lg->regs->esp); | 87 | virtstack = lg->regs->esp; |
79 | ss = lg->regs->ss; | 88 | ss = lg->regs->ss; |
89 | |||
90 | origstack = gstack = guest_pa(lg, virtstack); | ||
80 | } | 91 | } |
81 | 92 | ||
82 | /* Remember that we never let the Guest actually disable interrupts, so | 93 | /* Remember that we never let the Guest actually disable interrupts, so |
@@ -102,7 +113,7 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) | |||
102 | /* Now we've pushed all the old state, we change the stack, the code | 113 | /* Now we've pushed all the old state, we change the stack, the code |
103 | * segment and the address to execute. */ | 114 | * segment and the address to execute. */ |
104 | lg->regs->ss = ss; | 115 | lg->regs->ss = ss; |
105 | lg->regs->esp = gstack + lg->page_offset; | 116 | lg->regs->esp = virtstack + (gstack - origstack); |
106 | lg->regs->cs = (__KERNEL_CS|GUEST_PL); | 117 | lg->regs->cs = (__KERNEL_CS|GUEST_PL); |
107 | lg->regs->eip = idt_address(lo, hi); | 118 | lg->regs->eip = idt_address(lo, hi); |
108 | 119 | ||
@@ -165,7 +176,7 @@ void maybe_do_interrupt(struct lguest *lg) | |||
165 | /* Look at the IDT entry the Guest gave us for this interrupt. The | 176 | /* Look at the IDT entry the Guest gave us for this interrupt. The |
166 | * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip | 177 | * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip |
167 | * over them. */ | 178 | * over them. */ |
168 | idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq]; | 179 | idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; |
169 | /* If they don't have a handler (yet?), we just ignore it */ | 180 | /* If they don't have a handler (yet?), we just ignore it */ |
170 | if (idt_present(idt->a, idt->b)) { | 181 | if (idt_present(idt->a, idt->b)) { |
171 | /* OK, mark it no longer pending and deliver it. */ | 182 | /* OK, mark it no longer pending and deliver it. */ |
@@ -183,6 +194,47 @@ void maybe_do_interrupt(struct lguest *lg) | |||
183 | * timer interrupt. */ | 194 | * timer interrupt. */ |
184 | write_timestamp(lg); | 195 | write_timestamp(lg); |
185 | } | 196 | } |
197 | /*:*/ | ||
198 | |||
199 | /* Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent | ||
200 | * me a patch, so we support that too. It'd be a big step for lguest if half | ||
201 | * the Plan 9 user base were to start using it. | ||
202 | * | ||
203 | * Actually now I think of it, it's possible that Ron *is* half the Plan 9 | ||
204 | * userbase. Oh well. */ | ||
205 | static bool could_be_syscall(unsigned int num) | ||
206 | { | ||
207 | /* Normal Linux SYSCALL_VECTOR or reserved vector? */ | ||
208 | return num == SYSCALL_VECTOR || num == syscall_vector; | ||
209 | } | ||
210 | |||
211 | /* The syscall vector it wants must be unused by Host. */ | ||
212 | bool check_syscall_vector(struct lguest *lg) | ||
213 | { | ||
214 | u32 vector; | ||
215 | |||
216 | if (get_user(vector, &lg->lguest_data->syscall_vec)) | ||
217 | return false; | ||
218 | |||
219 | return could_be_syscall(vector); | ||
220 | } | ||
221 | |||
222 | int init_interrupts(void) | ||
223 | { | ||
224 | /* If they want some strange system call vector, reserve it now */ | ||
225 | if (syscall_vector != SYSCALL_VECTOR | ||
226 | && test_and_set_bit(syscall_vector, used_vectors)) { | ||
227 | printk("lg: couldn't reserve syscall %u\n", syscall_vector); | ||
228 | return -EBUSY; | ||
229 | } | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | void free_interrupts(void) | ||
234 | { | ||
235 | if (syscall_vector != SYSCALL_VECTOR) | ||
236 | clear_bit(syscall_vector, used_vectors); | ||
237 | } | ||
186 | 238 | ||
187 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps | 239 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps |
188 | * like page fault is easy. The only trick is that Intel decided that some | 240 | * like page fault is easy. The only trick is that Intel decided that some |
@@ -197,14 +249,14 @@ int deliver_trap(struct lguest *lg, unsigned int num) | |||
197 | { | 249 | { |
198 | /* Trap numbers are always 8 bit, but we set an impossible trap number | 250 | /* Trap numbers are always 8 bit, but we set an impossible trap number |
199 | * for traps inside the Switcher, so check that here. */ | 251 | * for traps inside the Switcher, so check that here. */ |
200 | if (num >= ARRAY_SIZE(lg->idt)) | 252 | if (num >= ARRAY_SIZE(lg->arch.idt)) |
201 | return 0; | 253 | return 0; |
202 | 254 | ||
203 | /* Early on the Guest hasn't set the IDT entries (or maybe it put a | 255 | /* Early on the Guest hasn't set the IDT entries (or maybe it put a |
204 | * bogus one in): if we fail here, the Guest will be killed. */ | 256 | * bogus one in): if we fail here, the Guest will be killed. */ |
205 | if (!idt_present(lg->idt[num].a, lg->idt[num].b)) | 257 | if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b)) |
206 | return 0; | 258 | return 0; |
207 | set_guest_interrupt(lg, lg->idt[num].a, lg->idt[num].b, has_err(num)); | 259 | set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b, has_err(num)); |
208 | return 1; | 260 | return 1; |
209 | } | 261 | } |
210 | 262 | ||
@@ -218,28 +270,20 @@ int deliver_trap(struct lguest *lg, unsigned int num) | |||
218 | * system calls down from 1750ns to 270ns. Plus, if lguest didn't do it, all | 270 | * system calls down from 1750ns to 270ns. Plus, if lguest didn't do it, all |
219 | * the other hypervisors would tease it. | 271 | * the other hypervisors would tease it. |
220 | * | 272 | * |
221 | * This routine determines if a trap can be delivered directly. */ | 273 | * This routine indicates if a particular trap number could be delivered |
222 | static int direct_trap(const struct lguest *lg, | 274 | * directly. */ |
223 | const struct desc_struct *trap, | 275 | static int direct_trap(unsigned int num) |
224 | unsigned int num) | ||
225 | { | 276 | { |
226 | /* Hardware interrupts don't go to the Guest at all (except system | 277 | /* Hardware interrupts don't go to the Guest at all (except system |
227 | * call). */ | 278 | * call). */ |
228 | if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR) | 279 | if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num)) |
229 | return 0; | 280 | return 0; |
230 | 281 | ||
231 | /* The Host needs to see page faults (for shadow paging and to save the | 282 | /* The Host needs to see page faults (for shadow paging and to save the |
232 | * fault address), general protection faults (in/out emulation) and | 283 | * fault address), general protection faults (in/out emulation) and |
233 | * device not available (TS handling), and of course, the hypercall | 284 | * device not available (TS handling), and of course, the hypercall |
234 | * trap. */ | 285 | * trap. */ |
235 | if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY) | 286 | return num != 14 && num != 13 && num != 7 && num != LGUEST_TRAP_ENTRY; |
236 | return 0; | ||
237 | |||
238 | /* Only trap gates (type 15) can go direct to the Guest. Interrupt | ||
239 | * gates (type 14) disable interrupts as they are entered, which we | ||
240 | * never let the Guest do. Not present entries (type 0x0) also can't | ||
241 | * go direct, of course 8) */ | ||
242 | return idt_type(trap->a, trap->b) == 0xF; | ||
243 | } | 287 | } |
244 | /*:*/ | 288 | /*:*/ |
245 | 289 | ||
@@ -348,15 +392,11 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) | |||
348 | * to copy this again. */ | 392 | * to copy this again. */ |
349 | lg->changed |= CHANGED_IDT; | 393 | lg->changed |= CHANGED_IDT; |
350 | 394 | ||
351 | /* The IDT which we keep in "struct lguest" only contains 32 entries | 395 | /* Check that the Guest doesn't try to step outside the bounds. */ |
352 | * for the traps and LGUEST_IRQS (32) entries for interrupts. We | 396 | if (num >= ARRAY_SIZE(lg->arch.idt)) |
353 | * ignore attempts to set handlers for higher interrupt numbers, except | 397 | kill_guest(lg, "Setting idt entry %u", num); |
354 | * for the system call "interrupt" at 128: we have a special IDT entry | 398 | else |
355 | * for that. */ | 399 | set_trap(lg, &lg->arch.idt[num], num, lo, hi); |
356 | if (num < ARRAY_SIZE(lg->idt)) | ||
357 | set_trap(lg, &lg->idt[num], num, lo, hi); | ||
358 | else if (num == SYSCALL_VECTOR) | ||
359 | set_trap(lg, &lg->syscall_idt, num, lo, hi); | ||
360 | } | 400 | } |
361 | 401 | ||
362 | /* The default entry for each interrupt points into the Switcher routines which | 402 | /* The default entry for each interrupt points into the Switcher routines which |
@@ -399,20 +439,21 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt, | |||
399 | 439 | ||
400 | /* We can simply copy the direct traps, otherwise we use the default | 440 | /* We can simply copy the direct traps, otherwise we use the default |
401 | * ones in the Switcher: they will return to the Host. */ | 441 | * ones in the Switcher: they will return to the Host. */ |
402 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) { | 442 | for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) { |
403 | if (direct_trap(lg, &lg->idt[i], i)) | 443 | /* If no Guest can ever override this trap, leave it alone. */ |
404 | idt[i] = lg->idt[i]; | 444 | if (!direct_trap(i)) |
445 | continue; | ||
446 | |||
447 | /* Only trap gates (type 15) can go direct to the Guest. | ||
448 | * Interrupt gates (type 14) disable interrupts as they are | ||
449 | * entered, which we never let the Guest do. Not present | ||
450 | * entries (type 0x0) also can't go direct, of course. */ | ||
451 | if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF) | ||
452 | idt[i] = lg->arch.idt[i]; | ||
405 | else | 453 | else |
454 | /* Reset it to the default. */ | ||
406 | default_idt_entry(&idt[i], i, def[i]); | 455 | default_idt_entry(&idt[i], i, def[i]); |
407 | } | 456 | } |
408 | |||
409 | /* Don't forget the system call trap! The IDT entries for other | ||
410 | * interrupts never change, so no need to copy them. */ | ||
411 | i = SYSCALL_VECTOR; | ||
412 | if (direct_trap(lg, &lg->syscall_idt, i)) | ||
413 | idt[i] = lg->syscall_idt; | ||
414 | else | ||
415 | default_idt_entry(&idt[i], i, def[i]); | ||
416 | } | 457 | } |
417 | 458 | ||
418 | void guest_set_clockevent(struct lguest *lg, unsigned long delta) | 459 | void guest_set_clockevent(struct lguest *lg, unsigned long delta) |