aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lguest/interrupts_and_traps.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/lguest/interrupts_and_traps.c')
-rw-r--r-- drivers/lguest/interrupts_and_traps.c 288
1 files changed, 191 insertions, 97 deletions
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 0e9067b0d507..18648180db02 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -1,4 +1,5 @@
1/*P:800 Interrupts (traps) are complicated enough to earn their own file. 1/*P:800
2 * Interrupts (traps) are complicated enough to earn their own file.
2 * There are three classes of interrupts: 3 * There are three classes of interrupts:
3 * 4 *
4 * 1) Real hardware interrupts which occur while we're running the Guest, 5 * 1) Real hardware interrupts which occur while we're running the Guest,
@@ -10,7 +11,8 @@
10 * just like real hardware would deliver them. Traps from the Guest can be set 11 * just like real hardware would deliver them. Traps from the Guest can be set
11 * up to go directly back into the Guest, but sometimes the Host wants to see 12 * up to go directly back into the Guest, but sometimes the Host wants to see
12 * them first, so we also have a way of "reflecting" them into the Guest as if 13 * them first, so we also have a way of "reflecting" them into the Guest as if
13 * they had been delivered to it directly. :*/ 14 * they had been delivered to it directly.
15:*/
14#include <linux/uaccess.h> 16#include <linux/uaccess.h>
15#include <linux/interrupt.h> 17#include <linux/interrupt.h>
16#include <linux/module.h> 18#include <linux/module.h>
@@ -26,8 +28,10 @@ static unsigned long idt_address(u32 lo, u32 hi)
26 return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); 28 return (lo & 0x0000FFFF) | (hi & 0xFFFF0000);
27} 29}
28 30
29/* The "type" of the interrupt handler is a 4 bit field: we only support a 31/*
30 * couple of types. */ 32 * The "type" of the interrupt handler is a 4 bit field: we only support a
33 * couple of types.
34 */
31static int idt_type(u32 lo, u32 hi) 35static int idt_type(u32 lo, u32 hi)
32{ 36{
33 return (hi >> 8) & 0xF; 37 return (hi >> 8) & 0xF;
@@ -39,8 +43,10 @@ static bool idt_present(u32 lo, u32 hi)
39 return (hi & 0x8000); 43 return (hi & 0x8000);
40} 44}
41 45
42/* We need a helper to "push" a value onto the Guest's stack, since that's a 46/*
43 * big part of what delivering an interrupt does. */ 47 * We need a helper to "push" a value onto the Guest's stack, since that's a
48 * big part of what delivering an interrupt does.
49 */
44static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) 50static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
45{ 51{
46 /* Stack grows upwards: move stack then write value. */ 52 /* Stack grows upwards: move stack then write value. */
@@ -48,7 +54,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
48 lgwrite(cpu, *gstack, u32, val); 54 lgwrite(cpu, *gstack, u32, val);
49} 55}
50 56
51/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or 57/*H:210
58 * The set_guest_interrupt() routine actually delivers the interrupt or
52 * trap. The mechanics of delivering traps and interrupts to the Guest are the 59 * trap. The mechanics of delivering traps and interrupts to the Guest are the
53 * same, except some traps have an "error code" which gets pushed onto the 60 * same, except some traps have an "error code" which gets pushed onto the
54 * stack as well: the caller tells us if this is one. 61 * stack as well: the caller tells us if this is one.
@@ -59,7 +66,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
59 * 66 *
60 * We set up the stack just like the CPU does for a real interrupt, so it's 67 * We set up the stack just like the CPU does for a real interrupt, so it's
61 * identical for the Guest (and the standard "iret" instruction will undo 68 * identical for the Guest (and the standard "iret" instruction will undo
62 * it). */ 69 * it).
70 */
63static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, 71static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
64 bool has_err) 72 bool has_err)
65{ 73{
@@ -67,20 +75,26 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
67 u32 eflags, ss, irq_enable; 75 u32 eflags, ss, irq_enable;
68 unsigned long virtstack; 76 unsigned long virtstack;
69 77
70 /* There are two cases for interrupts: one where the Guest is already 78 /*
79 * There are two cases for interrupts: one where the Guest is already
71 * in the kernel, and a more complex one where the Guest is in 80 * in the kernel, and a more complex one where the Guest is in
72 * userspace. We check the privilege level to find out. */ 81 * userspace. We check the privilege level to find out.
82 */
73 if ((cpu->regs->ss&0x3) != GUEST_PL) { 83 if ((cpu->regs->ss&0x3) != GUEST_PL) {
74 /* The Guest told us their kernel stack with the SET_STACK 84 /*
75 * hypercall: both the virtual address and the segment */ 85 * The Guest told us their kernel stack with the SET_STACK
86 * hypercall: both the virtual address and the segment.
87 */
76 virtstack = cpu->esp1; 88 virtstack = cpu->esp1;
77 ss = cpu->ss1; 89 ss = cpu->ss1;
78 90
79 origstack = gstack = guest_pa(cpu, virtstack); 91 origstack = gstack = guest_pa(cpu, virtstack);
80 /* We push the old stack segment and pointer onto the new 92 /*
93 * We push the old stack segment and pointer onto the new
81 * stack: when the Guest does an "iret" back from the interrupt 94 * stack: when the Guest does an "iret" back from the interrupt
82 * handler the CPU will notice they're dropping privilege 95 * handler the CPU will notice they're dropping privilege
83 * levels and expect these here. */ 96 * levels and expect these here.
97 */
84 push_guest_stack(cpu, &gstack, cpu->regs->ss); 98 push_guest_stack(cpu, &gstack, cpu->regs->ss);
85 push_guest_stack(cpu, &gstack, cpu->regs->esp); 99 push_guest_stack(cpu, &gstack, cpu->regs->esp);
86 } else { 100 } else {
@@ -91,18 +105,22 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
91 origstack = gstack = guest_pa(cpu, virtstack); 105 origstack = gstack = guest_pa(cpu, virtstack);
92 } 106 }
93 107
94 /* Remember that we never let the Guest actually disable interrupts, so 108 /*
109 * Remember that we never let the Guest actually disable interrupts, so
95 * the "Interrupt Flag" bit is always set. We copy that bit from the 110 * the "Interrupt Flag" bit is always set. We copy that bit from the
96 * Guest's "irq_enabled" field into the eflags word: we saw the Guest 111 * Guest's "irq_enabled" field into the eflags word: we saw the Guest
97 * copy it back in "lguest_iret". */ 112 * copy it back in "lguest_iret".
113 */
98 eflags = cpu->regs->eflags; 114 eflags = cpu->regs->eflags;
99 if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 115 if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0
100 && !(irq_enable & X86_EFLAGS_IF)) 116 && !(irq_enable & X86_EFLAGS_IF))
101 eflags &= ~X86_EFLAGS_IF; 117 eflags &= ~X86_EFLAGS_IF;
102 118
103 /* An interrupt is expected to push three things on the stack: the old 119 /*
120 * An interrupt is expected to push three things on the stack: the old
104 * "eflags" word, the old code segment, and the old instruction 121 * "eflags" word, the old code segment, and the old instruction
105 * pointer. */ 122 * pointer.
123 */
106 push_guest_stack(cpu, &gstack, eflags); 124 push_guest_stack(cpu, &gstack, eflags);
107 push_guest_stack(cpu, &gstack, cpu->regs->cs); 125 push_guest_stack(cpu, &gstack, cpu->regs->cs);
108 push_guest_stack(cpu, &gstack, cpu->regs->eip); 126 push_guest_stack(cpu, &gstack, cpu->regs->eip);
@@ -111,15 +129,19 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
111 if (has_err) 129 if (has_err)
112 push_guest_stack(cpu, &gstack, cpu->regs->errcode); 130 push_guest_stack(cpu, &gstack, cpu->regs->errcode);
113 131
114 /* Now we've pushed all the old state, we change the stack, the code 132 /*
115 * segment and the address to execute. */ 133 * Now we've pushed all the old state, we change the stack, the code
134 * segment and the address to execute.
135 */
116 cpu->regs->ss = ss; 136 cpu->regs->ss = ss;
117 cpu->regs->esp = virtstack + (gstack - origstack); 137 cpu->regs->esp = virtstack + (gstack - origstack);
118 cpu->regs->cs = (__KERNEL_CS|GUEST_PL); 138 cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
119 cpu->regs->eip = idt_address(lo, hi); 139 cpu->regs->eip = idt_address(lo, hi);
120 140
121 /* There are two kinds of interrupt handlers: 0xE is an "interrupt 141 /*
122 * gate" which expects interrupts to be disabled on entry. */ 142 * There are two kinds of interrupt handlers: 0xE is an "interrupt
143 * gate" which expects interrupts to be disabled on entry.
144 */
123 if (idt_type(lo, hi) == 0xE) 145 if (idt_type(lo, hi) == 0xE)
124 if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) 146 if (put_user(0, &cpu->lg->lguest_data->irq_enabled))
125 kill_guest(cpu, "Disabling interrupts"); 147 kill_guest(cpu, "Disabling interrupts");
@@ -130,7 +152,8 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
130 * 152 *
131 * interrupt_pending() returns the first pending interrupt which isn't blocked 153 * interrupt_pending() returns the first pending interrupt which isn't blocked
132 * by the Guest. It is called before every entry to the Guest, and just before 154 * by the Guest. It is called before every entry to the Guest, and just before
133 * we go to sleep when the Guest has halted itself. */ 155 * we go to sleep when the Guest has halted itself.
156 */
134unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) 157unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
135{ 158{
136 unsigned int irq; 159 unsigned int irq;
@@ -140,8 +163,10 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
140 if (!cpu->lg->lguest_data) 163 if (!cpu->lg->lguest_data)
141 return LGUEST_IRQS; 164 return LGUEST_IRQS;
142 165
143 /* Take our "irqs_pending" array and remove any interrupts the Guest 166 /*
144 * wants blocked: the result ends up in "blk". */ 167 * Take our "irqs_pending" array and remove any interrupts the Guest
168 * wants blocked: the result ends up in "blk".
169 */
145 if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, 170 if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
146 sizeof(blk))) 171 sizeof(blk)))
147 return LGUEST_IRQS; 172 return LGUEST_IRQS;
@@ -154,16 +179,20 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
154 return irq; 179 return irq;
155} 180}
156 181
157/* This actually diverts the Guest to running an interrupt handler, once an 182/*
158 * interrupt has been identified by interrupt_pending(). */ 183 * This actually diverts the Guest to running an interrupt handler, once an
184 * interrupt has been identified by interrupt_pending().
185 */
159void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) 186void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
160{ 187{
161 struct desc_struct *idt; 188 struct desc_struct *idt;
162 189
163 BUG_ON(irq >= LGUEST_IRQS); 190 BUG_ON(irq >= LGUEST_IRQS);
164 191
165 /* They may be in the middle of an iret, where they asked us never to 192 /*
166 * deliver interrupts. */ 193 * They may be in the middle of an iret, where they asked us never to
194 * deliver interrupts.
195 */
167 if (cpu->regs->eip >= cpu->lg->noirq_start && 196 if (cpu->regs->eip >= cpu->lg->noirq_start &&
168 (cpu->regs->eip < cpu->lg->noirq_end)) 197 (cpu->regs->eip < cpu->lg->noirq_end))
169 return; 198 return;
@@ -187,29 +216,37 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
187 } 216 }
188 } 217 }
189 218
190 /* Look at the IDT entry the Guest gave us for this interrupt. The 219 /*
220 * Look at the IDT entry the Guest gave us for this interrupt. The
191 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip 221 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
192 * over them. */ 222 * over them.
223 */
193 idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; 224 idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
194 /* If they don't have a handler (yet?), we just ignore it */ 225 /* If they don't have a handler (yet?), we just ignore it */
195 if (idt_present(idt->a, idt->b)) { 226 if (idt_present(idt->a, idt->b)) {
196 /* OK, mark it no longer pending and deliver it. */ 227 /* OK, mark it no longer pending and deliver it. */
197 clear_bit(irq, cpu->irqs_pending); 228 clear_bit(irq, cpu->irqs_pending);
198 /* set_guest_interrupt() takes the interrupt descriptor and a 229 /*
230 * set_guest_interrupt() takes the interrupt descriptor and a
199 * flag to say whether this interrupt pushes an error code onto 231 * flag to say whether this interrupt pushes an error code onto
200 * the stack as well: virtual interrupts never do. */ 232 * the stack as well: virtual interrupts never do.
233 */
201 set_guest_interrupt(cpu, idt->a, idt->b, false); 234 set_guest_interrupt(cpu, idt->a, idt->b, false);
202 } 235 }
203 236
204 /* Every time we deliver an interrupt, we update the timestamp in the 237 /*
238 * Every time we deliver an interrupt, we update the timestamp in the
205 * Guest's lguest_data struct. It would be better for the Guest if we 239 * Guest's lguest_data struct. It would be better for the Guest if we
206 * did this more often, but it can actually be quite slow: doing it 240 * did this more often, but it can actually be quite slow: doing it
207 * here is a compromise which means at least it gets updated every 241 * here is a compromise which means at least it gets updated every
208 * timer interrupt. */ 242 * timer interrupt.
243 */
209 write_timestamp(cpu); 244 write_timestamp(cpu);
210 245
211 /* If there are no other interrupts we want to deliver, clear 246 /*
212 * the pending flag. */ 247 * If there are no other interrupts we want to deliver, clear
248 * the pending flag.
249 */
213 if (!more) 250 if (!more)
214 put_user(0, &cpu->lg->lguest_data->irq_pending); 251 put_user(0, &cpu->lg->lguest_data->irq_pending);
215} 252}
@@ -217,24 +254,29 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
217/* And this is the routine when we want to set an interrupt for the Guest. */ 254/* And this is the routine when we want to set an interrupt for the Guest. */
218void set_interrupt(struct lg_cpu *cpu, unsigned int irq) 255void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
219{ 256{
220 /* Next time the Guest runs, the core code will see if it can deliver 257 /*
221 * this interrupt. */ 258 * Next time the Guest runs, the core code will see if it can deliver
259 * this interrupt.
260 */
222 set_bit(irq, cpu->irqs_pending); 261 set_bit(irq, cpu->irqs_pending);
223 262
224 /* Make sure it sees it; it might be asleep (eg. halted), or 263 /*
225 * running the Guest right now, in which case kick_process() 264 * Make sure it sees it; it might be asleep (eg. halted), or running
226 * will knock it out. */ 265 * the Guest right now, in which case kick_process() will knock it out.
266 */
227 if (!wake_up_process(cpu->tsk)) 267 if (!wake_up_process(cpu->tsk))
228 kick_process(cpu->tsk); 268 kick_process(cpu->tsk);
229} 269}
230/*:*/ 270/*:*/
231 271
232/* Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent 272/*
273 * Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent
233 * me a patch, so we support that too. It'd be a big step for lguest if half 274 * me a patch, so we support that too. It'd be a big step for lguest if half
234 * the Plan 9 user base were to start using it. 275 * the Plan 9 user base were to start using it.
235 * 276 *
236 * Actually now I think of it, it's possible that Ron *is* half the Plan 9 277 * Actually now I think of it, it's possible that Ron *is* half the Plan 9
237 * userbase. Oh well. */ 278 * userbase. Oh well.
279 */
238static bool could_be_syscall(unsigned int num) 280static bool could_be_syscall(unsigned int num)
239{ 281{
240 /* Normal Linux SYSCALL_VECTOR or reserved vector? */ 282 /* Normal Linux SYSCALL_VECTOR or reserved vector? */
@@ -274,9 +316,11 @@ void free_interrupts(void)
274 clear_bit(syscall_vector, used_vectors); 316 clear_bit(syscall_vector, used_vectors);
275} 317}
276 318
277/*H:220 Now we've got the routines to deliver interrupts, delivering traps like 319/*H:220
320 * Now we've got the routines to deliver interrupts, delivering traps like
278 * page fault is easy. The only trick is that Intel decided that some traps 321 * page fault is easy. The only trick is that Intel decided that some traps
279 * should have error codes: */ 322 * should have error codes:
323 */
280static bool has_err(unsigned int trap) 324static bool has_err(unsigned int trap)
281{ 325{
282 return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); 326 return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17);
@@ -285,13 +329,17 @@ static bool has_err(unsigned int trap)
285/* deliver_trap() returns true if it could deliver the trap. */ 329/* deliver_trap() returns true if it could deliver the trap. */
286bool deliver_trap(struct lg_cpu *cpu, unsigned int num) 330bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
287{ 331{
288 /* Trap numbers are always 8 bit, but we set an impossible trap number 332 /*
289 * for traps inside the Switcher, so check that here. */ 333 * Trap numbers are always 8 bit, but we set an impossible trap number
334 * for traps inside the Switcher, so check that here.
335 */
290 if (num >= ARRAY_SIZE(cpu->arch.idt)) 336 if (num >= ARRAY_SIZE(cpu->arch.idt))
291 return false; 337 return false;
292 338
293 /* Early on the Guest hasn't set the IDT entries (or maybe it put a 339 /*
294 * bogus one in): if we fail here, the Guest will be killed. */ 340 * Early on the Guest hasn't set the IDT entries (or maybe it put a
341 * bogus one in): if we fail here, the Guest will be killed.
342 */
295 if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) 343 if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
296 return false; 344 return false;
297 set_guest_interrupt(cpu, cpu->arch.idt[num].a, 345 set_guest_interrupt(cpu, cpu->arch.idt[num].a,
@@ -299,7 +347,8 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
299 return true; 347 return true;
300} 348}
301 349
302/*H:250 Here's the hard part: returning to the Host every time a trap happens 350/*H:250
351 * Here's the hard part: returning to the Host every time a trap happens
303 * and then calling deliver_trap() and re-entering the Guest is slow. 352 * and then calling deliver_trap() and re-entering the Guest is slow.
304 * Particularly because Guest userspace system calls are traps (usually trap 353 * Particularly because Guest userspace system calls are traps (usually trap
305 * 128). 354 * 128).
@@ -311,69 +360,87 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
311 * the other hypervisors would beat it up at lunchtime. 360 * the other hypervisors would beat it up at lunchtime.
312 * 361 *
313 * This routine indicates if a particular trap number could be delivered 362 * This routine indicates if a particular trap number could be delivered
314 * directly. */ 363 * directly.
364 */
315static bool direct_trap(unsigned int num) 365static bool direct_trap(unsigned int num)
316{ 366{
317 /* Hardware interrupts don't go to the Guest at all (except system 367 /*
318 * call). */ 368 * Hardware interrupts don't go to the Guest at all (except system
369 * call).
370 */
319 if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num)) 371 if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num))
320 return false; 372 return false;
321 373
322 /* The Host needs to see page faults (for shadow paging and to save the 374 /*
375 * The Host needs to see page faults (for shadow paging and to save the
323 * fault address), general protection faults (in/out emulation) and 376 * fault address), general protection faults (in/out emulation) and
324 * device not available (TS handling), invalid opcode fault (kvm hcall), 377 * device not available (TS handling), invalid opcode fault (kvm hcall),
325 * and of course, the hypercall trap. */ 378 * and of course, the hypercall trap.
379 */
326 return num != 14 && num != 13 && num != 7 && 380 return num != 14 && num != 13 && num != 7 &&
327 num != 6 && num != LGUEST_TRAP_ENTRY; 381 num != 6 && num != LGUEST_TRAP_ENTRY;
328} 382}
329/*:*/ 383/*:*/
330 384
331/*M:005 The Guest has the ability to turn its interrupt gates into trap gates, 385/*M:005
386 * The Guest has the ability to turn its interrupt gates into trap gates,
332 * if it is careful. The Host will let trap gates go directly to the 387 * if it is careful. The Host will let trap gates go directly to the
333 * Guest, but the Guest needs the interrupts atomically disabled for an 388 * Guest, but the Guest needs the interrupts atomically disabled for an
334 * interrupt gate. It can do this by pointing the trap gate at instructions 389 * interrupt gate. It can do this by pointing the trap gate at instructions
335 * within noirq_start and noirq_end, where it can safely disable interrupts. */ 390 * within noirq_start and noirq_end, where it can safely disable interrupts.
391 */
336 392
337/*M:006 The Guests do not use the sysenter (fast system call) instruction, 393/*M:006
394 * The Guests do not use the sysenter (fast system call) instruction,
338 * because it's hardcoded to enter privilege level 0 and so can't go direct. 395 * because it's hardcoded to enter privilege level 0 and so can't go direct.
339 * It's about twice as fast as the older "int 0x80" system call, so it might 396 * It's about twice as fast as the older "int 0x80" system call, so it might
340 * still be worthwhile to handle it in the Switcher and lcall down to the 397 * still be worthwhile to handle it in the Switcher and lcall down to the
341 * Guest. The sysenter semantics are hairy tho: search for that keyword in 398 * Guest. The sysenter semantics are hairy tho: search for that keyword in
342 * entry.S :*/ 399 * entry.S
400:*/
343 401
344/*H:260 When we make traps go directly into the Guest, we need to make sure 402/*H:260
403 * When we make traps go directly into the Guest, we need to make sure
345 * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the 404 * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the
346 * CPU trying to deliver the trap will fault while trying to push the interrupt 405 * CPU trying to deliver the trap will fault while trying to push the interrupt
347 * words on the stack: this is called a double fault, and it forces us to kill 406 * words on the stack: this is called a double fault, and it forces us to kill
348 * the Guest. 407 * the Guest.
349 * 408 *
350 * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */ 409 * Which is deeply unfair, because (literally!) it wasn't the Guests' fault.
410 */
351void pin_stack_pages(struct lg_cpu *cpu) 411void pin_stack_pages(struct lg_cpu *cpu)
352{ 412{
353 unsigned int i; 413 unsigned int i;
354 414
355 /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or 415 /*
356 * two pages of stack space. */ 416 * Depending on the CONFIG_4KSTACKS option, the Guest can have one or
417 * two pages of stack space.
418 */
357 for (i = 0; i < cpu->lg->stack_pages; i++) 419 for (i = 0; i < cpu->lg->stack_pages; i++)
358 /* The stack grows *upwards*, so the address we're given is the 420 /*
421 * The stack grows *upwards*, so the address we're given is the
359 * start of the page after the kernel stack. Subtract one to 422 * start of the page after the kernel stack. Subtract one to
360 * get back onto the first stack page, and keep subtracting to 423 * get back onto the first stack page, and keep subtracting to
361 * get to the rest of the stack pages. */ 424 * get to the rest of the stack pages.
425 */
362 pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); 426 pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE);
363} 427}
364 428
365/* Direct traps also mean that we need to know whenever the Guest wants to use 429/*
430 * Direct traps also mean that we need to know whenever the Guest wants to use
366 * a different kernel stack, so we can change the IDT entries to use that 431 * a different kernel stack, so we can change the IDT entries to use that
367 * stack. The IDT entries expect a virtual address, so unlike most addresses 432 * stack. The IDT entries expect a virtual address, so unlike most addresses
368 * the Guest gives us, the "esp" (stack pointer) value here is virtual, not 433 * the Guest gives us, the "esp" (stack pointer) value here is virtual, not
369 * physical. 434 * physical.
370 * 435 *
371 * In Linux each process has its own kernel stack, so this happens a lot: we 436 * In Linux each process has its own kernel stack, so this happens a lot: we
372 * change stacks on each context switch. */ 437 * change stacks on each context switch.
438 */
373void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) 439void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages)
374{ 440{
375 /* You are not allowed have a stack segment with privilege level 0: bad 441 /*
376 * Guest! */ 442 * You're not allowed a stack segment with privilege level 0: bad Guest!
443 */
377 if ((seg & 0x3) != GUEST_PL) 444 if ((seg & 0x3) != GUEST_PL)
378 kill_guest(cpu, "bad stack segment %i", seg); 445 kill_guest(cpu, "bad stack segment %i", seg);
379 /* We only expect one or two stack pages. */ 446 /* We only expect one or two stack pages. */
@@ -387,11 +454,15 @@ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages)
387 pin_stack_pages(cpu); 454 pin_stack_pages(cpu);
388} 455}
389 456
390/* All this reference to mapping stacks leads us neatly into the other complex 457/*
391 * part of the Host: page table handling. */ 458 * All this reference to mapping stacks leads us neatly into the other complex
459 * part of the Host: page table handling.
460 */
392 461
393/*H:235 This is the routine which actually checks the Guest's IDT entry and 462/*H:235
394 * transfers it into the entry in "struct lguest": */ 463 * This is the routine which actually checks the Guest's IDT entry and
464 * transfers it into the entry in "struct lguest":
465 */
395static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, 466static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap,
396 unsigned int num, u32 lo, u32 hi) 467 unsigned int num, u32 lo, u32 hi)
397{ 468{
@@ -407,30 +478,38 @@ static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap,
407 if (type != 0xE && type != 0xF) 478 if (type != 0xE && type != 0xF)
408 kill_guest(cpu, "bad IDT type %i", type); 479 kill_guest(cpu, "bad IDT type %i", type);
409 480
410 /* We only copy the handler address, present bit, privilege level and 481 /*
482 * We only copy the handler address, present bit, privilege level and
411 * type. The privilege level controls where the trap can be triggered 483 * type. The privilege level controls where the trap can be triggered
412 * manually with an "int" instruction. This is usually GUEST_PL, 484 * manually with an "int" instruction. This is usually GUEST_PL,
413 * except for system calls which userspace can use. */ 485 * except for system calls which userspace can use.
486 */
414 trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); 487 trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF);
415 trap->b = (hi&0xFFFFEF00); 488 trap->b = (hi&0xFFFFEF00);
416} 489}
417 490
418/*H:230 While we're here, dealing with delivering traps and interrupts to the 491/*H:230
492 * While we're here, dealing with delivering traps and interrupts to the
419 * Guest, we might as well complete the picture: how the Guest tells us where 493 * Guest, we might as well complete the picture: how the Guest tells us where
420 * it wants them to go. This would be simple, except making traps fast 494 * it wants them to go. This would be simple, except making traps fast
421 * requires some tricks. 495 * requires some tricks.
422 * 496 *
423 * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the 497 * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the
424 * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */ 498 * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here.
499 */
425void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) 500void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi)
426{ 501{
427 /* Guest never handles: NMI, doublefault, spurious interrupt or 502 /*
428 * hypercall. We ignore when it tries to set them. */ 503 * Guest never handles: NMI, doublefault, spurious interrupt or
504 * hypercall. We ignore when it tries to set them.
505 */
429 if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) 506 if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY)
430 return; 507 return;
431 508
432 /* Mark the IDT as changed: next time the Guest runs we'll know we have 509 /*
433 * to copy this again. */ 510 * Mark the IDT as changed: next time the Guest runs we'll know we have
511 * to copy this again.
512 */
434 cpu->changed |= CHANGED_IDT; 513 cpu->changed |= CHANGED_IDT;
435 514
436 /* Check that the Guest doesn't try to step outside the bounds. */ 515 /* Check that the Guest doesn't try to step outside the bounds. */
@@ -440,9 +519,11 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi)
440 set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); 519 set_trap(cpu, &cpu->arch.idt[num], num, lo, hi);
441} 520}
442 521
443/* The default entry for each interrupt points into the Switcher routines which 522/*
523 * The default entry for each interrupt points into the Switcher routines which
444 * simply return to the Host. The run_guest() loop will then call 524 * simply return to the Host. The run_guest() loop will then call
445 * deliver_trap() to bounce it back into the Guest. */ 525 * deliver_trap() to bounce it back into the Guest.
526 */
446static void default_idt_entry(struct desc_struct *idt, 527static void default_idt_entry(struct desc_struct *idt,
447 int trap, 528 int trap,
448 const unsigned long handler, 529 const unsigned long handler,
@@ -451,13 +532,17 @@ static void default_idt_entry(struct desc_struct *idt,
451 /* A present interrupt gate. */ 532 /* A present interrupt gate. */
452 u32 flags = 0x8e00; 533 u32 flags = 0x8e00;
453 534
454 /* Set the privilege level on the entry for the hypercall: this allows 535 /*
455 * the Guest to use the "int" instruction to trigger it. */ 536 * Set the privilege level on the entry for the hypercall: this allows
537 * the Guest to use the "int" instruction to trigger it.
538 */
456 if (trap == LGUEST_TRAP_ENTRY) 539 if (trap == LGUEST_TRAP_ENTRY)
457 flags |= (GUEST_PL << 13); 540 flags |= (GUEST_PL << 13);
458 else if (base) 541 else if (base)
459 /* Copy priv. level from what Guest asked for. This allows 542 /*
460 * debug (int 3) traps from Guest userspace, for example. */ 543 * Copy privilege level from what Guest asked for. This allows
544 * debug (int 3) traps from Guest userspace, for example.
545 */
461 flags |= (base->b & 0x6000); 546 flags |= (base->b & 0x6000);
462 547
463 /* Now pack it into the IDT entry in its weird format. */ 548 /* Now pack it into the IDT entry in its weird format. */
@@ -475,16 +560,20 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
475 default_idt_entry(&state->guest_idt[i], i, def[i], NULL); 560 default_idt_entry(&state->guest_idt[i], i, def[i], NULL);
476} 561}
477 562
478/*H:240 We don't use the IDT entries in the "struct lguest" directly, instead 563/*H:240
564 * We don't use the IDT entries in the "struct lguest" directly, instead
479 * we copy them into the IDT which we've set up for Guests on this CPU, just 565 * we copy them into the IDT which we've set up for Guests on this CPU, just
480 * before we run the Guest. This routine does that copy. */ 566 * before we run the Guest. This routine does that copy.
567 */
481void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, 568void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
482 const unsigned long *def) 569 const unsigned long *def)
483{ 570{
484 unsigned int i; 571 unsigned int i;
485 572
486 /* We can simply copy the direct traps, otherwise we use the default 573 /*
487 * ones in the Switcher: they will return to the Host. */ 574 * We can simply copy the direct traps, otherwise we use the default
575 * ones in the Switcher: they will return to the Host.
576 */
488 for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { 577 for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) {
489 const struct desc_struct *gidt = &cpu->arch.idt[i]; 578 const struct desc_struct *gidt = &cpu->arch.idt[i];
490 579
@@ -492,14 +581,16 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
492 if (!direct_trap(i)) 581 if (!direct_trap(i))
493 continue; 582 continue;
494 583
495 /* Only trap gates (type 15) can go direct to the Guest. 584 /*
585 * Only trap gates (type 15) can go direct to the Guest.
496 * Interrupt gates (type 14) disable interrupts as they are 586 * Interrupt gates (type 14) disable interrupts as they are
497 * entered, which we never let the Guest do. Not present 587 * entered, which we never let the Guest do. Not present
498 * entries (type 0x0) also can't go direct, of course. 588 * entries (type 0x0) also can't go direct, of course.
499 * 589 *
500 * If it can't go direct, we still need to copy the priv. level: 590 * If it can't go direct, we still need to copy the priv. level:
501 * they might want to give userspace access to a software 591 * they might want to give userspace access to a software
502 * interrupt. */ 592 * interrupt.
593 */
503 if (idt_type(gidt->a, gidt->b) == 0xF) 594 if (idt_type(gidt->a, gidt->b) == 0xF)
504 idt[i] = *gidt; 595 idt[i] = *gidt;
505 else 596 else
@@ -518,7 +609,8 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
518 * the next timer interrupt (in nanoseconds). We use the high-resolution timer 609 * the next timer interrupt (in nanoseconds). We use the high-resolution timer
519 * infrastructure to set a callback at that time. 610 * infrastructure to set a callback at that time.
520 * 611 *
521 * 0 means "turn off the clock". */ 612 * 0 means "turn off the clock".
613 */
522void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) 614void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta)
523{ 615{
524 ktime_t expires; 616 ktime_t expires;
@@ -529,9 +621,11 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta)
529 return; 621 return;
530 } 622 }
531 623
532 /* We use wallclock time here, so the Guest might not be running for 624 /*
625 * We use wallclock time here, so the Guest might not be running for
533 * all the time between now and the timer interrupt it asked for. This 626 * all the time between now and the timer interrupt it asked for. This
534 * is almost always the right thing to do. */ 627 * is almost always the right thing to do.
628 */
535 expires = ktime_add_ns(ktime_get_real(), delta); 629 expires = ktime_add_ns(ktime_get_real(), delta);
536 hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); 630 hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS);
537} 631}