diff options
| author | Rusty Russell <rusty@rustcorp.com.au> | 2015-02-10 23:45:10 -0500 |
|---|---|---|
| committer | Rusty Russell <rusty@rustcorp.com.au> | 2015-02-11 01:17:31 -0500 |
| commit | c565650b1028bc551e5d16dd0ec8f7078da7cace (patch) | |
| tree | 019581b2a4821eba84ebdff179034c5ac57ca1a3 /drivers/lguest | |
| parent | c9e433e4b852b70ea267388cf9b5d8096b04c44c (diff) | |
lguest: send trap 13 through to userspace.
We copy 7 bytes at eip for userspace's instruction decode; we have to
carefully handle the case where eip is at the end of a page. We can't
leave this to userspace since the kernel has all the page table decode
logic.
The decode logic moves to userspace, basically unchanged.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers/lguest')
| -rw-r--r-- | drivers/lguest/x86/core.c | 133 |
1 files changed, 43 insertions, 90 deletions
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index f7a16b4ea456..42e87bf14113 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
| @@ -314,95 +314,52 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
| 314 | * usually attached to a PC. | 314 | * usually attached to a PC. |
| 315 | * | 315 | * |
| 316 | * When the Guest uses one of these instructions, we get a trap (General | 316 | * When the Guest uses one of these instructions, we get a trap (General |
| 317 | * Protection Fault) and come here. We see if it's one of those troublesome | 317 | * Protection Fault) and come here. We queue this to be sent out to the |
| 318 | * instructions and skip over it. We return true if we did. | 318 | * Launcher to handle. |
| 319 | */ | 319 | */ |
| 320 | static int emulate_insn(struct lg_cpu *cpu) | ||
| 321 | { | ||
| 322 | u8 insn; | ||
| 323 | unsigned int insnlen = 0, in = 0, small_operand = 0; | ||
| 324 | /* | ||
| 325 | * The eip contains the *virtual* address of the Guest's instruction: | ||
| 326 | * walk the Guest's page tables to find the "physical" address. | ||
| 327 | */ | ||
| 328 | unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); | ||
| 329 | |||
| 330 | /* | ||
| 331 | * This must be the Guest kernel trying to do something, not userspace! | ||
| 332 | * The bottom two bits of the CS segment register are the privilege | ||
| 333 | * level. | ||
| 334 | */ | ||
| 335 | if ((cpu->regs->cs & 3) != GUEST_PL) | ||
| 336 | return 0; | ||
| 337 | 320 | ||
| 338 | /* Decoding x86 instructions is icky. */ | 321 | /* |
| 339 | insn = lgread(cpu, physaddr, u8); | 322 | * The eip contains the *virtual* address of the Guest's instruction: |
| 340 | 323 | * we copy the instruction here so the Launcher doesn't have to walk | |
| 341 | /* | 324 | * the page tables to decode it. We handle the case (eg. in a kernel |
| 342 | * Around 2.6.33, the kernel started using an emulation for the | 325 | * module) where the instruction is over two pages, and the pages are |
| 343 | * cmpxchg8b instruction in early boot on many configurations. This | 326 | * virtually but not physically contiguous. |
| 344 | * code isn't paravirtualized, and it tries to disable interrupts. | 327 | * |
| 345 | * Ignore it, which will Mostly Work. | 328 | * The longest possible x86 instruction is 15 bytes, but we don't handle |
| 346 | */ | 329 | * anything that strange. |
| 347 | if (insn == 0xfa) { | 330 | */ |
| 348 | /* "cli", or Clear Interrupt Enable instruction. Skip it. */ | 331 | static void copy_from_guest(struct lg_cpu *cpu, |
| 349 | cpu->regs->eip++; | 332 | void *dst, unsigned long vaddr, size_t len) |
| 350 | return 1; | 333 | { |
| 334 | size_t to_page_end = PAGE_SIZE - (vaddr % PAGE_SIZE); | ||
| 335 | unsigned long paddr; | ||
| 336 | |||
| 337 | BUG_ON(len > PAGE_SIZE); | ||
| 338 | |||
| 339 | /* If it goes over a page, copy in two parts. */ | ||
| 340 | if (len > to_page_end) { | ||
| 341 | /* But make sure the next page is mapped! */ | ||
| 342 | if (__guest_pa(cpu, vaddr + to_page_end, &paddr)) | ||
| 343 | copy_from_guest(cpu, dst + to_page_end, | ||
| 344 | vaddr + to_page_end, | ||
| 345 | len - to_page_end); | ||
| 346 | else | ||
| 347 | /* Otherwise fill with zeroes. */ | ||
| 348 | memset(dst + to_page_end, 0, len - to_page_end); | ||
| 349 | len = to_page_end; | ||
| 351 | } | 350 | } |
| 352 | 351 | ||
| 353 | /* | 352 | /* This will kill the guest if it isn't mapped, but that |
| 354 | * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out. | 353 | * shouldn't happen. */ |
| 355 | */ | 354 | __lgread(cpu, dst, guest_pa(cpu, vaddr), len); |
| 356 | if (insn == 0x66) { | 355 | } |
| 357 | small_operand = 1; | ||
| 358 | /* The instruction is 1 byte so far, read the next byte. */ | ||
| 359 | insnlen = 1; | ||
| 360 | insn = lgread(cpu, physaddr + insnlen, u8); | ||
| 361 | } | ||
| 362 | 356 | ||
| 363 | /* | ||
| 364 | * We can ignore the lower bit for the moment and decode the 4 opcodes | ||
| 365 | * we need to emulate. | ||
| 366 | */ | ||
| 367 | switch (insn & 0xFE) { | ||
| 368 | case 0xE4: /* in <next byte>,%al */ | ||
| 369 | insnlen += 2; | ||
| 370 | in = 1; | ||
| 371 | break; | ||
| 372 | case 0xEC: /* in (%dx),%al */ | ||
| 373 | insnlen += 1; | ||
| 374 | in = 1; | ||
| 375 | break; | ||
| 376 | case 0xE6: /* out %al,<next byte> */ | ||
| 377 | insnlen += 2; | ||
| 378 | break; | ||
| 379 | case 0xEE: /* out %al,(%dx) */ | ||
| 380 | insnlen += 1; | ||
| 381 | break; | ||
| 382 | default: | ||
| 383 | /* OK, we don't know what this is, can't emulate. */ | ||
| 384 | return 0; | ||
| 385 | } | ||
| 386 | 357 | ||
| 387 | /* | 358 | static void setup_emulate_insn(struct lg_cpu *cpu) |
| 388 | * If it was an "IN" instruction, they expect the result to be read | 359 | { |
| 389 | * into %eax, so we change %eax. We always return all-ones, which | 360 | cpu->pending.trap = 13; |
| 390 | * traditionally means "there's nothing there". | 361 | copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, |
| 391 | */ | 362 | sizeof(cpu->pending.insn)); |
| 392 | if (in) { | ||
| 393 | /* Lower bit tells means it's a 32/16 bit access */ | ||
| 394 | if (insn & 0x1) { | ||
| 395 | if (small_operand) | ||
| 396 | cpu->regs->eax |= 0xFFFF; | ||
| 397 | else | ||
| 398 | cpu->regs->eax = 0xFFFFFFFF; | ||
| 399 | } else | ||
| 400 | cpu->regs->eax |= 0xFF; | ||
| 401 | } | ||
| 402 | /* Finally, we've "done" the instruction, so move past it. */ | ||
| 403 | cpu->regs->eip += insnlen; | ||
| 404 | /* Success! */ | ||
| 405 | return 1; | ||
| 406 | } | 363 | } |
| 407 | 364 | ||
| 408 | /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ | 365 | /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ |
| @@ -410,14 +367,10 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) | |||
| 410 | { | 367 | { |
| 411 | switch (cpu->regs->trapnum) { | 368 | switch (cpu->regs->trapnum) { |
| 412 | case 13: /* We've intercepted a General Protection Fault. */ | 369 | case 13: /* We've intercepted a General Protection Fault. */ |
| 413 | /* | 370 | /* Hand to Launcher to emulate those pesky IN and OUT insns */ |
| 414 | * Check if this was one of those annoying IN or OUT | ||
| 415 | * instructions which we need to emulate. If so, we just go | ||
| 416 | * back into the Guest after we've done it. | ||
| 417 | */ | ||
| 418 | if (cpu->regs->errcode == 0) { | 371 | if (cpu->regs->errcode == 0) { |
| 419 | if (emulate_insn(cpu)) | 372 | setup_emulate_insn(cpu); |
| 420 | return; | 373 | return; |
| 421 | } | 374 | } |
| 422 | break; | 375 | break; |
| 423 | case 14: /* We've intercepted a Page Fault. */ | 376 | case 14: /* We've intercepted a Page Fault. */ |
