diff options
Diffstat (limited to 'drivers/lguest/x86/core.c')
-rw-r--r-- | drivers/lguest/x86/core.c | 198 |
1 files changed, 107 insertions, 91 deletions
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 6adfd7ba4c97..30f2aef69d78 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
@@ -182,6 +182,52 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) | |||
182 | } | 182 | } |
183 | /*:*/ | 183 | /*:*/ |
184 | 184 | ||
185 | unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any) | ||
186 | { | ||
187 | switch (reg_off) { | ||
188 | case offsetof(struct pt_regs, bx): | ||
189 | return &cpu->regs->ebx; | ||
190 | case offsetof(struct pt_regs, cx): | ||
191 | return &cpu->regs->ecx; | ||
192 | case offsetof(struct pt_regs, dx): | ||
193 | return &cpu->regs->edx; | ||
194 | case offsetof(struct pt_regs, si): | ||
195 | return &cpu->regs->esi; | ||
196 | case offsetof(struct pt_regs, di): | ||
197 | return &cpu->regs->edi; | ||
198 | case offsetof(struct pt_regs, bp): | ||
199 | return &cpu->regs->ebp; | ||
200 | case offsetof(struct pt_regs, ax): | ||
201 | return &cpu->regs->eax; | ||
202 | case offsetof(struct pt_regs, ip): | ||
203 | return &cpu->regs->eip; | ||
204 | case offsetof(struct pt_regs, sp): | ||
205 | return &cpu->regs->esp; | ||
206 | } | ||
207 | |||
208 | /* Launcher can read these, but we don't allow any setting. */ | ||
209 | if (any) { | ||
210 | switch (reg_off) { | ||
211 | case offsetof(struct pt_regs, ds): | ||
212 | return &cpu->regs->ds; | ||
213 | case offsetof(struct pt_regs, es): | ||
214 | return &cpu->regs->es; | ||
215 | case offsetof(struct pt_regs, fs): | ||
216 | return &cpu->regs->fs; | ||
217 | case offsetof(struct pt_regs, gs): | ||
218 | return &cpu->regs->gs; | ||
219 | case offsetof(struct pt_regs, cs): | ||
220 | return &cpu->regs->cs; | ||
221 | case offsetof(struct pt_regs, flags): | ||
222 | return &cpu->regs->eflags; | ||
223 | case offsetof(struct pt_regs, ss): | ||
224 | return &cpu->regs->ss; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | return NULL; | ||
229 | } | ||
230 | |||
185 | /*M:002 | 231 | /*M:002 |
186 | * There are hooks in the scheduler which we can register to tell when we | 232 | * There are hooks in the scheduler which we can register to tell when we |
187 | * get kicked off the CPU (preempt_notifier_register()). This would allow us | 233 | * get kicked off the CPU (preempt_notifier_register()). This would allow us |
@@ -269,110 +315,73 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
269 | * usually attached to a PC. | 315 | * usually attached to a PC. |
270 | * | 316 | * |
271 | * When the Guest uses one of these instructions, we get a trap (General | 317 | * When the Guest uses one of these instructions, we get a trap (General |
272 | * Protection Fault) and come here. We see if it's one of those troublesome | 318 | * Protection Fault) and come here. We queue this to be sent out to the |
273 | * instructions and skip over it. We return true if we did. | 319 | * Launcher to handle. |
274 | */ | 320 | */ |
275 | static int emulate_insn(struct lg_cpu *cpu) | ||
276 | { | ||
277 | u8 insn; | ||
278 | unsigned int insnlen = 0, in = 0, small_operand = 0; | ||
279 | /* | ||
280 | * The eip contains the *virtual* address of the Guest's instruction: | ||
281 | * walk the Guest's page tables to find the "physical" address. | ||
282 | */ | ||
283 | unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); | ||
284 | |||
285 | /* | ||
286 | * This must be the Guest kernel trying to do something, not userspace! | ||
287 | * The bottom two bits of the CS segment register are the privilege | ||
288 | * level. | ||
289 | */ | ||
290 | if ((cpu->regs->cs & 3) != GUEST_PL) | ||
291 | return 0; | ||
292 | |||
293 | /* Decoding x86 instructions is icky. */ | ||
294 | insn = lgread(cpu, physaddr, u8); | ||
295 | 321 | ||
296 | /* | 322 | /* |
297 | * Around 2.6.33, the kernel started using an emulation for the | 323 | * The eip contains the *virtual* address of the Guest's instruction: |
298 | * cmpxchg8b instruction in early boot on many configurations. This | 324 | * we copy the instruction here so the Launcher doesn't have to walk |
299 | * code isn't paravirtualized, and it tries to disable interrupts. | 325 | * the page tables to decode it. We handle the case (eg. in a kernel |
300 | * Ignore it, which will Mostly Work. | 326 | * module) where the instruction is over two pages, and the pages are |
301 | */ | 327 | * virtually but not physically contiguous. |
302 | if (insn == 0xfa) { | 328 | * |
303 | /* "cli", or Clear Interrupt Enable instruction. Skip it. */ | 329 | * The longest possible x86 instruction is 15 bytes, but we don't handle |
304 | cpu->regs->eip++; | 330 | * anything that strange. |
305 | return 1; | 331 | */ |
332 | static void copy_from_guest(struct lg_cpu *cpu, | ||
333 | void *dst, unsigned long vaddr, size_t len) | ||
334 | { | ||
335 | size_t to_page_end = PAGE_SIZE - (vaddr % PAGE_SIZE); | ||
336 | unsigned long paddr; | ||
337 | |||
338 | BUG_ON(len > PAGE_SIZE); | ||
339 | |||
340 | /* If it goes over a page, copy in two parts. */ | ||
341 | if (len > to_page_end) { | ||
342 | /* But make sure the next page is mapped! */ | ||
343 | if (__guest_pa(cpu, vaddr + to_page_end, &paddr)) | ||
344 | copy_from_guest(cpu, dst + to_page_end, | ||
345 | vaddr + to_page_end, | ||
346 | len - to_page_end); | ||
347 | else | ||
348 | /* Otherwise fill with zeroes. */ | ||
349 | memset(dst + to_page_end, 0, len - to_page_end); | ||
350 | len = to_page_end; | ||
306 | } | 351 | } |
307 | 352 | ||
308 | /* | 353 | /* This will kill the guest if it isn't mapped, but that |
309 | * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out. | 354 | * shouldn't happen. */ |
310 | */ | 355 | __lgread(cpu, dst, guest_pa(cpu, vaddr), len); |
311 | if (insn == 0x66) { | 356 | } |
312 | small_operand = 1; | ||
313 | /* The instruction is 1 byte so far, read the next byte. */ | ||
314 | insnlen = 1; | ||
315 | insn = lgread(cpu, physaddr + insnlen, u8); | ||
316 | } | ||
317 | 357 | ||
318 | /* | ||
319 | * We can ignore the lower bit for the moment and decode the 4 opcodes | ||
320 | * we need to emulate. | ||
321 | */ | ||
322 | switch (insn & 0xFE) { | ||
323 | case 0xE4: /* in <next byte>,%al */ | ||
324 | insnlen += 2; | ||
325 | in = 1; | ||
326 | break; | ||
327 | case 0xEC: /* in (%dx),%al */ | ||
328 | insnlen += 1; | ||
329 | in = 1; | ||
330 | break; | ||
331 | case 0xE6: /* out %al,<next byte> */ | ||
332 | insnlen += 2; | ||
333 | break; | ||
334 | case 0xEE: /* out %al,(%dx) */ | ||
335 | insnlen += 1; | ||
336 | break; | ||
337 | default: | ||
338 | /* OK, we don't know what this is, can't emulate. */ | ||
339 | return 0; | ||
340 | } | ||
341 | 358 | ||
342 | /* | 359 | static void setup_emulate_insn(struct lg_cpu *cpu) |
343 | * If it was an "IN" instruction, they expect the result to be read | 360 | { |
344 | * into %eax, so we change %eax. We always return all-ones, which | 361 | cpu->pending.trap = 13; |
345 | * traditionally means "there's nothing there". | 362 | copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, |
346 | */ | 363 | sizeof(cpu->pending.insn)); |
347 | if (in) { | 364 | } |
348 | /* Lower bit tells means it's a 32/16 bit access */ | 365 | |
349 | if (insn & 0x1) { | 366 | static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr) |
350 | if (small_operand) | 367 | { |
351 | cpu->regs->eax |= 0xFFFF; | 368 | cpu->pending.trap = 14; |
352 | else | 369 | cpu->pending.addr = iomem_addr; |
353 | cpu->regs->eax = 0xFFFFFFFF; | 370 | copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, |
354 | } else | 371 | sizeof(cpu->pending.insn)); |
355 | cpu->regs->eax |= 0xFF; | ||
356 | } | ||
357 | /* Finally, we've "done" the instruction, so move past it. */ | ||
358 | cpu->regs->eip += insnlen; | ||
359 | /* Success! */ | ||
360 | return 1; | ||
361 | } | 372 | } |
362 | 373 | ||
363 | /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ | 374 | /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ |
364 | void lguest_arch_handle_trap(struct lg_cpu *cpu) | 375 | void lguest_arch_handle_trap(struct lg_cpu *cpu) |
365 | { | 376 | { |
377 | unsigned long iomem_addr; | ||
378 | |||
366 | switch (cpu->regs->trapnum) { | 379 | switch (cpu->regs->trapnum) { |
367 | case 13: /* We've intercepted a General Protection Fault. */ | 380 | case 13: /* We've intercepted a General Protection Fault. */ |
368 | /* | 381 | /* Hand to Launcher to emulate those pesky IN and OUT insns */ |
369 | * Check if this was one of those annoying IN or OUT | ||
370 | * instructions which we need to emulate. If so, we just go | ||
371 | * back into the Guest after we've done it. | ||
372 | */ | ||
373 | if (cpu->regs->errcode == 0) { | 382 | if (cpu->regs->errcode == 0) { |
374 | if (emulate_insn(cpu)) | 383 | setup_emulate_insn(cpu); |
375 | return; | 384 | return; |
376 | } | 385 | } |
377 | break; | 386 | break; |
378 | case 14: /* We've intercepted a Page Fault. */ | 387 | case 14: /* We've intercepted a Page Fault. */ |
@@ -387,9 +396,16 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) | |||
387 | * whether kernel or userspace code. | 396 | * whether kernel or userspace code. |
388 | */ | 397 | */ |
389 | if (demand_page(cpu, cpu->arch.last_pagefault, | 398 | if (demand_page(cpu, cpu->arch.last_pagefault, |
390 | cpu->regs->errcode)) | 399 | cpu->regs->errcode, &iomem_addr)) |
391 | return; | 400 | return; |
392 | 401 | ||
402 | /* Was this an access to memory mapped IO? */ | ||
403 | if (iomem_addr) { | ||
404 | /* Tell Launcher, let it handle it. */ | ||
405 | setup_iomem_insn(cpu, iomem_addr); | ||
406 | return; | ||
407 | } | ||
408 | |||
393 | /* | 409 | /* |
394 | * OK, it's really not there (or not OK): the Guest needs to | 410 | * OK, it's really not there (or not OK): the Guest needs to |
395 | * know. We write out the cr2 value so it knows where the | 411 | * know. We write out the cr2 value so it knows where the |