aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lguest/x86/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/lguest/x86/core.c')
-rw-r--r--drivers/lguest/x86/core.c198
1 files changed, 107 insertions, 91 deletions
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 6adfd7ba4c97..30f2aef69d78 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -182,6 +182,52 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
182} 182}
183/*:*/ 183/*:*/
184 184
/*
 * lguest_arch_regptr - translate a struct pt_regs field offset into a pointer
 * to the matching saved register in cpu->regs.
 *
 * @cpu:     the Guest virtual CPU whose register save area is consulted.
 * @reg_off: byte offset of the wanted field within struct pt_regs; it is
 *           compared against offsetof() values, so only exact field offsets
 *           match.
 * @any:     when false only bx, cx, dx, si, di, bp, ax, ip and sp resolve;
 *           when true ds, es, fs, gs, cs, flags and ss resolve as well.
 *
 * Returns a pointer into cpu->regs, or NULL if reg_off names no register
 * that is permitted for the given @any.
 *
 * NOTE(review): going by the in-function comment, callers presumably pass
 * any=true only for reads; nothing in this function enforces that, so
 * confirm against the callers.
 */
185unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any)
186{
/* These registers are handed out regardless of 'any'. */
187 switch (reg_off) {
188 case offsetof(struct pt_regs, bx):
189 return &cpu->regs->ebx;
190 case offsetof(struct pt_regs, cx):
191 return &cpu->regs->ecx;
192 case offsetof(struct pt_regs, dx):
193 return &cpu->regs->edx;
194 case offsetof(struct pt_regs, si):
195 return &cpu->regs->esi;
196 case offsetof(struct pt_regs, di):
197 return &cpu->regs->edi;
198 case offsetof(struct pt_regs, bp):
199 return &cpu->regs->ebp;
200 case offsetof(struct pt_regs, ax):
201 return &cpu->regs->eax;
202 case offsetof(struct pt_regs, ip):
203 return &cpu->regs->eip;
204 case offsetof(struct pt_regs, sp):
205 return &cpu->regs->esp;
206 }
207
208 /* Launcher can read these, but we don't allow any setting. */
209 if (any) {
210 switch (reg_off) {
211 case offsetof(struct pt_regs, ds):
212 return &cpu->regs->ds;
213 case offsetof(struct pt_regs, es):
214 return &cpu->regs->es;
215 case offsetof(struct pt_regs, fs):
216 return &cpu->regs->fs;
217 case offsetof(struct pt_regs, gs):
218 return &cpu->regs->gs;
219 case offsetof(struct pt_regs, cs):
220 return &cpu->regs->cs;
221 case offsetof(struct pt_regs, flags):
222 return &cpu->regs->eflags;
223 case offsetof(struct pt_regs, ss):
224 return &cpu->regs->ss;
225 }
226 }
227
/* Not a recognised offset, or one that is only available when 'any' is set. */
228 return NULL;
229}
230
185/*M:002 231/*M:002
186 * There are hooks in the scheduler which we can register to tell when we 232 * There are hooks in the scheduler which we can register to tell when we
187 * get kicked off the CPU (preempt_notifier_register()). This would allow us 233 * get kicked off the CPU (preempt_notifier_register()). This would allow us
@@ -269,110 +315,73 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
269 * usually attached to a PC. 315 * usually attached to a PC.
270 * 316 *
271 * When the Guest uses one of these instructions, we get a trap (General 317 * When the Guest uses one of these instructions, we get a trap (General
272 * Protection Fault) and come here. We see if it's one of those troublesome 318 * Protection Fault) and come here. We queue this to be sent out to the
273 * instructions and skip over it. We return true if we did. 319 * Launcher to handle.
274 */ 320 */
275static int emulate_insn(struct lg_cpu *cpu)
276{
277 u8 insn;
278 unsigned int insnlen = 0, in = 0, small_operand = 0;
279 /*
280 * The eip contains the *virtual* address of the Guest's instruction:
281 * walk the Guest's page tables to find the "physical" address.
282 */
283 unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);
284
285 /*
286 * This must be the Guest kernel trying to do something, not userspace!
287 * The bottom two bits of the CS segment register are the privilege
288 * level.
289 */
290 if ((cpu->regs->cs & 3) != GUEST_PL)
291 return 0;
292
293 /* Decoding x86 instructions is icky. */
294 insn = lgread(cpu, physaddr, u8);
295 321
296 /* 322/*
297 * Around 2.6.33, the kernel started using an emulation for the 323 * The eip contains the *virtual* address of the Guest's instruction:
298 * cmpxchg8b instruction in early boot on many configurations. This 324 * we copy the instruction here so the Launcher doesn't have to walk
299 * code isn't paravirtualized, and it tries to disable interrupts. 325 * the page tables to decode it. We handle the case (eg. in a kernel
300 * Ignore it, which will Mostly Work. 326 * module) where the instruction is over two pages, and the pages are
301 */ 327 * virtually but not physically contiguous.
302 if (insn == 0xfa) { 328 *
303 /* "cli", or Clear Interrupt Enable instruction. Skip it. */ 329 * The longest possible x86 instruction is 15 bytes, but we don't handle
304 cpu->regs->eip++; 330 * anything that strange.
305 return 1; 331 */
332static void copy_from_guest(struct lg_cpu *cpu,
333 void *dst, unsigned long vaddr, size_t len)
334{
335 size_t to_page_end = PAGE_SIZE - (vaddr % PAGE_SIZE);
336 unsigned long paddr;
337
338 BUG_ON(len > PAGE_SIZE);
339
340 /* If it goes over a page, copy in two parts. */
341 if (len > to_page_end) {
342 /* But make sure the next page is mapped! */
343 if (__guest_pa(cpu, vaddr + to_page_end, &paddr))
344 copy_from_guest(cpu, dst + to_page_end,
345 vaddr + to_page_end,
346 len - to_page_end);
347 else
348 /* Otherwise fill with zeroes. */
349 memset(dst + to_page_end, 0, len - to_page_end);
350 len = to_page_end;
306 } 351 }
307 352
308 /* 353 /* This will kill the guest if it isn't mapped, but that
309 * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out. 354 * shouldn't happen. */
310 */ 355 __lgread(cpu, dst, guest_pa(cpu, vaddr), len);
311 if (insn == 0x66) { 356}
312 small_operand = 1;
313 /* The instruction is 1 byte so far, read the next byte. */
314 insnlen = 1;
315 insn = lgread(cpu, physaddr + insnlen, u8);
316 }
317 357
318 /*
319 * We can ignore the lower bit for the moment and decode the 4 opcodes
320 * we need to emulate.
321 */
322 switch (insn & 0xFE) {
323 case 0xE4: /* in <next byte>,%al */
324 insnlen += 2;
325 in = 1;
326 break;
327 case 0xEC: /* in (%dx),%al */
328 insnlen += 1;
329 in = 1;
330 break;
331 case 0xE6: /* out %al,<next byte> */
332 insnlen += 2;
333 break;
334 case 0xEE: /* out %al,(%dx) */
335 insnlen += 1;
336 break;
337 default:
338 /* OK, we don't know what this is, can't emulate. */
339 return 0;
340 }
341 358
342 /* 359static void setup_emulate_insn(struct lg_cpu *cpu)
343 * If it was an "IN" instruction, they expect the result to be read 360{
344 * into %eax, so we change %eax. We always return all-ones, which 361 cpu->pending.trap = 13;
345 * traditionally means "there's nothing there". 362 copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip,
346 */ 363 sizeof(cpu->pending.insn));
347 if (in) { 364}
348 /* Lower bit tells means it's a 32/16 bit access */ 365
349 if (insn & 0x1) { 366static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr)
350 if (small_operand) 367{
351 cpu->regs->eax |= 0xFFFF; 368 cpu->pending.trap = 14;
352 else 369 cpu->pending.addr = iomem_addr;
353 cpu->regs->eax = 0xFFFFFFFF; 370 copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip,
354 } else 371 sizeof(cpu->pending.insn));
355 cpu->regs->eax |= 0xFF;
356 }
357 /* Finally, we've "done" the instruction, so move past it. */
358 cpu->regs->eip += insnlen;
359 /* Success! */
360 return 1;
361} 372}
362 373
363/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ 374/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
364void lguest_arch_handle_trap(struct lg_cpu *cpu) 375void lguest_arch_handle_trap(struct lg_cpu *cpu)
365{ 376{
377 unsigned long iomem_addr;
378
366 switch (cpu->regs->trapnum) { 379 switch (cpu->regs->trapnum) {
367 case 13: /* We've intercepted a General Protection Fault. */ 380 case 13: /* We've intercepted a General Protection Fault. */
368 /* 381 /* Hand to Launcher to emulate those pesky IN and OUT insns */
369 * Check if this was one of those annoying IN or OUT
370 * instructions which we need to emulate. If so, we just go
371 * back into the Guest after we've done it.
372 */
373 if (cpu->regs->errcode == 0) { 382 if (cpu->regs->errcode == 0) {
374 if (emulate_insn(cpu)) 383 setup_emulate_insn(cpu);
375 return; 384 return;
376 } 385 }
377 break; 386 break;
378 case 14: /* We've intercepted a Page Fault. */ 387 case 14: /* We've intercepted a Page Fault. */
@@ -387,9 +396,16 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
387 * whether kernel or userspace code. 396 * whether kernel or userspace code.
388 */ 397 */
389 if (demand_page(cpu, cpu->arch.last_pagefault, 398 if (demand_page(cpu, cpu->arch.last_pagefault,
390 cpu->regs->errcode)) 399 cpu->regs->errcode, &iomem_addr))
391 return; 400 return;
392 401
402 /* Was this an access to memory mapped IO? */
403 if (iomem_addr) {
404 /* Tell Launcher, let it handle it. */
405 setup_iomem_insn(cpu, iomem_addr);
406 return;
407 }
408
393 /* 409 /*
394 * OK, it's really not there (or not OK): the Guest needs to 410 * OK, it's really not there (or not OK): the Guest needs to
395 * know. We write out the cr2 value so it knows where the 411 * know. We write out the cr2 value so it knows where the