diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2015-02-10 23:45:10 -0500 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2015-02-11 01:17:33 -0500 |
commit | 7313d5217e6b9817897172d6a6ff477bdc415ed6 (patch) | |
tree | 448fc79d9048af296aeb6c6fbbf5da84dc6f7046 | |
parent | d1c29465b8a52d8fc5a59aac92c6b206b69fe631 (diff) |
lguest: add iomem region, where guest page faults get sent to userspace.
This lets us implement PCI.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r-- | drivers/lguest/lg.h | 7 | ||||
-rw-r--r-- | drivers/lguest/lguest_user.c | 3 | ||||
-rw-r--r-- | drivers/lguest/page_tables.c | 33 | ||||
-rw-r--r-- | drivers/lguest/x86/core.c | 19 | ||||
-rw-r--r-- | tools/lguest/lguest.c | 3 |
5 files changed, 58 insertions, 7 deletions
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 9da4f351e077..eb81abc05995 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
@@ -97,8 +97,12 @@ struct lguest { | |||
97 | struct lg_cpu cpus[NR_CPUS]; | 97 | struct lg_cpu cpus[NR_CPUS]; |
98 | unsigned int nr_cpus; | 98 | unsigned int nr_cpus; |
99 | 99 | ||
100 | /* Valid guest memory pages must be < this. */ | ||
100 | u32 pfn_limit; | 101 | u32 pfn_limit; |
101 | 102 | ||
103 | /* Device memory is >= pfn_limit and < device_limit. */ | ||
104 | u32 device_limit; | ||
105 | |||
102 | /* | 106 | /* |
103 | * This provides the offset to the base of guest-physical memory in the | 107 | * This provides the offset to the base of guest-physical memory in the |
104 | * Launcher. | 108 | * Launcher. |
@@ -200,7 +204,8 @@ void guest_pagetable_flush_user(struct lg_cpu *cpu); | |||
200 | void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, | 204 | void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, |
201 | unsigned long vaddr, pte_t val); | 205 | unsigned long vaddr, pte_t val); |
202 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages); | 206 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages); |
203 | bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode); | 207 | bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode, |
208 | unsigned long *iomem); | ||
204 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr); | 209 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr); |
205 | bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr); | 210 | bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr); |
206 | unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr); | 211 | unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr); |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index be996d173615..c8b0e8575b44 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -385,7 +385,7 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
385 | /* "struct lguest" contains all we (the Host) know about a Guest. */ | 385 | /* "struct lguest" contains all we (the Host) know about a Guest. */ |
386 | struct lguest *lg; | 386 | struct lguest *lg; |
387 | int err; | 387 | int err; |
388 | unsigned long args[3]; | 388 | unsigned long args[4]; |
389 | 389 | ||
390 | /* | 390 | /* |
391 | * We grab the Big Lguest lock, which protects against multiple | 391 | * We grab the Big Lguest lock, which protects against multiple |
@@ -419,6 +419,7 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
419 | /* Populate the easy fields of our "struct lguest" */ | 419 | /* Populate the easy fields of our "struct lguest" */ |
420 | lg->mem_base = (void __user *)args[0]; | 420 | lg->mem_base = (void __user *)args[0]; |
421 | lg->pfn_limit = args[1]; | 421 | lg->pfn_limit = args[1]; |
422 | lg->device_limit = args[3]; | ||
422 | 423 | ||
423 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ | 424 | /* This is the first cpu (cpu 0) and it will start booting at args[2] */ |
424 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); | 425 | err = lg_cpu_start(&lg->cpus[0], 0, args[2]); |
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 69c35caa955a..e3abebc912c0 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
@@ -250,6 +250,16 @@ static void release_pte(pte_t pte) | |||
250 | } | 250 | } |
251 | /*:*/ | 251 | /*:*/ |
252 | 252 | ||
253 | static bool gpte_in_iomem(struct lg_cpu *cpu, pte_t gpte) | ||
254 | { | ||
255 | /* We don't handle large pages. */ | ||
256 | if (pte_flags(gpte) & _PAGE_PSE) | ||
257 | return false; | ||
258 | |||
259 | return (pte_pfn(gpte) >= cpu->lg->pfn_limit | ||
260 | && pte_pfn(gpte) < cpu->lg->device_limit); | ||
261 | } | ||
262 | |||
253 | static bool check_gpte(struct lg_cpu *cpu, pte_t gpte) | 263 | static bool check_gpte(struct lg_cpu *cpu, pte_t gpte) |
254 | { | 264 | { |
255 | if ((pte_flags(gpte) & _PAGE_PSE) || | 265 | if ((pte_flags(gpte) & _PAGE_PSE) || |
@@ -374,8 +384,14 @@ static pte_t *find_spte(struct lg_cpu *cpu, unsigned long vaddr, bool allocate, | |||
374 | * | 384 | * |
375 | * If we fixed up the fault (ie. we mapped the address), this routine returns | 385 | * If we fixed up the fault (ie. we mapped the address), this routine returns |
376 | * true. Otherwise, it was a real fault and we need to tell the Guest. | 386 | * true. Otherwise, it was a real fault and we need to tell the Guest. |
387 | * | ||
388 | * There's a corner case: they're trying to access memory between | ||
389 | * pfn_limit and device_limit, which is I/O memory. In this case, we | ||
390 | * return false and set @iomem to the physical address, so the |
391 | * Launcher can handle the instruction manually. | ||
377 | */ | 392 | */ |
378 | bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | 393 | bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode, |
394 | unsigned long *iomem) | ||
379 | { | 395 | { |
380 | unsigned long gpte_ptr; | 396 | unsigned long gpte_ptr; |
381 | pte_t gpte; | 397 | pte_t gpte; |
@@ -383,6 +399,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
383 | pmd_t gpmd; | 399 | pmd_t gpmd; |
384 | pgd_t gpgd; | 400 | pgd_t gpgd; |
385 | 401 | ||
402 | *iomem = 0; | ||
403 | |||
386 | /* We never demand page the Switcher, so trying is a mistake. */ | 404 | /* We never demand page the Switcher, so trying is a mistake. */ |
387 | if (vaddr >= switcher_addr) | 405 | if (vaddr >= switcher_addr) |
388 | return false; | 406 | return false; |
@@ -459,6 +477,12 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
459 | if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) | 477 | if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) |
460 | return false; | 478 | return false; |
461 | 479 | ||
480 | /* If they're accessing io memory, we expect a fault. */ | ||
481 | if (gpte_in_iomem(cpu, gpte)) { | ||
482 | *iomem = (pte_pfn(gpte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); | ||
483 | return false; | ||
484 | } | ||
485 | |||
462 | /* | 486 | /* |
463 | * Check that the Guest PTE flags are OK, and the page number is below | 487 | * Check that the Guest PTE flags are OK, and the page number is below |
464 | * the pfn_limit (ie. not mapping the Launcher binary). | 488 | * the pfn_limit (ie. not mapping the Launcher binary). |
@@ -553,7 +577,9 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) | |||
553 | */ | 577 | */ |
554 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) | 578 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) |
555 | { | 579 | { |
556 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) | 580 | unsigned long iomem; |
581 | |||
582 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2, &iomem)) | ||
557 | kill_guest(cpu, "bad stack page %#lx", vaddr); | 583 | kill_guest(cpu, "bad stack page %#lx", vaddr); |
558 | } | 584 | } |
559 | /*:*/ | 585 | /*:*/ |
@@ -928,7 +954,8 @@ static void __guest_set_pte(struct lg_cpu *cpu, int idx, | |||
928 | * now. This shaves 10% off a copy-on-write | 954 | * now. This shaves 10% off a copy-on-write |
929 | * micro-benchmark. | 955 | * micro-benchmark. |
930 | */ | 956 | */ |
931 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { | 957 | if ((pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) |
958 | && !gpte_in_iomem(cpu, gpte)) { | ||
932 | if (!check_gpte(cpu, gpte)) | 959 | if (!check_gpte(cpu, gpte)) |
933 | return; | 960 | return; |
934 | set_pte(spte, | 961 | set_pte(spte, |
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 42e87bf14113..18d841e738bc 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
@@ -362,9 +362,19 @@ static void setup_emulate_insn(struct lg_cpu *cpu) | |||
362 | sizeof(cpu->pending.insn)); | 362 | sizeof(cpu->pending.insn)); |
363 | } | 363 | } |
364 | 364 | ||
365 | static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr) | ||
366 | { | ||
367 | cpu->pending.trap = 14; | ||
368 | cpu->pending.addr = iomem_addr; | ||
369 | copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, | ||
370 | sizeof(cpu->pending.insn)); | ||
371 | } | ||
372 | |||
365 | /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ | 373 | /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ |
366 | void lguest_arch_handle_trap(struct lg_cpu *cpu) | 374 | void lguest_arch_handle_trap(struct lg_cpu *cpu) |
367 | { | 375 | { |
376 | unsigned long iomem_addr; | ||
377 | |||
368 | switch (cpu->regs->trapnum) { | 378 | switch (cpu->regs->trapnum) { |
369 | case 13: /* We've intercepted a General Protection Fault. */ | 379 | case 13: /* We've intercepted a General Protection Fault. */ |
370 | /* Hand to Launcher to emulate those pesky IN and OUT insns */ | 380 | /* Hand to Launcher to emulate those pesky IN and OUT insns */ |
@@ -385,8 +395,15 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) | |||
385 | * whether kernel or userspace code. | 395 | * whether kernel or userspace code. |
386 | */ | 396 | */ |
387 | if (demand_page(cpu, cpu->arch.last_pagefault, | 397 | if (demand_page(cpu, cpu->arch.last_pagefault, |
388 | cpu->regs->errcode)) | 398 | cpu->regs->errcode, &iomem_addr)) |
399 | return; | ||
400 | |||
401 | /* Was this an access to memory mapped IO? */ | ||
402 | if (iomem_addr) { | ||
403 | /* Tell Launcher, let it handle it. */ | ||
404 | setup_iomem_insn(cpu, iomem_addr); | ||
389 | return; | 405 | return; |
406 | } | ||
390 | 407 | ||
391 | /* | 408 | /* |
392 | * OK, it's really not there (or not OK): the Guest needs to | 409 | * OK, it's really not there (or not OK): the Guest needs to |
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 485fe13db12e..02f353989e6c 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c | |||
@@ -548,7 +548,8 @@ static void tell_kernel(unsigned long start) | |||
548 | { | 548 | { |
549 | unsigned long args[] = { LHREQ_INITIALIZE, | 549 | unsigned long args[] = { LHREQ_INITIALIZE, |
550 | (unsigned long)guest_base, | 550 | (unsigned long)guest_base, |
551 | guest_limit / getpagesize(), start }; | 551 | guest_limit / getpagesize(), start, |
552 | guest_limit / getpagesize() }; | ||
552 | verbose("Guest: %p - %p (%#lx)\n", | 553 | verbose("Guest: %p - %p (%#lx)\n", |
553 | guest_base, guest_base + guest_limit, guest_limit); | 554 | guest_base, guest_base + guest_limit, guest_limit); |
554 | lguest_fd = open_or_die("/dev/lguest", O_RDWR); | 555 | lguest_fd = open_or_die("/dev/lguest", O_RDWR); |