aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rusty Russell <rusty@rustcorp.com.au> 2015-02-10 23:45:10 -0500
committer Rusty Russell <rusty@rustcorp.com.au> 2015-02-11 01:17:33 -0500
commit 7313d5217e6b9817897172d6a6ff477bdc415ed6 (patch)
tree 448fc79d9048af296aeb6c6fbbf5da84dc6f7046
parent d1c29465b8a52d8fc5a59aac92c6b206b69fe631 (diff)
lguest: add iomem region, where guest page faults get sent to userspace.
This lets us implement PCI. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r-- drivers/lguest/lg.h 7
-rw-r--r-- drivers/lguest/lguest_user.c 3
-rw-r--r-- drivers/lguest/page_tables.c 33
-rw-r--r-- drivers/lguest/x86/core.c 19
-rw-r--r-- tools/lguest/lguest.c 3
5 files changed, 58 insertions, 7 deletions
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 9da4f351e077..eb81abc05995 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -97,8 +97,12 @@ struct lguest {
97 struct lg_cpu cpus[NR_CPUS]; 97 struct lg_cpu cpus[NR_CPUS];
98 unsigned int nr_cpus; 98 unsigned int nr_cpus;
99 99
100 /* Valid guest memory pages must be < this. */
100 u32 pfn_limit; 101 u32 pfn_limit;
101 102
103 /* Device memory is >= pfn_limit and < device_limit. */
104 u32 device_limit;
105
102 /* 106 /*
103 * This provides the offset to the base of guest-physical memory in the 107 * This provides the offset to the base of guest-physical memory in the
104 * Launcher. 108 * Launcher.
@@ -200,7 +204,8 @@ void guest_pagetable_flush_user(struct lg_cpu *cpu);
200void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, 204void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
201 unsigned long vaddr, pte_t val); 205 unsigned long vaddr, pte_t val);
202void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages); 206void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages);
203bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode); 207bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode,
208 unsigned long *iomem);
204void pin_page(struct lg_cpu *cpu, unsigned long vaddr); 209void pin_page(struct lg_cpu *cpu, unsigned long vaddr);
205bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr); 210bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr);
206unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr); 211unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr);
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index be996d173615..c8b0e8575b44 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -385,7 +385,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
385 /* "struct lguest" contains all we (the Host) know about a Guest. */ 385 /* "struct lguest" contains all we (the Host) know about a Guest. */
386 struct lguest *lg; 386 struct lguest *lg;
387 int err; 387 int err;
388 unsigned long args[3]; 388 unsigned long args[4];
389 389
390 /* 390 /*
391 * We grab the Big Lguest lock, which protects against multiple 391 * We grab the Big Lguest lock, which protects against multiple
@@ -419,6 +419,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
419 /* Populate the easy fields of our "struct lguest" */ 419 /* Populate the easy fields of our "struct lguest" */
420 lg->mem_base = (void __user *)args[0]; 420 lg->mem_base = (void __user *)args[0];
421 lg->pfn_limit = args[1]; 421 lg->pfn_limit = args[1];
422 lg->device_limit = args[3];
422 423
423 /* This is the first cpu (cpu 0) and it will start booting at args[2] */ 424 /* This is the first cpu (cpu 0) and it will start booting at args[2] */
424 err = lg_cpu_start(&lg->cpus[0], 0, args[2]); 425 err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 69c35caa955a..e3abebc912c0 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -250,6 +250,16 @@ static void release_pte(pte_t pte)
250} 250}
251/*:*/ 251/*:*/
252 252
253static bool gpte_in_iomem(struct lg_cpu *cpu, pte_t gpte)
254{
255 /* We don't handle large pages. */
256 if (pte_flags(gpte) & _PAGE_PSE)
257 return false;
258
259 return (pte_pfn(gpte) >= cpu->lg->pfn_limit
260 && pte_pfn(gpte) < cpu->lg->device_limit);
261}
262
253static bool check_gpte(struct lg_cpu *cpu, pte_t gpte) 263static bool check_gpte(struct lg_cpu *cpu, pte_t gpte)
254{ 264{
255 if ((pte_flags(gpte) & _PAGE_PSE) || 265 if ((pte_flags(gpte) & _PAGE_PSE) ||
@@ -374,8 +384,14 @@ static pte_t *find_spte(struct lg_cpu *cpu, unsigned long vaddr, bool allocate,
374 * 384 *
375 * If we fixed up the fault (ie. we mapped the address), this routine returns 385 * If we fixed up the fault (ie. we mapped the address), this routine returns
376 * true. Otherwise, it was a real fault and we need to tell the Guest. 386 * true. Otherwise, it was a real fault and we need to tell the Guest.
387 *
388 * There's a corner case: they're trying to access memory between
389 * pfn_limit and device_limit, which is I/O memory. In this case, we
390 * return false and set @iomem to the physical address, so the
391 * Launcher can handle the instruction manually.
377 */ 392 */
378bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) 393bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode,
394 unsigned long *iomem)
379{ 395{
380 unsigned long gpte_ptr; 396 unsigned long gpte_ptr;
381 pte_t gpte; 397 pte_t gpte;
@@ -383,6 +399,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
383 pmd_t gpmd; 399 pmd_t gpmd;
384 pgd_t gpgd; 400 pgd_t gpgd;
385 401
402 *iomem = 0;
403
386 /* We never demand page the Switcher, so trying is a mistake. */ 404 /* We never demand page the Switcher, so trying is a mistake. */
387 if (vaddr >= switcher_addr) 405 if (vaddr >= switcher_addr)
388 return false; 406 return false;
@@ -459,6 +477,12 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
459 if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) 477 if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
460 return false; 478 return false;
461 479
480 /* If they're accessing io memory, we expect a fault. */
481 if (gpte_in_iomem(cpu, gpte)) {
482 *iomem = (pte_pfn(gpte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
483 return false;
484 }
485
462 /* 486 /*
463 * Check that the Guest PTE flags are OK, and the page number is below 487 * Check that the Guest PTE flags are OK, and the page number is below
464 * the pfn_limit (ie. not mapping the Launcher binary). 488 * the pfn_limit (ie. not mapping the Launcher binary).
@@ -553,7 +577,9 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
553 */ 577 */
554void pin_page(struct lg_cpu *cpu, unsigned long vaddr) 578void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
555{ 579{
556 if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) 580 unsigned long iomem;
581
582 if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2, &iomem))
557 kill_guest(cpu, "bad stack page %#lx", vaddr); 583 kill_guest(cpu, "bad stack page %#lx", vaddr);
558} 584}
559/*:*/ 585/*:*/
@@ -928,7 +954,8 @@ static void __guest_set_pte(struct lg_cpu *cpu, int idx,
928 * now. This shaves 10% off a copy-on-write 954 * now. This shaves 10% off a copy-on-write
929 * micro-benchmark. 955 * micro-benchmark.
930 */ 956 */
931 if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { 957 if ((pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED))
958 && !gpte_in_iomem(cpu, gpte)) {
932 if (!check_gpte(cpu, gpte)) 959 if (!check_gpte(cpu, gpte))
933 return; 960 return;
934 set_pte(spte, 961 set_pte(spte,
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 42e87bf14113..18d841e738bc 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -362,9 +362,19 @@ static void setup_emulate_insn(struct lg_cpu *cpu)
362 sizeof(cpu->pending.insn)); 362 sizeof(cpu->pending.insn));
363} 363}
364 364
365static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr)
366{
367 cpu->pending.trap = 14;
368 cpu->pending.addr = iomem_addr;
369 copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip,
370 sizeof(cpu->pending.insn));
371}
372
365/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ 373/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
366void lguest_arch_handle_trap(struct lg_cpu *cpu) 374void lguest_arch_handle_trap(struct lg_cpu *cpu)
367{ 375{
376 unsigned long iomem_addr;
377
368 switch (cpu->regs->trapnum) { 378 switch (cpu->regs->trapnum) {
369 case 13: /* We've intercepted a General Protection Fault. */ 379 case 13: /* We've intercepted a General Protection Fault. */
370 /* Hand to Launcher to emulate those pesky IN and OUT insns */ 380 /* Hand to Launcher to emulate those pesky IN and OUT insns */
@@ -385,8 +395,15 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
385 * whether kernel or userspace code. 395 * whether kernel or userspace code.
386 */ 396 */
387 if (demand_page(cpu, cpu->arch.last_pagefault, 397 if (demand_page(cpu, cpu->arch.last_pagefault,
388 cpu->regs->errcode)) 398 cpu->regs->errcode, &iomem_addr))
399 return;
400
401 /* Was this an access to memory mapped IO? */
402 if (iomem_addr) {
403 /* Tell Launcher, let it handle it. */
404 setup_iomem_insn(cpu, iomem_addr);
389 return; 405 return;
406 }
390 407
391 /* 408 /*
392 * OK, it's really not there (or not OK): the Guest needs to 409 * OK, it's really not there (or not OK): the Guest needs to
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index 485fe13db12e..02f353989e6c 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -548,7 +548,8 @@ static void tell_kernel(unsigned long start)
548{ 548{
549 unsigned long args[] = { LHREQ_INITIALIZE, 549 unsigned long args[] = { LHREQ_INITIALIZE,
550 (unsigned long)guest_base, 550 (unsigned long)guest_base,
551 guest_limit / getpagesize(), start }; 551 guest_limit / getpagesize(), start,
552 guest_limit / getpagesize() };
552 verbose("Guest: %p - %p (%#lx)\n", 553 verbose("Guest: %p - %p (%#lx)\n",
553 guest_base, guest_base + guest_limit, guest_limit); 554 guest_base, guest_base + guest_limit, guest_limit);
554 lguest_fd = open_or_die("/dev/lguest", O_RDWR); 555 lguest_fd = open_or_die("/dev/lguest", O_RDWR);