aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lguest
diff options
context:
space:
mode:
author: Rusty Russell <rusty@rustcorp.com.au> 2007-10-21 21:03:36 -0400
committer: Rusty Russell <rusty@rustcorp.com.au> 2007-10-23 01:49:54 -0400
commit: 47436aa4ad054c1c7c8231618e86ebd9305308dc (patch)
tree: a9ba6e0521f9116442144a86e781a3164ec86094 /drivers/lguest
parent: c18acd73ffc209def08003a1927473096f66c5ad (diff)
Boot with virtual == physical to get closer to native Linux.
1) This allows us to get a lot closer to booting bzImages. 2) It means we don't have to know page_offset. 3) The Guest needs to modify the boot pagetables to create the PAGE_OFFSET mapping before jumping to C code. 4) guest_pa() walks the page tables rather than using page_offset. 5) We don't use page_offset to figure out whether to emulate: it was always kinda questionable, and won't work for instructions done before remapping (bzImage unpacking in particular). 6) We still want the kernel address for tlb flushing: have the initial hypercall give us that, too. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers/lguest')
-rw-r--r-- drivers/lguest/hypercalls.c 8
-rw-r--r-- drivers/lguest/interrupts_and_traps.c 13
-rw-r--r-- drivers/lguest/lg.h 8
-rw-r--r-- drivers/lguest/lguest_user.c 11
-rw-r--r-- drivers/lguest/page_tables.c 47
-rw-r--r-- drivers/lguest/x86/core.c 7
6 files changed, 62 insertions, 32 deletions
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 02d0ae268267..13b5f2f813de 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -181,15 +181,15 @@ static void initialize(struct lguest *lg)
181 /* The Guest tells us where we're not to deliver interrupts by putting 181 /* The Guest tells us where we're not to deliver interrupts by putting
182 * the range of addresses into "struct lguest_data". */ 182 * the range of addresses into "struct lguest_data". */
183 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) 183 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
184 || get_user(lg->noirq_end, &lg->lguest_data->noirq_end) 184 || get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
185 /* We tell the Guest that it can't use the top 4MB of virtual
186 * addresses used by the Switcher. */
187 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem))
188 kill_guest(lg, "bad guest page %p", lg->lguest_data); 185 kill_guest(lg, "bad guest page %p", lg->lguest_data);
189 186
190 /* We write the current time into the Guest's data page once now. */ 187 /* We write the current time into the Guest's data page once now. */
191 write_timestamp(lg); 188 write_timestamp(lg);
192 189
190 /* page_tables.c will also do some setup. */
191 page_table_guest_data_init(lg);
192
193 /* This is the one case where the above accesses might have been the 193 /* This is the one case where the above accesses might have been the
194 * first write to a Guest page. This may have caused a copy-on-write 194 * first write to a Guest page. This may have caused a copy-on-write
195 * fault, but the Guest might be referring to the old (read-only) 195 * fault, but the Guest might be referring to the old (read-only)
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a57d757eab6e..3271c0031a1b 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -62,8 +62,9 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
62 * it). */ 62 * it). */
63static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) 63static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
64{ 64{
65 unsigned long gstack; 65 unsigned long gstack, origstack;
66 u32 eflags, ss, irq_enable; 66 u32 eflags, ss, irq_enable;
67 unsigned long virtstack;
67 68
68 /* There are two cases for interrupts: one where the Guest is already 69 /* There are two cases for interrupts: one where the Guest is already
69 * in the kernel, and a more complex one where the Guest is in 70 * in the kernel, and a more complex one where the Guest is in
@@ -71,8 +72,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
71 if ((lg->regs->ss&0x3) != GUEST_PL) { 72 if ((lg->regs->ss&0x3) != GUEST_PL) {
72 /* The Guest told us their kernel stack with the SET_STACK 73 /* The Guest told us their kernel stack with the SET_STACK
73 * hypercall: both the virtual address and the segment */ 74 * hypercall: both the virtual address and the segment */
74 gstack = guest_pa(lg, lg->esp1); 75 virtstack = lg->esp1;
75 ss = lg->ss1; 76 ss = lg->ss1;
77
78 origstack = gstack = guest_pa(lg, virtstack);
76 /* We push the old stack segment and pointer onto the new 79 /* We push the old stack segment and pointer onto the new
77 * stack: when the Guest does an "iret" back from the interrupt 80 * stack: when the Guest does an "iret" back from the interrupt
78 * handler the CPU will notice they're dropping privilege 81 * handler the CPU will notice they're dropping privilege
@@ -81,8 +84,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
81 push_guest_stack(lg, &gstack, lg->regs->esp); 84 push_guest_stack(lg, &gstack, lg->regs->esp);
82 } else { 85 } else {
83 /* We're staying on the same Guest (kernel) stack. */ 86 /* We're staying on the same Guest (kernel) stack. */
84 gstack = guest_pa(lg, lg->regs->esp); 87 virtstack = lg->regs->esp;
85 ss = lg->regs->ss; 88 ss = lg->regs->ss;
89
90 origstack = gstack = guest_pa(lg, virtstack);
86 } 91 }
87 92
88 /* Remember that we never let the Guest actually disable interrupts, so 93 /* Remember that we never let the Guest actually disable interrupts, so
@@ -108,7 +113,7 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
108 /* Now we've pushed all the old state, we change the stack, the code 113 /* Now we've pushed all the old state, we change the stack, the code
109 * segment and the address to execute. */ 114 * segment and the address to execute. */
110 lg->regs->ss = ss; 115 lg->regs->ss = ss;
111 lg->regs->esp = gstack + lg->page_offset; 116 lg->regs->esp = virtstack + (gstack - origstack);
112 lg->regs->cs = (__KERNEL_CS|GUEST_PL); 117 lg->regs->cs = (__KERNEL_CS|GUEST_PL);
113 lg->regs->eip = idt_address(lo, hi); 118 lg->regs->eip = idt_address(lo, hi);
114 119
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 7408cebe995e..e4845d7f0688 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -63,7 +63,7 @@ struct lguest
63 /* This provides the offset to the base of guest-physical 63 /* This provides the offset to the base of guest-physical
64 * memory in the Launcher. */ 64 * memory in the Launcher. */
65 void __user *mem_base; 65 void __user *mem_base;
66 u32 page_offset; 66 unsigned long kernel_address;
67 u32 cr2; 67 u32 cr2;
68 int halted; 68 int halted;
69 int ts; 69 int ts;
@@ -165,6 +165,8 @@ void guest_set_pte(struct lguest *lg, unsigned long gpgdir,
165void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); 165void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages);
166int demand_page(struct lguest *info, unsigned long cr2, int errcode); 166int demand_page(struct lguest *info, unsigned long cr2, int errcode);
167void pin_page(struct lguest *lg, unsigned long vaddr); 167void pin_page(struct lguest *lg, unsigned long vaddr);
168unsigned long guest_pa(struct lguest *lg, unsigned long vaddr);
169void page_table_guest_data_init(struct lguest *lg);
168 170
169/* <arch>/core.c: */ 171/* <arch>/core.c: */
170void lguest_arch_host_init(void); 172void lguest_arch_host_init(void);
@@ -229,9 +231,5 @@ do { \
229} while(0) 231} while(0)
230/* (End of aside) :*/ 232/* (End of aside) :*/
231 233
232static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
233{
234 return vaddr - lg->page_offset;
235}
236#endif /* __ASSEMBLY__ */ 234#endif /* __ASSEMBLY__ */
237#endif /* _LGUEST_H */ 235#endif /* _LGUEST_H */
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index b184652e45d7..61b177e1e649 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -111,7 +111,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
111 return run_guest(lg, (unsigned long __user *)user); 111 return run_guest(lg, (unsigned long __user *)user);
112} 112}
113 113
114/*L:020 The initialization write supplies 5 pointer sized (32 or 64 bit) 114/*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit)
115 * values (in addition to the LHREQ_INITIALIZE value). These are: 115 * values (in addition to the LHREQ_INITIALIZE value). These are:
116 * 116 *
117 * base: The start of the Guest-physical memory inside the Launcher memory. 117 * base: The start of the Guest-physical memory inside the Launcher memory.
@@ -124,12 +124,6 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
124 * pagetables (which are set up by the Launcher). 124 * pagetables (which are set up by the Launcher).
125 * 125 *
126 * start: The first instruction to execute ("eip" in x86-speak). 126 * start: The first instruction to execute ("eip" in x86-speak).
127 *
128 * page_offset: The PAGE_OFFSET constant in the Guest kernel. We should
129 * probably wean the code off this, but it's a very useful constant! Any
130 * address above this is within the Guest kernel, and any kernel address can
131 * quickly converted from physical to virtual by adding PAGE_OFFSET. It's
132 * 0xC0000000 (3G) by default, but it's configurable at kernel build time.
133 */ 127 */
134static int initialize(struct file *file, const unsigned long __user *input) 128static int initialize(struct file *file, const unsigned long __user *input)
135{ 129{
@@ -137,7 +131,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
137 * Guest. */ 131 * Guest. */
138 struct lguest *lg; 132 struct lguest *lg;
139 int err; 133 int err;
140 unsigned long args[5]; 134 unsigned long args[4];
141 135
142 /* We grab the Big Lguest lock, which protects against multiple 136 /* We grab the Big Lguest lock, which protects against multiple
143 * simultaneous initializations. */ 137 * simultaneous initializations. */
@@ -162,7 +156,6 @@ static int initialize(struct file *file, const unsigned long __user *input)
162 /* Populate the easy fields of our "struct lguest" */ 156 /* Populate the easy fields of our "struct lguest" */
163 lg->mem_base = (void __user *)(long)args[0]; 157 lg->mem_base = (void __user *)(long)args[0];
164 lg->pfn_limit = args[1]; 158 lg->pfn_limit = args[1];
165 lg->page_offset = args[4];
166 159
167 /* We need a complete page for the Guest registers: they are accessible 160 /* We need a complete page for the Guest registers: they are accessible
168 * to the Guest and we can only grant it access to whole pages. */ 161 * to the Guest and we can only grant it access to whole pages. */
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index bfe3650b28d6..fe3c7575647b 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -13,6 +13,7 @@
13#include <linux/random.h> 13#include <linux/random.h>
14#include <linux/percpu.h> 14#include <linux/percpu.h>
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/uaccess.h>
16#include "lg.h" 17#include "lg.h"
17 18
18/*M:008 We hold reference to pages, which prevents them from being swapped. 19/*M:008 We hold reference to pages, which prevents them from being swapped.
@@ -345,7 +346,7 @@ static void flush_user_mappings(struct lguest *lg, int idx)
345{ 346{
346 unsigned int i; 347 unsigned int i;
347 /* Release every pgd entry up to the kernel's address. */ 348 /* Release every pgd entry up to the kernel's address. */
348 for (i = 0; i < pgd_index(lg->page_offset); i++) 349 for (i = 0; i < pgd_index(lg->kernel_address); i++)
349 release_pgd(lg, lg->pgdirs[idx].pgdir + i); 350 release_pgd(lg, lg->pgdirs[idx].pgdir + i);
350} 351}
351 352
@@ -358,6 +359,25 @@ void guest_pagetable_flush_user(struct lguest *lg)
358} 359}
359/*:*/ 360/*:*/
360 361
362/* We walk down the guest page tables to get a guest-physical address */
363unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
364{
365 pgd_t gpgd;
366 pte_t gpte;
367
368 /* First step: get the top-level Guest page table entry. */
369 gpgd = __pgd(lgread_u32(lg, gpgd_addr(lg, vaddr)));
370 /* Toplevel not present? We can't map it in. */
371 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
372 kill_guest(lg, "Bad address %#lx", vaddr);
373
374 gpte = __pte(lgread_u32(lg, gpte_addr(lg, gpgd, vaddr)));
375 if (!(pte_flags(gpte) & _PAGE_PRESENT))
376 kill_guest(lg, "Bad address %#lx", vaddr);
377
378 return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
379}
380
361/* We keep several page tables. This is a simple routine to find the page 381/* We keep several page tables. This is a simple routine to find the page
362 * table (if any) corresponding to this top-level address the Guest has given 382 * table (if any) corresponding to this top-level address the Guest has given
363 * us. */ 383 * us. */
@@ -500,7 +520,7 @@ void guest_set_pte(struct lguest *lg,
500{ 520{
501 /* Kernel mappings must be changed on all top levels. Slow, but 521 /* Kernel mappings must be changed on all top levels. Slow, but
502 * doesn't happen often. */ 522 * doesn't happen often. */
503 if (vaddr >= lg->page_offset) { 523 if (vaddr >= lg->kernel_address) {
504 unsigned int i; 524 unsigned int i;
505 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 525 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
506 if (lg->pgdirs[i].pgdir) 526 if (lg->pgdirs[i].pgdir)
@@ -550,11 +570,6 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
550 * its first page table is. We set some things up here: */ 570 * its first page table is. We set some things up here: */
551int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) 571int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
552{ 572{
553 /* In flush_user_mappings() we loop from 0 to
554 * "pgd_index(lg->page_offset)". This assumes it won't hit
555 * the Switcher mappings, so check that now. */
556 if (pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
557 return -EINVAL;
558 /* We start on the first shadow page table, and give it a blank PGD 573 /* We start on the first shadow page table, and give it a blank PGD
559 * page. */ 574 * page. */
560 lg->pgdidx = 0; 575 lg->pgdidx = 0;
@@ -565,6 +580,24 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
565 return 0; 580 return 0;
566} 581}
567 582
583/* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
584void page_table_guest_data_init(struct lguest *lg)
585{
586 /* We get the kernel address: above this is all kernel memory. */
587 if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address)
588 /* We tell the Guest that it can't use the top 4MB of virtual
589 * addresses used by the Switcher. */
590 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
591 || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir))
592 kill_guest(lg, "bad guest page %p", lg->lguest_data);
593
594 /* In flush_user_mappings() we loop from 0 to
595 * "pgd_index(lg->kernel_address)". This assumes it won't hit the
596 * Switcher mappings, so check that now. */
597 if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX)
598 kill_guest(lg, "bad kernel address %#lx", lg->kernel_address);
599}
600
568/* When a Guest dies, our cleanup is fairly simple. */ 601/* When a Guest dies, our cleanup is fairly simple. */
569void free_guest_pagetable(struct lguest *lg) 602void free_guest_pagetable(struct lguest *lg)
570{ 603{
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index a125109446dc..39f64c95de18 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -216,9 +216,10 @@ static int emulate_insn(struct lguest *lg)
216 * guest_pa just subtracts the Guest's page_offset. */ 216 * guest_pa just subtracts the Guest's page_offset. */
217 unsigned long physaddr = guest_pa(lg, lg->regs->eip); 217 unsigned long physaddr = guest_pa(lg, lg->regs->eip);
218 218
219 /* The guest_pa() function only works for Guest kernel addresses, but 219 /* This must be the Guest kernel trying to do something, not userspace!
220 * that's all we're trying to do anyway. */ 220 * The bottom two bits of the CS segment register are the privilege
221 if (lg->regs->eip < lg->page_offset) 221 * level. */
222 if ((lg->regs->cs & 3) != GUEST_PL)
222 return 0; 223 return 0;
223 224
224 /* Decoding x86 instructions is icky. */ 225 /* Decoding x86 instructions is icky. */