aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2008-03-28 12:05:53 -0400
committerRusty Russell <rusty@rustcorp.com.au>2008-03-27 20:05:54 -0400
commita6bd8e13034dd7d60b6f14217096efa192d0adc1 (patch)
tree23890908b06eb8357e6ce633d35df1216f5e4213 /drivers
parente18b094f0faa4889b06a112da17230a10b88c815 (diff)
lguest: comment documentation update.
Took some cycles to re-read the Lguest Journey end-to-end, fix some rot and tighten some phrases. Only comments change. No new jokes, but a couple of recycled old jokes. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/lguest/core.c18
-rw-r--r--drivers/lguest/hypercalls.c11
-rw-r--r--drivers/lguest/interrupts_and_traps.c7
-rw-r--r--drivers/lguest/lguest_device.c11
-rw-r--r--drivers/lguest/lguest_user.c30
-rw-r--r--drivers/lguest/page_tables.c32
-rw-r--r--drivers/lguest/x86/core.c33
-rw-r--r--drivers/lguest/x86/switcher_32.S8
8 files changed, 91 insertions, 59 deletions
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index c632c08cbbdc..5eea4356d703 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -1,8 +1,6 @@
1/*P:400 This contains run_guest() which actually calls into the Host<->Guest 1/*P:400 This contains run_guest() which actually calls into the Host<->Guest
2 * Switcher and analyzes the return, such as determining if the Guest wants the 2 * Switcher and analyzes the return, such as determining if the Guest wants the
3 * Host to do something. This file also contains useful helper routines, and a 3 * Host to do something. This file also contains useful helper routines. :*/
4 * couple of non-obvious setup and teardown pieces which were implemented after
5 * days of debugging pain. :*/
6#include <linux/module.h> 4#include <linux/module.h>
7#include <linux/stringify.h> 5#include <linux/stringify.h>
8#include <linux/stddef.h> 6#include <linux/stddef.h>
@@ -49,8 +47,8 @@ static __init int map_switcher(void)
49 * easy. 47 * easy.
50 */ 48 */
51 49
52 /* We allocate an array of "struct page"s. map_vm_area() wants the 50 /* We allocate an array of struct page pointers. map_vm_area() wants
53 * pages in this form, rather than just an array of pointers. */ 51 * this, rather than just an array of pages. */
54 switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, 52 switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES,
55 GFP_KERNEL); 53 GFP_KERNEL);
56 if (!switcher_page) { 54 if (!switcher_page) {
@@ -172,7 +170,7 @@ void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
172 } 170 }
173} 171}
174 172
175/* This is the write (copy into guest) version. */ 173/* This is the write (copy into Guest) version. */
176void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, 174void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
177 unsigned bytes) 175 unsigned bytes)
178{ 176{
@@ -209,9 +207,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
209 if (cpu->break_out) 207 if (cpu->break_out)
210 return -EAGAIN; 208 return -EAGAIN;
211 209
212 /* Check if there are any interrupts which can be delivered 210 /* Check if there are any interrupts which can be delivered now:
213 * now: if so, this sets up the hander to be executed when we 211 * if so, this sets up the hander to be executed when we next
214 * next run the Guest. */ 212 * run the Guest. */
215 maybe_do_interrupt(cpu); 213 maybe_do_interrupt(cpu);
216 214
217 /* All long-lived kernel loops need to check with this horrible 215 /* All long-lived kernel loops need to check with this horrible
@@ -246,8 +244,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
246 lguest_arch_handle_trap(cpu); 244 lguest_arch_handle_trap(cpu);
247 } 245 }
248 246
247 /* Special case: Guest is 'dead' but wants a reboot. */
249 if (cpu->lg->dead == ERR_PTR(-ERESTART)) 248 if (cpu->lg->dead == ERR_PTR(-ERESTART))
250 return -ERESTART; 249 return -ERESTART;
250
251 /* The Guest is dead => "No such file or directory" */ 251 /* The Guest is dead => "No such file or directory" */
252 return -ENOENT; 252 return -ENOENT;
253} 253}
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 0f2cb4fd7c69..54d66f05fefa 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -29,7 +29,7 @@
29#include "lg.h" 29#include "lg.h"
30 30
31/*H:120 This is the core hypercall routine: where the Guest gets what it wants. 31/*H:120 This is the core hypercall routine: where the Guest gets what it wants.
32 * Or gets killed. Or, in the case of LHCALL_CRASH, both. */ 32 * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. */
33static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) 33static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
34{ 34{
35 switch (args->arg0) { 35 switch (args->arg0) {
@@ -190,6 +190,13 @@ static void initialize(struct lg_cpu *cpu)
190 * pagetable. */ 190 * pagetable. */
191 guest_pagetable_clear_all(cpu); 191 guest_pagetable_clear_all(cpu);
192} 192}
193/*:*/
194
195/*M:013 If a Guest reads from a page (so creates a mapping) that it has never
196 * written to, and then the Launcher writes to it (ie. the output of a virtual
197 * device), the Guest will still see the old page. In practice, this never
198 * happens: why would the Guest read a page which it has never written to? But
199 * a similar scenario might one day bite us, so it's worth mentioning. :*/
193 200
194/*H:100 201/*H:100
195 * Hypercalls 202 * Hypercalls
@@ -227,7 +234,7 @@ void do_hypercalls(struct lg_cpu *cpu)
227 * However, if we are signalled or the Guest sends I/O to the 234 * However, if we are signalled or the Guest sends I/O to the
228 * Launcher, the run_guest() loop will exit without running the 235 * Launcher, the run_guest() loop will exit without running the
229 * Guest. When it comes back it would try to re-run the 236 * Guest. When it comes back it would try to re-run the
230 * hypercall. */ 237 * hypercall. Finding that bug sucked. */
231 cpu->hcall = NULL; 238 cpu->hcall = NULL;
232 } 239 }
233} 240}
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 32e97c1858e5..0414ddf87587 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -144,7 +144,6 @@ void maybe_do_interrupt(struct lg_cpu *cpu)
144 if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, 144 if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
145 sizeof(blk))) 145 sizeof(blk)))
146 return; 146 return;
147
148 bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS); 147 bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
149 148
150 /* Find the first interrupt. */ 149 /* Find the first interrupt. */
@@ -237,9 +236,9 @@ void free_interrupts(void)
237 clear_bit(syscall_vector, used_vectors); 236 clear_bit(syscall_vector, used_vectors);
238} 237}
239 238
240/*H:220 Now we've got the routines to deliver interrupts, delivering traps 239/*H:220 Now we've got the routines to deliver interrupts, delivering traps like
241 * like page fault is easy. The only trick is that Intel decided that some 240 * page fault is easy. The only trick is that Intel decided that some traps
242 * traps should have error codes: */ 241 * should have error codes: */
243static int has_err(unsigned int trap) 242static int has_err(unsigned int trap)
244{ 243{
245 return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); 244 return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17);
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 1b2ec0bf5eb1..2bc9bf7e88e5 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -1,10 +1,10 @@
1/*P:050 Lguest guests use a very simple method to describe devices. It's a 1/*P:050 Lguest guests use a very simple method to describe devices. It's a
2 * series of device descriptors contained just above the top of normal 2 * series of device descriptors contained just above the top of normal Guest
3 * memory. 3 * memory.
4 * 4 *
5 * We use the standard "virtio" device infrastructure, which provides us with a 5 * We use the standard "virtio" device infrastructure, which provides us with a
6 * console, a network and a block driver. Each one expects some configuration 6 * console, a network and a block driver. Each one expects some configuration
7 * information and a "virtqueue" mechanism to send and receive data. :*/ 7 * information and a "virtqueue" or two to send and receive data. :*/
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/bootmem.h> 9#include <linux/bootmem.h>
10#include <linux/lguest_launcher.h> 10#include <linux/lguest_launcher.h>
@@ -53,7 +53,7 @@ struct lguest_device {
53 * Device configurations 53 * Device configurations
54 * 54 *
55 * The configuration information for a device consists of one or more 55 * The configuration information for a device consists of one or more
56 * virtqueues, a feature bitmaks, and some configuration bytes. The 56 * virtqueues, a feature bitmap, and some configuration bytes. The
57 * configuration bytes don't really matter to us: the Launcher sets them up, and 57 * configuration bytes don't really matter to us: the Launcher sets them up, and
58 * the driver will look at them during setup. 58 * the driver will look at them during setup.
59 * 59 *
@@ -179,7 +179,7 @@ struct lguest_vq_info
179}; 179};
180 180
181/* When the virtio_ring code wants to prod the Host, it calls us here and we 181/* When the virtio_ring code wants to prod the Host, it calls us here and we
182 * make a hypercall. We hand the page number of the virtqueue so the Host 182 * make a hypercall. We hand the physical address of the virtqueue so the Host
183 * knows which virtqueue we're talking about. */ 183 * knows which virtqueue we're talking about. */
184static void lg_notify(struct virtqueue *vq) 184static void lg_notify(struct virtqueue *vq)
185{ 185{
@@ -199,7 +199,8 @@ static void lg_notify(struct virtqueue *vq)
199 * allocate its own pages and tell the Host where they are, but for lguest it's 199 * allocate its own pages and tell the Host where they are, but for lguest it's
200 * simpler for the Host to simply tell us where the pages are. 200 * simpler for the Host to simply tell us where the pages are.
201 * 201 *
202 * So we provide devices with a "find virtqueue and set it up" function. */ 202 * So we provide drivers with a "find the Nth virtqueue and set it up"
203 * function. */
203static struct virtqueue *lg_find_vq(struct virtio_device *vdev, 204static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
204 unsigned index, 205 unsigned index,
205 void (*callback)(struct virtqueue *vq)) 206 void (*callback)(struct virtqueue *vq))
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 2221485b0773..564e425d71dd 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -73,7 +73,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
73 if (current != cpu->tsk) 73 if (current != cpu->tsk)
74 return -EPERM; 74 return -EPERM;
75 75
76 /* If the guest is already dead, we indicate why */ 76 /* If the Guest is already dead, we indicate why */
77 if (lg->dead) { 77 if (lg->dead) {
78 size_t len; 78 size_t len;
79 79
@@ -88,7 +88,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
88 return len; 88 return len;
89 } 89 }
90 90
91 /* If we returned from read() last time because the Guest notified, 91 /* If we returned from read() last time because the Guest sent I/O,
92 * clear the flag. */ 92 * clear the flag. */
93 if (cpu->pending_notify) 93 if (cpu->pending_notify)
94 cpu->pending_notify = 0; 94 cpu->pending_notify = 0;
@@ -97,14 +97,20 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
97 return run_guest(cpu, (unsigned long __user *)user); 97 return run_guest(cpu, (unsigned long __user *)user);
98} 98}
99 99
100/*L:025 This actually initializes a CPU. For the moment, a Guest is only
101 * uniprocessor, so "id" is always 0. */
100static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) 102static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
101{ 103{
104 /* We have a limited number the number of CPUs in the lguest struct. */
102 if (id >= NR_CPUS) 105 if (id >= NR_CPUS)
103 return -EINVAL; 106 return -EINVAL;
104 107
108 /* Set up this CPU's id, and pointer back to the lguest struct. */
105 cpu->id = id; 109 cpu->id = id;
106 cpu->lg = container_of((cpu - id), struct lguest, cpus[0]); 110 cpu->lg = container_of((cpu - id), struct lguest, cpus[0]);
107 cpu->lg->nr_cpus++; 111 cpu->lg->nr_cpus++;
112
113 /* Each CPU has a timer it can set. */
108 init_clockdev(cpu); 114 init_clockdev(cpu);
109 115
110 /* We need a complete page for the Guest registers: they are accessible 116 /* We need a complete page for the Guest registers: they are accessible
@@ -120,11 +126,11 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
120 * address. */ 126 * address. */
121 lguest_arch_setup_regs(cpu, start_ip); 127 lguest_arch_setup_regs(cpu, start_ip);
122 128
123 /* Initialize the queue for the waker to wait on */ 129 /* Initialize the queue for the Waker to wait on */
124 init_waitqueue_head(&cpu->break_wq); 130 init_waitqueue_head(&cpu->break_wq);
125 131
126 /* We keep a pointer to the Launcher task (ie. current task) for when 132 /* We keep a pointer to the Launcher task (ie. current task) for when
127 * other Guests want to wake this one (inter-Guest I/O). */ 133 * other Guests want to wake this one (eg. console input). */
128 cpu->tsk = current; 134 cpu->tsk = current;
129 135
130 /* We need to keep a pointer to the Launcher's memory map, because if 136 /* We need to keep a pointer to the Launcher's memory map, because if
@@ -136,6 +142,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
136 * when the same Guest runs on the same CPU twice. */ 142 * when the same Guest runs on the same CPU twice. */
137 cpu->last_pages = NULL; 143 cpu->last_pages = NULL;
138 144
145 /* No error == success. */
139 return 0; 146 return 0;
140} 147}
141 148
@@ -185,14 +192,13 @@ static int initialize(struct file *file, const unsigned long __user *input)
185 lg->mem_base = (void __user *)(long)args[0]; 192 lg->mem_base = (void __user *)(long)args[0];
186 lg->pfn_limit = args[1]; 193 lg->pfn_limit = args[1];
187 194
188 /* This is the first cpu */ 195 /* This is the first cpu (cpu 0) and it will start booting at args[3] */
189 err = lg_cpu_start(&lg->cpus[0], 0, args[3]); 196 err = lg_cpu_start(&lg->cpus[0], 0, args[3]);
190 if (err) 197 if (err)
191 goto release_guest; 198 goto release_guest;
192 199
193 /* Initialize the Guest's shadow page tables, using the toplevel 200 /* Initialize the Guest's shadow page tables, using the toplevel
194 * address the Launcher gave us. This allocates memory, so can 201 * address the Launcher gave us. This allocates memory, so can fail. */
195 * fail. */
196 err = init_guest_pagetable(lg, args[2]); 202 err = init_guest_pagetable(lg, args[2]);
197 if (err) 203 if (err)
198 goto free_regs; 204 goto free_regs;
@@ -218,11 +224,16 @@ unlock:
218/*L:010 The first operation the Launcher does must be a write. All writes 224/*L:010 The first operation the Launcher does must be a write. All writes
219 * start with an unsigned long number: for the first write this must be 225 * start with an unsigned long number: for the first write this must be
220 * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use 226 * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use
221 * writes of other values to send interrupts. */ 227 * writes of other values to send interrupts.
228 *
229 * Note that we overload the "offset" in the /dev/lguest file to indicate what
230 * CPU number we're dealing with. Currently this is always 0, since we only
231 * support uniprocessor Guests, but you can see the beginnings of SMP support
232 * here. */
222static ssize_t write(struct file *file, const char __user *in, 233static ssize_t write(struct file *file, const char __user *in,
223 size_t size, loff_t *off) 234 size_t size, loff_t *off)
224{ 235{
225 /* Once the guest is initialized, we hold the "struct lguest" in the 236 /* Once the Guest is initialized, we hold the "struct lguest" in the
226 * file private data. */ 237 * file private data. */
227 struct lguest *lg = file->private_data; 238 struct lguest *lg = file->private_data;
228 const unsigned long __user *input = (const unsigned long __user *)in; 239 const unsigned long __user *input = (const unsigned long __user *)in;
@@ -230,6 +241,7 @@ static ssize_t write(struct file *file, const char __user *in,
230 struct lg_cpu *uninitialized_var(cpu); 241 struct lg_cpu *uninitialized_var(cpu);
231 unsigned int cpu_id = *off; 242 unsigned int cpu_id = *off;
232 243
244 /* The first value tells us what this request is. */
233 if (get_user(req, input) != 0) 245 if (get_user(req, input) != 0)
234 return -EFAULT; 246 return -EFAULT;
235 input++; 247 input++;
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a7f64a9d67e0..d93500f24fbb 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -2,8 +2,8 @@
2 * previous encounters. It's functional, and as neat as it can be in the 2 * previous encounters. It's functional, and as neat as it can be in the
3 * circumstances, but be wary, for these things are subtle and break easily. 3 * circumstances, but be wary, for these things are subtle and break easily.
4 * The Guest provides a virtual to physical mapping, but we can neither trust 4 * The Guest provides a virtual to physical mapping, but we can neither trust
5 * it nor use it: we verify and convert it here to point the hardware to the 5 * it nor use it: we verify and convert it here then point the CPU to the
6 * actual Guest pages when running the Guest. :*/ 6 * converted Guest pages when running the Guest. :*/
7 7
8/* Copyright (C) Rusty Russell IBM Corporation 2006. 8/* Copyright (C) Rusty Russell IBM Corporation 2006.
9 * GPL v2 and any later version */ 9 * GPL v2 and any later version */
@@ -106,6 +106,11 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
106 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); 106 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
107 return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t); 107 return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t);
108} 108}
109/*:*/
110
111/*M:014 get_pfn is slow; it takes the mmap sem and calls get_user_pages. We
112 * could probably try to grab batches of pages here as an optimization
113 * (ie. pre-faulting). :*/
109 114
110/*H:350 This routine takes a page number given by the Guest and converts it to 115/*H:350 This routine takes a page number given by the Guest and converts it to
111 * an actual, physical page number. It can fail for several reasons: the 116 * an actual, physical page number. It can fail for several reasons: the
@@ -113,8 +118,8 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
113 * and the page is read-only, or the write flag was set and the page was 118 * and the page is read-only, or the write flag was set and the page was
114 * shared so had to be copied, but we ran out of memory. 119 * shared so had to be copied, but we ran out of memory.
115 * 120 *
116 * This holds a reference to the page, so release_pte() is careful to 121 * This holds a reference to the page, so release_pte() is careful to put that
117 * put that back. */ 122 * back. */
118static unsigned long get_pfn(unsigned long virtpfn, int write) 123static unsigned long get_pfn(unsigned long virtpfn, int write)
119{ 124{
120 struct page *page; 125 struct page *page;
@@ -532,13 +537,13 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
532 * all processes. So when the page table above that address changes, we update 537 * all processes. So when the page table above that address changes, we update
533 * all the page tables, not just the current one. This is rare. 538 * all the page tables, not just the current one. This is rare.
534 * 539 *
535 * The benefit is that when we have to track a new page table, we can copy keep 540 * The benefit is that when we have to track a new page table, we can keep all
536 * all the kernel mappings. This speeds up context switch immensely. */ 541 * the kernel mappings. This speeds up context switch immensely. */
537void guest_set_pte(struct lg_cpu *cpu, 542void guest_set_pte(struct lg_cpu *cpu,
538 unsigned long gpgdir, unsigned long vaddr, pte_t gpte) 543 unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
539{ 544{
540 /* Kernel mappings must be changed on all top levels. Slow, but 545 /* Kernel mappings must be changed on all top levels. Slow, but doesn't
541 * doesn't happen often. */ 546 * happen often. */
542 if (vaddr >= cpu->lg->kernel_address) { 547 if (vaddr >= cpu->lg->kernel_address) {
543 unsigned int i; 548 unsigned int i;
544 for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) 549 for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++)
@@ -704,12 +709,11 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
704/* We've made it through the page table code. Perhaps our tired brains are 709/* We've made it through the page table code. Perhaps our tired brains are
705 * still processing the details, or perhaps we're simply glad it's over. 710 * still processing the details, or perhaps we're simply glad it's over.
706 * 711 *
707 * If nothing else, note that all this complexity in juggling shadow page 712 * If nothing else, note that all this complexity in juggling shadow page tables
708 * tables in sync with the Guest's page tables is for one reason: for most 713 * in sync with the Guest's page tables is for one reason: for most Guests this
709 * Guests this page table dance determines how bad performance will be. This 714 * page table dance determines how bad performance will be. This is why Xen
710 * is why Xen uses exotic direct Guest pagetable manipulation, and why both 715 * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD
711 * Intel and AMD have implemented shadow page table support directly into 716 * have implemented shadow page table support directly into hardware.
712 * hardware.
713 * 717 *
714 * There is just one file remaining in the Host. */ 718 * There is just one file remaining in the Host. */
715 719
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 635187812d52..5126d5d9ea0e 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -17,6 +17,13 @@
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */ 19 */
20/*P:450 This file contains the x86-specific lguest code. It used to be all
21 * mixed in with drivers/lguest/core.c but several foolhardy code slashers
22 * wrestled most of the dependencies out to here in preparation for porting
23 * lguest to other architectures (see what I mean by foolhardy?).
24 *
25 * This also contains a couple of non-obvious setup and teardown pieces which
26 * were implemented after days of debugging pain. :*/
20#include <linux/kernel.h> 27#include <linux/kernel.h>
21#include <linux/start_kernel.h> 28#include <linux/start_kernel.h>
22#include <linux/string.h> 29#include <linux/string.h>
@@ -157,6 +164,8 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
157 * also simplify copy_in_guest_info(). Note that we'd still need to restore 164 * also simplify copy_in_guest_info(). Note that we'd still need to restore
158 * things when we exit to Launcher userspace, but that's fairly easy. 165 * things when we exit to Launcher userspace, but that's fairly easy.
159 * 166 *
167 * We could also try using this hooks for PGE, but that might be too expensive.
168 *
160 * The hooks were designed for KVM, but we can also put them to good use. :*/ 169 * The hooks were designed for KVM, but we can also put them to good use. :*/
161 170
162/*H:040 This is the i386-specific code to setup and run the Guest. Interrupts 171/*H:040 This is the i386-specific code to setup and run the Guest. Interrupts
@@ -182,7 +191,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
182 * was doing. */ 191 * was doing. */
183 run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); 192 run_guest_once(cpu, lguest_pages(raw_smp_processor_id()));
184 193
185 /* Note that the "regs" pointer contains two extra entries which are 194 /* Note that the "regs" structure contains two extra entries which are
186 * not really registers: a trap number which says what interrupt or 195 * not really registers: a trap number which says what interrupt or
187 * trap made the switcher code come back, and an error code which some 196 * trap made the switcher code come back, and an error code which some
188 * traps set. */ 197 * traps set. */
@@ -293,11 +302,10 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
293 break; 302 break;
294 case 14: /* We've intercepted a Page Fault. */ 303 case 14: /* We've intercepted a Page Fault. */
295 /* The Guest accessed a virtual address that wasn't mapped. 304 /* The Guest accessed a virtual address that wasn't mapped.
296 * This happens a lot: we don't actually set up most of the 305 * This happens a lot: we don't actually set up most of the page
297 * page tables for the Guest at all when we start: as it runs 306 * tables for the Guest at all when we start: as it runs it asks
298 * it asks for more and more, and we set them up as 307 * for more and more, and we set them up as required. In this
299 * required. In this case, we don't even tell the Guest that 308 * case, we don't even tell the Guest that the fault happened.
300 * the fault happened.
301 * 309 *
302 * The errcode tells whether this was a read or a write, and 310 * The errcode tells whether this was a read or a write, and
303 * whether kernel or userspace code. */ 311 * whether kernel or userspace code. */
@@ -342,7 +350,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
342 if (!deliver_trap(cpu, cpu->regs->trapnum)) 350 if (!deliver_trap(cpu, cpu->regs->trapnum))
343 /* If the Guest doesn't have a handler (either it hasn't 351 /* If the Guest doesn't have a handler (either it hasn't
344 * registered any yet, or it's one of the faults we don't let 352 * registered any yet, or it's one of the faults we don't let
345 * it handle), it dies with a cryptic error message. */ 353 * it handle), it dies with this cryptic error message. */
346 kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", 354 kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)",
347 cpu->regs->trapnum, cpu->regs->eip, 355 cpu->regs->trapnum, cpu->regs->eip,
348 cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault 356 cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault
@@ -375,8 +383,8 @@ void __init lguest_arch_host_init(void)
375 * The only exception is the interrupt handlers in switcher.S: their 383 * The only exception is the interrupt handlers in switcher.S: their
376 * addresses are placed in a table (default_idt_entries), so we need to 384 * addresses are placed in a table (default_idt_entries), so we need to
377 * update the table with the new addresses. switcher_offset() is a 385 * update the table with the new addresses. switcher_offset() is a
378 * convenience function which returns the distance between the builtin 386 * convenience function which returns the distance between the
379 * switcher code and the high-mapped copy we just made. */ 387 * compiled-in switcher code and the high-mapped copy we just made. */
380 for (i = 0; i < IDT_ENTRIES; i++) 388 for (i = 0; i < IDT_ENTRIES; i++)
381 default_idt_entries[i] += switcher_offset(); 389 default_idt_entries[i] += switcher_offset();
382 390
@@ -416,7 +424,7 @@ void __init lguest_arch_host_init(void)
416 state->guest_gdt_desc.address = (long)&state->guest_gdt; 424 state->guest_gdt_desc.address = (long)&state->guest_gdt;
417 425
418 /* We know where we want the stack to be when the Guest enters 426 /* We know where we want the stack to be when the Guest enters
419 * the switcher: in pages->regs. The stack grows upwards, so 427 * the Switcher: in pages->regs. The stack grows upwards, so
420 * we start it at the end of that structure. */ 428 * we start it at the end of that structure. */
421 state->guest_tss.sp0 = (long)(&pages->regs + 1); 429 state->guest_tss.sp0 = (long)(&pages->regs + 1);
422 /* And this is the GDT entry to use for the stack: we keep a 430 /* And this is the GDT entry to use for the stack: we keep a
@@ -513,8 +521,8 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
513{ 521{
514 u32 tsc_speed; 522 u32 tsc_speed;
515 523
516 /* The pointer to the Guest's "struct lguest_data" is the only 524 /* The pointer to the Guest's "struct lguest_data" is the only argument.
517 * argument. We check that address now. */ 525 * We check that address now. */
518 if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, 526 if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1,
519 sizeof(*cpu->lg->lguest_data))) 527 sizeof(*cpu->lg->lguest_data)))
520 return -EFAULT; 528 return -EFAULT;
@@ -546,6 +554,7 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
546 554
547 return 0; 555 return 0;
548} 556}
557/*:*/
549 558
550/*L:030 lguest_arch_setup_regs() 559/*L:030 lguest_arch_setup_regs()
551 * 560 *
diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S
index 0af8baaa0d4a..3fc15318a80f 100644
--- a/drivers/lguest/x86/switcher_32.S
+++ b/drivers/lguest/x86/switcher_32.S
@@ -1,6 +1,6 @@
1/*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level 1/*P:900 This is the Switcher: code which sits at 0xFFC00000 astride both the
2 * Guest<->Host switch. It is as simple as it can be made, but it's naturally 2 * Host and Guest to do the low-level Guest<->Host switch. It is as simple as
3 * very specific to x86. 3 * it can be made, but it's naturally very specific to x86.
4 * 4 *
5 * You have now completed Preparation. If this has whet your appetite; if you 5 * You have now completed Preparation. If this has whet your appetite; if you
6 * are feeling invigorated and refreshed then the next, more challenging stage 6 * are feeling invigorated and refreshed then the next, more challenging stage
@@ -189,7 +189,7 @@ ENTRY(switch_to_guest)
189 // Interrupts are turned back on: we are Guest. 189 // Interrupts are turned back on: we are Guest.
190 iret 190 iret
191 191
192// We treat two paths to switch back to the Host 192// We tread two paths to switch back to the Host
193// Yet both must save Guest state and restore Host 193// Yet both must save Guest state and restore Host
194// So we put the routine in a macro. 194// So we put the routine in a macro.
195#define SWITCH_TO_HOST \ 195#define SWITCH_TO_HOST \