diff options
Diffstat (limited to 'drivers/lguest')
-rw-r--r-- | drivers/lguest/core.c | 18 | ||||
-rw-r--r-- | drivers/lguest/hypercalls.c | 11 | ||||
-rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 7 | ||||
-rw-r--r-- | drivers/lguest/lguest_device.c | 11 | ||||
-rw-r--r-- | drivers/lguest/lguest_user.c | 30 | ||||
-rw-r--r-- | drivers/lguest/page_tables.c | 32 | ||||
-rw-r--r-- | drivers/lguest/x86/core.c | 33 | ||||
-rw-r--r-- | drivers/lguest/x86/switcher_32.S | 8 |
8 files changed, 91 insertions, 59 deletions
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index c632c08cbbdc..5eea4356d703 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /*P:400 This contains run_guest() which actually calls into the Host<->Guest | 1 | /*P:400 This contains run_guest() which actually calls into the Host<->Guest |
2 | * Switcher and analyzes the return, such as determining if the Guest wants the | 2 | * Switcher and analyzes the return, such as determining if the Guest wants the |
3 | * Host to do something. This file also contains useful helper routines, and a | 3 | * Host to do something. This file also contains useful helper routines. :*/ |
4 | * couple of non-obvious setup and teardown pieces which were implemented after | ||
5 | * days of debugging pain. :*/ | ||
6 | #include <linux/module.h> | 4 | #include <linux/module.h> |
7 | #include <linux/stringify.h> | 5 | #include <linux/stringify.h> |
8 | #include <linux/stddef.h> | 6 | #include <linux/stddef.h> |
@@ -49,8 +47,8 @@ static __init int map_switcher(void) | |||
49 | * easy. | 47 | * easy. |
50 | */ | 48 | */ |
51 | 49 | ||
52 | /* We allocate an array of "struct page"s. map_vm_area() wants the | 50 | /* We allocate an array of struct page pointers. map_vm_area() wants |
53 | * pages in this form, rather than just an array of pointers. */ | 51 | * this, rather than just an array of pages. */ |
54 | switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, | 52 | switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, |
55 | GFP_KERNEL); | 53 | GFP_KERNEL); |
56 | if (!switcher_page) { | 54 | if (!switcher_page) { |
@@ -172,7 +170,7 @@ void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) | |||
172 | } | 170 | } |
173 | } | 171 | } |
174 | 172 | ||
175 | /* This is the write (copy into guest) version. */ | 173 | /* This is the write (copy into Guest) version. */ |
176 | void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, | 174 | void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, |
177 | unsigned bytes) | 175 | unsigned bytes) |
178 | { | 176 | { |
@@ -209,9 +207,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
209 | if (cpu->break_out) | 207 | if (cpu->break_out) |
210 | return -EAGAIN; | 208 | return -EAGAIN; |
211 | 209 | ||
212 | /* Check if there are any interrupts which can be delivered | 210 | /* Check if there are any interrupts which can be delivered now: |
213 | * now: if so, this sets up the handler to be executed when we | 211 | * if so, this sets up the handler to be executed when we next |
214 | * next run the Guest. */ | 212 | * run the Guest. */ |
215 | maybe_do_interrupt(cpu); | 213 | maybe_do_interrupt(cpu); |
216 | 214 | ||
217 | /* All long-lived kernel loops need to check with this horrible | 215 | /* All long-lived kernel loops need to check with this horrible |
@@ -246,8 +244,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
246 | lguest_arch_handle_trap(cpu); | 244 | lguest_arch_handle_trap(cpu); |
247 | } | 245 | } |
248 | 246 | ||
247 | /* Special case: Guest is 'dead' but wants a reboot. */ | ||
249 | if (cpu->lg->dead == ERR_PTR(-ERESTART)) | 248 | if (cpu->lg->dead == ERR_PTR(-ERESTART)) |
250 | return -ERESTART; | 249 | return -ERESTART; |
250 | |||
251 | /* The Guest is dead => "No such file or directory" */ | 251 | /* The Guest is dead => "No such file or directory" */ |
252 | return -ENOENT; | 252 | return -ENOENT; |
253 | } | 253 | } |
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 0f2cb4fd7c69..54d66f05fefa 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include "lg.h" | 29 | #include "lg.h" |
30 | 30 | ||
31 | /*H:120 This is the core hypercall routine: where the Guest gets what it wants. | 31 | /*H:120 This is the core hypercall routine: where the Guest gets what it wants. |
32 | * Or gets killed. Or, in the case of LHCALL_CRASH, both. */ | 32 | * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. */ |
33 | static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | 33 | static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) |
34 | { | 34 | { |
35 | switch (args->arg0) { | 35 | switch (args->arg0) { |
@@ -190,6 +190,13 @@ static void initialize(struct lg_cpu *cpu) | |||
190 | * pagetable. */ | 190 | * pagetable. */ |
191 | guest_pagetable_clear_all(cpu); | 191 | guest_pagetable_clear_all(cpu); |
192 | } | 192 | } |
193 | /*:*/ | ||
194 | |||
195 | /*M:013 If a Guest reads from a page (so creates a mapping) that it has never | ||
196 | * written to, and then the Launcher writes to it (ie. the output of a virtual | ||
197 | * device), the Guest will still see the old page. In practice, this never | ||
198 | * happens: why would the Guest read a page which it has never written to? But | ||
199 | * a similar scenario might one day bite us, so it's worth mentioning. :*/ | ||
193 | 200 | ||
194 | /*H:100 | 201 | /*H:100 |
195 | * Hypercalls | 202 | * Hypercalls |
@@ -227,7 +234,7 @@ void do_hypercalls(struct lg_cpu *cpu) | |||
227 | * However, if we are signalled or the Guest sends I/O to the | 234 | * However, if we are signalled or the Guest sends I/O to the |
228 | * Launcher, the run_guest() loop will exit without running the | 235 | * Launcher, the run_guest() loop will exit without running the |
229 | * Guest. When it comes back it would try to re-run the | 236 | * Guest. When it comes back it would try to re-run the |
230 | * hypercall. */ | 237 | * hypercall. Finding that bug sucked. */ |
231 | cpu->hcall = NULL; | 238 | cpu->hcall = NULL; |
232 | } | 239 | } |
233 | } | 240 | } |
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 32e97c1858e5..0414ddf87587 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
@@ -144,7 +144,6 @@ void maybe_do_interrupt(struct lg_cpu *cpu) | |||
144 | if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, | 144 | if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, |
145 | sizeof(blk))) | 145 | sizeof(blk))) |
146 | return; | 146 | return; |
147 | |||
148 | bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS); | 147 | bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS); |
149 | 148 | ||
150 | /* Find the first interrupt. */ | 149 | /* Find the first interrupt. */ |
@@ -237,9 +236,9 @@ void free_interrupts(void) | |||
237 | clear_bit(syscall_vector, used_vectors); | 236 | clear_bit(syscall_vector, used_vectors); |
238 | } | 237 | } |
239 | 238 | ||
240 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps | 239 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps like |
241 | * like page fault is easy. The only trick is that Intel decided that some | 240 | * page fault is easy. The only trick is that Intel decided that some traps |
242 | * traps should have error codes: */ | 241 | * should have error codes: */ |
243 | static int has_err(unsigned int trap) | 242 | static int has_err(unsigned int trap) |
244 | { | 243 | { |
245 | return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); | 244 | return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); |
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1b2ec0bf5eb1..2bc9bf7e88e5 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c | |||
@@ -1,10 +1,10 @@ | |||
1 | /*P:050 Lguest guests use a very simple method to describe devices. It's a | 1 | /*P:050 Lguest guests use a very simple method to describe devices. It's a |
2 | * series of device descriptors contained just above the top of normal | 2 | * series of device descriptors contained just above the top of normal Guest |
3 | * memory. | 3 | * memory. |
4 | * | 4 | * |
5 | * We use the standard "virtio" device infrastructure, which provides us with a | 5 | * We use the standard "virtio" device infrastructure, which provides us with a |
6 | * console, a network and a block driver. Each one expects some configuration | 6 | * console, a network and a block driver. Each one expects some configuration |
7 | * information and a "virtqueue" mechanism to send and receive data. :*/ | 7 | * information and a "virtqueue" or two to send and receive data. :*/ |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/bootmem.h> | 9 | #include <linux/bootmem.h> |
10 | #include <linux/lguest_launcher.h> | 10 | #include <linux/lguest_launcher.h> |
@@ -53,7 +53,7 @@ struct lguest_device { | |||
53 | * Device configurations | 53 | * Device configurations |
54 | * | 54 | * |
55 | * The configuration information for a device consists of one or more | 55 | * The configuration information for a device consists of one or more |
56 | * virtqueues, a feature bitmaks, and some configuration bytes. The | 56 | * virtqueues, a feature bitmap, and some configuration bytes. The |
57 | * configuration bytes don't really matter to us: the Launcher sets them up, and | 57 | * configuration bytes don't really matter to us: the Launcher sets them up, and |
58 | * the driver will look at them during setup. | 58 | * the driver will look at them during setup. |
59 | * | 59 | * |
@@ -179,7 +179,7 @@ struct lguest_vq_info | |||
179 | }; | 179 | }; |
180 | 180 | ||
181 | /* When the virtio_ring code wants to prod the Host, it calls us here and we | 181 | /* When the virtio_ring code wants to prod the Host, it calls us here and we |
182 | * make a hypercall. We hand the page number of the virtqueue so the Host | 182 | * make a hypercall. We hand the physical address of the virtqueue so the Host |
183 | * knows which virtqueue we're talking about. */ | 183 | * knows which virtqueue we're talking about. */ |
184 | static void lg_notify(struct virtqueue *vq) | 184 | static void lg_notify(struct virtqueue *vq) |
185 | { | 185 | { |
@@ -199,7 +199,8 @@ static void lg_notify(struct virtqueue *vq) | |||
199 | * allocate its own pages and tell the Host where they are, but for lguest it's | 199 | * allocate its own pages and tell the Host where they are, but for lguest it's |
200 | * simpler for the Host to simply tell us where the pages are. | 200 | * simpler for the Host to simply tell us where the pages are. |
201 | * | 201 | * |
202 | * So we provide devices with a "find virtqueue and set it up" function. */ | 202 | * So we provide drivers with a "find the Nth virtqueue and set it up" |
203 | * function. */ | ||
203 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | 204 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, |
204 | unsigned index, | 205 | unsigned index, |
205 | void (*callback)(struct virtqueue *vq)) | 206 | void (*callback)(struct virtqueue *vq)) |
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 2221485b0773..564e425d71dd 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -73,7 +73,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
73 | if (current != cpu->tsk) | 73 | if (current != cpu->tsk) |
74 | return -EPERM; | 74 | return -EPERM; |
75 | 75 | ||
76 | /* If the guest is already dead, we indicate why */ | 76 | /* If the Guest is already dead, we indicate why */ |
77 | if (lg->dead) { | 77 | if (lg->dead) { |
78 | size_t len; | 78 | size_t len; |
79 | 79 | ||
@@ -88,7 +88,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
88 | return len; | 88 | return len; |
89 | } | 89 | } |
90 | 90 | ||
91 | /* If we returned from read() last time because the Guest notified, | 91 | /* If we returned from read() last time because the Guest sent I/O, |
92 | * clear the flag. */ | 92 | * clear the flag. */ |
93 | if (cpu->pending_notify) | 93 | if (cpu->pending_notify) |
94 | cpu->pending_notify = 0; | 94 | cpu->pending_notify = 0; |
@@ -97,14 +97,20 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
97 | return run_guest(cpu, (unsigned long __user *)user); | 97 | return run_guest(cpu, (unsigned long __user *)user); |
98 | } | 98 | } |
99 | 99 | ||
100 | /*L:025 This actually initializes a CPU. For the moment, a Guest is only | ||
101 | * uniprocessor, so "id" is always 0. */ | ||
100 | static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | 102 | static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) |
101 | { | 103 | { |
104 | /* We have a limited number of CPUs in the lguest struct. */ | ||
102 | if (id >= NR_CPUS) | 105 | if (id >= NR_CPUS) |
103 | return -EINVAL; | 106 | return -EINVAL; |
104 | 107 | ||
108 | /* Set up this CPU's id, and pointer back to the lguest struct. */ | ||
105 | cpu->id = id; | 109 | cpu->id = id; |
106 | cpu->lg = container_of((cpu - id), struct lguest, cpus[0]); | 110 | cpu->lg = container_of((cpu - id), struct lguest, cpus[0]); |
107 | cpu->lg->nr_cpus++; | 111 | cpu->lg->nr_cpus++; |
112 | |||
113 | /* Each CPU has a timer it can set. */ | ||
108 | init_clockdev(cpu); | 114 | init_clockdev(cpu); |
109 | 115 | ||
110 | /* We need a complete page for the Guest registers: they are accessible | 116 | /* We need a complete page for the Guest registers: they are accessible |
@@ -120,11 +126,11 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | |||
120 | * address. */ | 126 | * address. */ |
121 | lguest_arch_setup_regs(cpu, start_ip); | 127 | lguest_arch_setup_regs(cpu, start_ip); |
122 | 128 | ||
123 | /* Initialize the queue for the waker to wait on */ | 129 | /* Initialize the queue for the Waker to wait on */ |
124 | init_waitqueue_head(&cpu->break_wq); | 130 | init_waitqueue_head(&cpu->break_wq); |
125 | 131 | ||
126 | /* We keep a pointer to the Launcher task (ie. current task) for when | 132 | /* We keep a pointer to the Launcher task (ie. current task) for when |
127 | * other Guests want to wake this one (inter-Guest I/O). */ | 133 | * other Guests want to wake this one (eg. console input). */ |
128 | cpu->tsk = current; | 134 | cpu->tsk = current; |
129 | 135 | ||
130 | /* We need to keep a pointer to the Launcher's memory map, because if | 136 | /* We need to keep a pointer to the Launcher's memory map, because if |
@@ -136,6 +142,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | |||
136 | * when the same Guest runs on the same CPU twice. */ | 142 | * when the same Guest runs on the same CPU twice. */ |
137 | cpu->last_pages = NULL; | 143 | cpu->last_pages = NULL; |
138 | 144 | ||
145 | /* No error == success. */ | ||
139 | return 0; | 146 | return 0; |
140 | } | 147 | } |
141 | 148 | ||
@@ -185,14 +192,13 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
185 | lg->mem_base = (void __user *)(long)args[0]; | 192 | lg->mem_base = (void __user *)(long)args[0]; |
186 | lg->pfn_limit = args[1]; | 193 | lg->pfn_limit = args[1]; |
187 | 194 | ||
188 | /* This is the first cpu */ | 195 | /* This is the first cpu (cpu 0) and it will start booting at args[3] */ |
189 | err = lg_cpu_start(&lg->cpus[0], 0, args[3]); | 196 | err = lg_cpu_start(&lg->cpus[0], 0, args[3]); |
190 | if (err) | 197 | if (err) |
191 | goto release_guest; | 198 | goto release_guest; |
192 | 199 | ||
193 | /* Initialize the Guest's shadow page tables, using the toplevel | 200 | /* Initialize the Guest's shadow page tables, using the toplevel |
194 | * address the Launcher gave us. This allocates memory, so can | 201 | * address the Launcher gave us. This allocates memory, so can fail. */ |
195 | * fail. */ | ||
196 | err = init_guest_pagetable(lg, args[2]); | 202 | err = init_guest_pagetable(lg, args[2]); |
197 | if (err) | 203 | if (err) |
198 | goto free_regs; | 204 | goto free_regs; |
@@ -218,11 +224,16 @@ unlock: | |||
218 | /*L:010 The first operation the Launcher does must be a write. All writes | 224 | /*L:010 The first operation the Launcher does must be a write. All writes |
219 | * start with an unsigned long number: for the first write this must be | 225 | * start with an unsigned long number: for the first write this must be |
220 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use | 226 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use |
221 | * writes of other values to send interrupts. */ | 227 | * writes of other values to send interrupts. |
228 | * | ||
229 | * Note that we overload the "offset" in the /dev/lguest file to indicate what | ||
230 | * CPU number we're dealing with. Currently this is always 0, since we only | ||
231 | * support uniprocessor Guests, but you can see the beginnings of SMP support | ||
232 | * here. */ | ||
222 | static ssize_t write(struct file *file, const char __user *in, | 233 | static ssize_t write(struct file *file, const char __user *in, |
223 | size_t size, loff_t *off) | 234 | size_t size, loff_t *off) |
224 | { | 235 | { |
225 | /* Once the guest is initialized, we hold the "struct lguest" in the | 236 | /* Once the Guest is initialized, we hold the "struct lguest" in the |
226 | * file private data. */ | 237 | * file private data. */ |
227 | struct lguest *lg = file->private_data; | 238 | struct lguest *lg = file->private_data; |
228 | const unsigned long __user *input = (const unsigned long __user *)in; | 239 | const unsigned long __user *input = (const unsigned long __user *)in; |
@@ -230,6 +241,7 @@ static ssize_t write(struct file *file, const char __user *in, | |||
230 | struct lg_cpu *uninitialized_var(cpu); | 241 | struct lg_cpu *uninitialized_var(cpu); |
231 | unsigned int cpu_id = *off; | 242 | unsigned int cpu_id = *off; |
232 | 243 | ||
244 | /* The first value tells us what this request is. */ | ||
233 | if (get_user(req, input) != 0) | 245 | if (get_user(req, input) != 0) |
234 | return -EFAULT; | 246 | return -EFAULT; |
235 | input++; | 247 | input++; |
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index a7f64a9d67e0..d93500f24fbb 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
@@ -2,8 +2,8 @@ | |||
2 | * previous encounters. It's functional, and as neat as it can be in the | 2 | * previous encounters. It's functional, and as neat as it can be in the |
3 | * circumstances, but be wary, for these things are subtle and break easily. | 3 | * circumstances, but be wary, for these things are subtle and break easily. |
4 | * The Guest provides a virtual to physical mapping, but we can neither trust | 4 | * The Guest provides a virtual to physical mapping, but we can neither trust |
5 | * it nor use it: we verify and convert it here to point the hardware to the | 5 | * it nor use it: we verify and convert it here then point the CPU to the |
6 | * actual Guest pages when running the Guest. :*/ | 6 | * converted Guest pages when running the Guest. :*/ |
7 | 7 | ||
8 | /* Copyright (C) Rusty Russell IBM Corporation 2006. | 8 | /* Copyright (C) Rusty Russell IBM Corporation 2006. |
9 | * GPL v2 and any later version */ | 9 | * GPL v2 and any later version */ |
@@ -106,6 +106,11 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) | |||
106 | BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); | 106 | BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); |
107 | return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t); | 107 | return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t); |
108 | } | 108 | } |
109 | /*:*/ | ||
110 | |||
111 | /*M:014 get_pfn is slow; it takes the mmap sem and calls get_user_pages. We | ||
112 | * could probably try to grab batches of pages here as an optimization | ||
113 | * (ie. pre-faulting). :*/ | ||
109 | 114 | ||
110 | /*H:350 This routine takes a page number given by the Guest and converts it to | 115 | /*H:350 This routine takes a page number given by the Guest and converts it to |
111 | * an actual, physical page number. It can fail for several reasons: the | 116 | * an actual, physical page number. It can fail for several reasons: the |
@@ -113,8 +118,8 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) | |||
113 | * and the page is read-only, or the write flag was set and the page was | 118 | * and the page is read-only, or the write flag was set and the page was |
114 | * shared so had to be copied, but we ran out of memory. | 119 | * shared so had to be copied, but we ran out of memory. |
115 | * | 120 | * |
116 | * This holds a reference to the page, so release_pte() is careful to | 121 | * This holds a reference to the page, so release_pte() is careful to put that |
117 | * put that back. */ | 122 | * back. */ |
118 | static unsigned long get_pfn(unsigned long virtpfn, int write) | 123 | static unsigned long get_pfn(unsigned long virtpfn, int write) |
119 | { | 124 | { |
120 | struct page *page; | 125 | struct page *page; |
@@ -532,13 +537,13 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, | |||
532 | * all processes. So when the page table above that address changes, we update | 537 | * all processes. So when the page table above that address changes, we update |
533 | * all the page tables, not just the current one. This is rare. | 538 | * all the page tables, not just the current one. This is rare. |
534 | * | 539 | * |
535 | * The benefit is that when we have to track a new page table, we can copy keep | 540 | * The benefit is that when we have to track a new page table, we can keep all |
536 | * all the kernel mappings. This speeds up context switch immensely. */ | 541 | * the kernel mappings. This speeds up context switch immensely. */ |
537 | void guest_set_pte(struct lg_cpu *cpu, | 542 | void guest_set_pte(struct lg_cpu *cpu, |
538 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) | 543 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) |
539 | { | 544 | { |
540 | /* Kernel mappings must be changed on all top levels. Slow, but | 545 | /* Kernel mappings must be changed on all top levels. Slow, but doesn't |
541 | * doesn't happen often. */ | 546 | * happen often. */ |
542 | if (vaddr >= cpu->lg->kernel_address) { | 547 | if (vaddr >= cpu->lg->kernel_address) { |
543 | unsigned int i; | 548 | unsigned int i; |
544 | for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) | 549 | for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) |
@@ -704,12 +709,11 @@ static __init void populate_switcher_pte_page(unsigned int cpu, | |||
704 | /* We've made it through the page table code. Perhaps our tired brains are | 709 | /* We've made it through the page table code. Perhaps our tired brains are |
705 | * still processing the details, or perhaps we're simply glad it's over. | 710 | * still processing the details, or perhaps we're simply glad it's over. |
706 | * | 711 | * |
707 | * If nothing else, note that all this complexity in juggling shadow page | 712 | * If nothing else, note that all this complexity in juggling shadow page tables |
708 | * tables in sync with the Guest's page tables is for one reason: for most | 713 | * in sync with the Guest's page tables is for one reason: for most Guests this |
709 | * Guests this page table dance determines how bad performance will be. This | 714 | * page table dance determines how bad performance will be. This is why Xen |
710 | * is why Xen uses exotic direct Guest pagetable manipulation, and why both | 715 | * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD |
711 | * Intel and AMD have implemented shadow page table support directly into | 716 | * have implemented shadow page table support directly into hardware. |
712 | * hardware. | ||
713 | * | 717 | * |
714 | * There is just one file remaining in the Host. */ | 718 | * There is just one file remaining in the Host. */ |
715 | 719 | ||
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 635187812d52..5126d5d9ea0e 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
@@ -17,6 +17,13 @@ | |||
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | /*P:450 This file contains the x86-specific lguest code. It used to be all | ||
21 | * mixed in with drivers/lguest/core.c but several foolhardy code slashers | ||
22 | * wrestled most of the dependencies out to here in preparation for porting | ||
23 | * lguest to other architectures (see what I mean by foolhardy?). | ||
24 | * | ||
25 | * This also contains a couple of non-obvious setup and teardown pieces which | ||
26 | * were implemented after days of debugging pain. :*/ | ||
20 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
21 | #include <linux/start_kernel.h> | 28 | #include <linux/start_kernel.h> |
22 | #include <linux/string.h> | 29 | #include <linux/string.h> |
@@ -157,6 +164,8 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) | |||
157 | * also simplify copy_in_guest_info(). Note that we'd still need to restore | 164 | * also simplify copy_in_guest_info(). Note that we'd still need to restore |
158 | * things when we exit to Launcher userspace, but that's fairly easy. | 165 | * things when we exit to Launcher userspace, but that's fairly easy. |
159 | * | 166 | * |
167 | * We could also try using these hooks for PGE, but that might be too expensive. | ||
168 | * | ||
160 | * The hooks were designed for KVM, but we can also put them to good use. :*/ | 169 | * The hooks were designed for KVM, but we can also put them to good use. :*/ |
161 | 170 | ||
162 | /*H:040 This is the i386-specific code to setup and run the Guest. Interrupts | 171 | /*H:040 This is the i386-specific code to setup and run the Guest. Interrupts |
@@ -182,7 +191,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
182 | * was doing. */ | 191 | * was doing. */ |
183 | run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); | 192 | run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); |
184 | 193 | ||
185 | /* Note that the "regs" pointer contains two extra entries which are | 194 | /* Note that the "regs" structure contains two extra entries which are |
186 | * not really registers: a trap number which says what interrupt or | 195 | * not really registers: a trap number which says what interrupt or |
187 | * trap made the switcher code come back, and an error code which some | 196 | * trap made the switcher code come back, and an error code which some |
188 | * traps set. */ | 197 | * traps set. */ |
@@ -293,11 +302,10 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) | |||
293 | break; | 302 | break; |
294 | case 14: /* We've intercepted a Page Fault. */ | 303 | case 14: /* We've intercepted a Page Fault. */ |
295 | /* The Guest accessed a virtual address that wasn't mapped. | 304 | /* The Guest accessed a virtual address that wasn't mapped. |
296 | * This happens a lot: we don't actually set up most of the | 305 | * This happens a lot: we don't actually set up most of the page |
297 | * page tables for the Guest at all when we start: as it runs | 306 | * tables for the Guest at all when we start: as it runs it asks |
298 | * it asks for more and more, and we set them up as | 307 | * for more and more, and we set them up as required. In this |
299 | * required. In this case, we don't even tell the Guest that | 308 | * case, we don't even tell the Guest that the fault happened. |
300 | * the fault happened. | ||
301 | * | 309 | * |
302 | * The errcode tells whether this was a read or a write, and | 310 | * The errcode tells whether this was a read or a write, and |
303 | * whether kernel or userspace code. */ | 311 | * whether kernel or userspace code. */ |
@@ -342,7 +350,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) | |||
342 | if (!deliver_trap(cpu, cpu->regs->trapnum)) | 350 | if (!deliver_trap(cpu, cpu->regs->trapnum)) |
343 | /* If the Guest doesn't have a handler (either it hasn't | 351 | /* If the Guest doesn't have a handler (either it hasn't |
344 | * registered any yet, or it's one of the faults we don't let | 352 | * registered any yet, or it's one of the faults we don't let |
345 | * it handle), it dies with a cryptic error message. */ | 353 | * it handle), it dies with this cryptic error message. */ |
346 | kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", | 354 | kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", |
347 | cpu->regs->trapnum, cpu->regs->eip, | 355 | cpu->regs->trapnum, cpu->regs->eip, |
348 | cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault | 356 | cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault |
@@ -375,8 +383,8 @@ void __init lguest_arch_host_init(void) | |||
375 | * The only exception is the interrupt handlers in switcher.S: their | 383 | * The only exception is the interrupt handlers in switcher.S: their |
376 | * addresses are placed in a table (default_idt_entries), so we need to | 384 | * addresses are placed in a table (default_idt_entries), so we need to |
377 | * update the table with the new addresses. switcher_offset() is a | 385 | * update the table with the new addresses. switcher_offset() is a |
378 | * convenience function which returns the distance between the builtin | 386 | * convenience function which returns the distance between the |
379 | * switcher code and the high-mapped copy we just made. */ | 387 | * compiled-in switcher code and the high-mapped copy we just made. */ |
380 | for (i = 0; i < IDT_ENTRIES; i++) | 388 | for (i = 0; i < IDT_ENTRIES; i++) |
381 | default_idt_entries[i] += switcher_offset(); | 389 | default_idt_entries[i] += switcher_offset(); |
382 | 390 | ||
@@ -416,7 +424,7 @@ void __init lguest_arch_host_init(void) | |||
416 | state->guest_gdt_desc.address = (long)&state->guest_gdt; | 424 | state->guest_gdt_desc.address = (long)&state->guest_gdt; |
417 | 425 | ||
418 | /* We know where we want the stack to be when the Guest enters | 426 | /* We know where we want the stack to be when the Guest enters |
419 | * the switcher: in pages->regs. The stack grows upwards, so | 427 | * the Switcher: in pages->regs. The stack grows upwards, so |
420 | * we start it at the end of that structure. */ | 428 | * we start it at the end of that structure. */ |
421 | state->guest_tss.sp0 = (long)(&pages->regs + 1); | 429 | state->guest_tss.sp0 = (long)(&pages->regs + 1); |
422 | /* And this is the GDT entry to use for the stack: we keep a | 430 | /* And this is the GDT entry to use for the stack: we keep a |
@@ -513,8 +521,8 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) | |||
513 | { | 521 | { |
514 | u32 tsc_speed; | 522 | u32 tsc_speed; |
515 | 523 | ||
516 | /* The pointer to the Guest's "struct lguest_data" is the only | 524 | /* The pointer to the Guest's "struct lguest_data" is the only argument. |
517 | * argument. We check that address now. */ | 525 | * We check that address now. */ |
518 | if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, | 526 | if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, |
519 | sizeof(*cpu->lg->lguest_data))) | 527 | sizeof(*cpu->lg->lguest_data))) |
520 | return -EFAULT; | 528 | return -EFAULT; |
@@ -546,6 +554,7 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) | |||
546 | 554 | ||
547 | return 0; | 555 | return 0; |
548 | } | 556 | } |
557 | /*:*/ | ||
549 | 558 | ||
550 | /*L:030 lguest_arch_setup_regs() | 559 | /*L:030 lguest_arch_setup_regs() |
551 | * | 560 | * |
diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S index 0af8baaa0d4a..3fc15318a80f 100644 --- a/drivers/lguest/x86/switcher_32.S +++ b/drivers/lguest/x86/switcher_32.S | |||
@@ -1,6 +1,6 @@ | |||
1 | /*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level | 1 | /*P:900 This is the Switcher: code which sits at 0xFFC00000 astride both the |
2 | * Guest<->Host switch. It is as simple as it can be made, but it's naturally | 2 | * Host and Guest to do the low-level Guest<->Host switch. It is as simple as |
3 | * very specific to x86. | 3 | * it can be made, but it's naturally very specific to x86. |
4 | * | 4 | * |
5 | * You have now completed Preparation. If this has whet your appetite; if you | 5 | * You have now completed Preparation. If this has whet your appetite; if you |
6 | * are feeling invigorated and refreshed then the next, more challenging stage | 6 | * are feeling invigorated and refreshed then the next, more challenging stage |
@@ -189,7 +189,7 @@ ENTRY(switch_to_guest) | |||
189 | // Interrupts are turned back on: we are Guest. | 189 | // Interrupts are turned back on: we are Guest. |
190 | iret | 190 | iret |
191 | 191 | ||
192 | // We treat two paths to switch back to the Host | 192 | // We tread two paths to switch back to the Host |
193 | // Yet both must save Guest state and restore Host | 193 | // Yet both must save Guest state and restore Host |
194 | // So we put the routine in a macro. | 194 | // So we put the routine in a macro. |
195 | #define SWITCH_TO_HOST \ | 195 | #define SWITCH_TO_HOST \ |