aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lguest/x86/core.c
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2007-10-25 01:02:50 -0400
committerRusty Russell <rusty@rustcorp.com.au>2007-10-25 01:02:50 -0400
commite1e72965ec2c02db99b415cd06c17ea90767e3a4 (patch)
tree94e43aac35bdc33220e64f285b72b3b2b787fd57 /drivers/lguest/x86/core.c
parent568a17ffce2eeceae0cd9fc37e97cbad12f70278 (diff)
lguest: documentation update
Went through the documentation doing typo and content fixes. This patch contains only comment and whitespace changes. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers/lguest/x86/core.c')
-rw-r--r--drivers/lguest/x86/core.c120
1 files changed, 62 insertions, 58 deletions
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 09d9207420d..482aec2a963 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -63,7 +63,7 @@ static struct lguest_pages *lguest_pages(unsigned int cpu)
63static DEFINE_PER_CPU(struct lguest *, last_guest); 63static DEFINE_PER_CPU(struct lguest *, last_guest);
64 64
65/*S:010 65/*S:010
66 * We are getting close to the Switcher. 66 * We approach the Switcher.
67 * 67 *
68 * Remember that each CPU has two pages which are visible to the Guest when it 68 * Remember that each CPU has two pages which are visible to the Guest when it
69 * runs on that CPU. This has to contain the state for that Guest: we copy the 69 * runs on that CPU. This has to contain the state for that Guest: we copy the
@@ -134,7 +134,7 @@ static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
134 * 134 *
135 * The lcall also pushes the old code segment (KERNEL_CS) onto the 135 * The lcall also pushes the old code segment (KERNEL_CS) onto the
136 * stack, then the address of this call. This stack layout happens to 136 * stack, then the address of this call. This stack layout happens to
137 * exactly match the stack of an interrupt... */ 137 * exactly match the stack layout created by an interrupt... */
138 asm volatile("pushf; lcall *lguest_entry" 138 asm volatile("pushf; lcall *lguest_entry"
139 /* This is how we tell GCC that %eax ("a") and %ebx ("b") 139 /* This is how we tell GCC that %eax ("a") and %ebx ("b")
140 * are changed by this routine. The "=" means output. */ 140 * are changed by this routine. The "=" means output. */
@@ -151,40 +151,46 @@ static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
151} 151}
152/*:*/ 152/*:*/
153 153
154/*M:002 There are hooks in the scheduler which we can register to tell when we
155 * get kicked off the CPU (preempt_notifier_register()). This would allow us
156 * to lazily disable SYSENTER which would regain some performance, and should
157 * also simplify copy_in_guest_info(). Note that we'd still need to restore
158 * things when we exit to Launcher userspace, but that's fairly easy.
159 *
160 * The hooks were designed for KVM, but we can also put them to good use. :*/
161
154/*H:040 This is the i386-specific code to setup and run the Guest. Interrupts 162/*H:040 This is the i386-specific code to setup and run the Guest. Interrupts
155 * are disabled: we own the CPU. */ 163 * are disabled: we own the CPU. */
156void lguest_arch_run_guest(struct lguest *lg) 164void lguest_arch_run_guest(struct lguest *lg)
157{ 165{
158 /* Remember the awfully-named TS bit? If the Guest has asked 166 /* Remember the awfully-named TS bit? If the Guest has asked to set it
159 * to set it we set it now, so we can trap and pass that trap 167 * we set it now, so we can trap and pass that trap to the Guest if it
160 * to the Guest if it uses the FPU. */ 168 * uses the FPU. */
161 if (lg->ts) 169 if (lg->ts)
162 lguest_set_ts(); 170 lguest_set_ts();
163 171
164 /* SYSENTER is an optimized way of doing system calls. We 172 /* SYSENTER is an optimized way of doing system calls. We can't allow
165 * can't allow it because it always jumps to privilege level 0. 173 * it because it always jumps to privilege level 0. A normal Guest
166 * A normal Guest won't try it because we don't advertise it in 174 * won't try it because we don't advertise it in CPUID, but a malicious
167 * CPUID, but a malicious Guest (or malicious Guest userspace 175 * Guest (or malicious Guest userspace program) could, so we tell the
168 * program) could, so we tell the CPU to disable it before 176 * CPU to disable it before running the Guest. */
169 * running the Guest. */
170 if (boot_cpu_has(X86_FEATURE_SEP)) 177 if (boot_cpu_has(X86_FEATURE_SEP))
171 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); 178 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
172 179
173 /* Now we actually run the Guest. It will pop back out when 180 /* Now we actually run the Guest. It will return when something
174 * something interesting happens, and we can examine its 181 * interesting happens, and we can examine its registers to see what it
175 * registers to see what it was doing. */ 182 * was doing. */
176 run_guest_once(lg, lguest_pages(raw_smp_processor_id())); 183 run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
177 184
178 /* The "regs" pointer contains two extra entries which are not 185 /* Note that the "regs" pointer contains two extra entries which are
179 * really registers: a trap number which says what interrupt or 186 * not really registers: a trap number which says what interrupt or
180 * trap made the switcher code come back, and an error code 187 * trap made the switcher code come back, and an error code which some
181 * which some traps set. */ 188 * traps set. */
182 189
183 /* If the Guest page faulted, then the cr2 register will tell 190 /* If the Guest page faulted, then the cr2 register will tell us the
184 * us the bad virtual address. We have to grab this now, 191 * bad virtual address. We have to grab this now, because once we
185 * because once we re-enable interrupts an interrupt could 192 * re-enable interrupts an interrupt could fault and thus overwrite
186 * fault and thus overwrite cr2, or we could even move off to a 193 * cr2, or we could even move off to a different CPU. */
187 * different CPU. */
188 if (lg->regs->trapnum == 14) 194 if (lg->regs->trapnum == 14)
189 lg->arch.last_pagefault = read_cr2(); 195 lg->arch.last_pagefault = read_cr2();
190 /* Similarly, if we took a trap because the Guest used the FPU, 196 /* Similarly, if we took a trap because the Guest used the FPU,
@@ -197,14 +203,15 @@ void lguest_arch_run_guest(struct lguest *lg)
197 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 203 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
198} 204}
199 205
200/*H:130 Our Guest is usually so well behaved; it never tries to do things it 206/*H:130 Now we've examined the hypercall code; our Guest can make requests.
201 * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't 207 * Our Guest is usually so well behaved; it never tries to do things it isn't
202 * quite complete, because it doesn't contain replacements for the Intel I/O 208 * allowed to, and uses hypercalls instead. Unfortunately, Linux's paravirtual
203 * instructions. As a result, the Guest sometimes fumbles across one during 209 * infrastructure isn't quite complete, because it doesn't contain replacements
204 * the boot process as it probes for various things which are usually attached 210 * for the Intel I/O instructions. As a result, the Guest sometimes fumbles
205 * to a PC. 211 * across one during the boot process as it probes for various things which are
212 * usually attached to a PC.
206 * 213 *
207 * When the Guest uses one of these instructions, we get trap #13 (General 214 * When the Guest uses one of these instructions, we get a trap (General
208 * Protection Fault) and come here. We see if it's one of those troublesome 215 * Protection Fault) and come here. We see if it's one of those troublesome
209 * instructions and skip over it. We return true if we did. */ 216 * instructions and skip over it. We return true if we did. */
210static int emulate_insn(struct lguest *lg) 217static int emulate_insn(struct lguest *lg)
@@ -275,43 +282,43 @@ static int emulate_insn(struct lguest *lg)
275void lguest_arch_handle_trap(struct lguest *lg) 282void lguest_arch_handle_trap(struct lguest *lg)
276{ 283{
277 switch (lg->regs->trapnum) { 284 switch (lg->regs->trapnum) {
278 case 13: /* We've intercepted a GPF. */ 285 case 13: /* We've intercepted a General Protection Fault. */
279 /* Check if this was one of those annoying IN or OUT 286 /* Check if this was one of those annoying IN or OUT
280 * instructions which we need to emulate. If so, we 287 * instructions which we need to emulate. If so, we just go
281 * just go back into the Guest after we've done it. */ 288 * back into the Guest after we've done it. */
282 if (lg->regs->errcode == 0) { 289 if (lg->regs->errcode == 0) {
283 if (emulate_insn(lg)) 290 if (emulate_insn(lg))
284 return; 291 return;
285 } 292 }
286 break; 293 break;
287 case 14: /* We've intercepted a page fault. */ 294 case 14: /* We've intercepted a Page Fault. */
288 /* The Guest accessed a virtual address that wasn't 295 /* The Guest accessed a virtual address that wasn't mapped.
289 * mapped. This happens a lot: we don't actually set 296 * This happens a lot: we don't actually set up most of the
290 * up most of the page tables for the Guest at all when 297 * page tables for the Guest at all when we start: as it runs
291 * we start: as it runs it asks for more and more, and 298 * it asks for more and more, and we set them up as
292 * we set them up as required. In this case, we don't 299 * required. In this case, we don't even tell the Guest that
293 * even tell the Guest that the fault happened. 300 * the fault happened.
294 * 301 *
295 * The errcode tells whether this was a read or a 302 * The errcode tells whether this was a read or a write, and
296 * write, and whether kernel or userspace code. */ 303 * whether kernel or userspace code. */
297 if (demand_page(lg, lg->arch.last_pagefault, lg->regs->errcode)) 304 if (demand_page(lg, lg->arch.last_pagefault, lg->regs->errcode))
298 return; 305 return;
299 306
300 /* OK, it's really not there (or not OK): the Guest 307 /* OK, it's really not there (or not OK): the Guest needs to
301 * needs to know. We write out the cr2 value so it 308 * know. We write out the cr2 value so it knows where the
302 * knows where the fault occurred. 309 * fault occurred.
303 * 310 *
304 * Note that if the Guest were really messed up, this 311 * Note that if the Guest were really messed up, this could
305 * could happen before it's done the INITIALIZE 312 * happen before it's done the LHCALL_LGUEST_INIT hypercall, so
306 * hypercall, so lg->lguest_data will be NULL */ 313 * lg->lguest_data could be NULL */
307 if (lg->lguest_data && 314 if (lg->lguest_data &&
308 put_user(lg->arch.last_pagefault, &lg->lguest_data->cr2)) 315 put_user(lg->arch.last_pagefault, &lg->lguest_data->cr2))
309 kill_guest(lg, "Writing cr2"); 316 kill_guest(lg, "Writing cr2");
310 break; 317 break;
311 case 7: /* We've intercepted a Device Not Available fault. */ 318 case 7: /* We've intercepted a Device Not Available fault. */
312 /* If the Guest doesn't want to know, we already 319 /* If the Guest doesn't want to know, we already restored the
313 * restored the Floating Point Unit, so we just 320 * Floating Point Unit, so we just continue without telling
314 * continue without telling it. */ 321 * it. */
315 if (!lg->ts) 322 if (!lg->ts)
316 return; 323 return;
317 break; 324 break;
@@ -536,9 +543,6 @@ int lguest_arch_init_hypercalls(struct lguest *lg)
536 543
537 return 0; 544 return 0;
538} 545}
539/* Now we've examined the hypercall code; our Guest can make requests. There
540 * is one other way we can do things for the Guest, as we see in
541 * emulate_insn(). :*/
542 546
543/*L:030 lguest_arch_setup_regs() 547/*L:030 lguest_arch_setup_regs()
544 * 548 *
@@ -570,8 +574,8 @@ void lguest_arch_setup_regs(struct lguest *lg, unsigned long start)
570 574
571 /* %esi points to our boot information, at physical address 0, so don't 575 /* %esi points to our boot information, at physical address 0, so don't
572 * touch it. */ 576 * touch it. */
577
573 /* There are a couple of GDT entries the Guest expects when first 578 /* There are a couple of GDT entries the Guest expects when first
574 * booting. */ 579 * booting. */
575
576 setup_guest_gdt(lg); 580 setup_guest_gdt(lg);
577} 581}