aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/lguest/boot.c54
-rw-r--r--arch/x86/lguest/i386_head.S8
2 files changed, 33 insertions, 29 deletions
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d2235db4085f..a55b0902f9d3 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -56,6 +56,7 @@
56#include <linux/lguest.h> 56#include <linux/lguest.h>
57#include <linux/lguest_launcher.h> 57#include <linux/lguest_launcher.h>
58#include <linux/virtio_console.h> 58#include <linux/virtio_console.h>
59#include <linux/pm.h>
59#include <asm/paravirt.h> 60#include <asm/paravirt.h>
60#include <asm/param.h> 61#include <asm/param.h>
61#include <asm/page.h> 62#include <asm/page.h>
@@ -98,7 +99,7 @@ static cycle_t clock_base;
98 * When lazy_mode is set, it means we're allowed to defer all hypercalls and do 99 * When lazy_mode is set, it means we're allowed to defer all hypercalls and do
99 * them as a batch when lazy_mode is eventually turned off. Because hypercalls 100 * them as a batch when lazy_mode is eventually turned off. Because hypercalls
100 * are reasonably expensive, batching them up makes sense. For example, a 101 * are reasonably expensive, batching them up makes sense. For example, a
101 * large mmap might update dozens of page table entries: that code calls 102 * large munmap might update dozens of page table entries: that code calls
102 * paravirt_enter_lazy_mmu(), does the dozen updates, then calls 103 * paravirt_enter_lazy_mmu(), does the dozen updates, then calls
103 * lguest_leave_lazy_mode(). 104 * lguest_leave_lazy_mode().
104 * 105 *
@@ -163,8 +164,8 @@ void async_hcall(unsigned long call,
163/*:*/ 164/*:*/
164 165
165/*G:033 166/*G:033
166 * Here are our first native-instruction replacements: four functions for 167 * After that diversion we return to our first native-instruction
167 * interrupt control. 168 * replacements: four functions for interrupt control.
168 * 169 *
169 * The simplest way of implementing these would be to have "turn interrupts 170 * The simplest way of implementing these would be to have "turn interrupts
170 * off" and "turn interrupts on" hypercalls. Unfortunately, this is too slow: 171 * off" and "turn interrupts on" hypercalls. Unfortunately, this is too slow:
@@ -183,7 +184,7 @@ static unsigned long save_fl(void)
183 return lguest_data.irq_enabled; 184 return lguest_data.irq_enabled;
184} 185}
185 186
186/* "restore_flags" just sets the flags back to the value given. */ 187/* restore_flags() just sets the flags back to the value given. */
187static void restore_fl(unsigned long flags) 188static void restore_fl(unsigned long flags)
188{ 189{
189 lguest_data.irq_enabled = flags; 190 lguest_data.irq_enabled = flags;
@@ -356,7 +357,7 @@ static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
356 * it. The Host needs to know when the Guest wants to change them, so we have 357 * it. The Host needs to know when the Guest wants to change them, so we have
357 * a whole series of functions like read_cr0() and write_cr0(). 358 * a whole series of functions like read_cr0() and write_cr0().
358 * 359 *
359 * We start with CR0. CR0 allows you to turn on and off all kinds of basic 360 * We start with cr0. cr0 allows you to turn on and off all kinds of basic
360 * features, but Linux only really cares about one: the horrifically-named Task 361 * features, but Linux only really cares about one: the horrifically-named Task
361 * Switched (TS) bit at bit 3 (ie. 8) 362 * Switched (TS) bit at bit 3 (ie. 8)
362 * 363 *
@@ -371,8 +372,7 @@ static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
371static unsigned long current_cr0, current_cr3; 372static unsigned long current_cr0, current_cr3;
372static void lguest_write_cr0(unsigned long val) 373static void lguest_write_cr0(unsigned long val)
373{ 374{
374 /* 8 == TS bit. */ 375 lazy_hcall(LHCALL_TS, val & X86_CR0_TS, 0, 0);
375 lazy_hcall(LHCALL_TS, val & 8, 0, 0);
376 current_cr0 = val; 376 current_cr0 = val;
377} 377}
378 378
@@ -387,10 +387,10 @@ static unsigned long lguest_read_cr0(void)
387static void lguest_clts(void) 387static void lguest_clts(void)
388{ 388{
389 lazy_hcall(LHCALL_TS, 0, 0, 0); 389 lazy_hcall(LHCALL_TS, 0, 0, 0);
390 current_cr0 &= ~8U; 390 current_cr0 &= ~X86_CR0_TS;
391} 391}
392 392
393/* CR2 is the virtual address of the last page fault, which the Guest only ever 393/* cr2 is the virtual address of the last page fault, which the Guest only ever
394 * reads. The Host kindly writes this into our "struct lguest_data", so we 394 * reads. The Host kindly writes this into our "struct lguest_data", so we
395 * just read it out of there. */ 395 * just read it out of there. */
396static unsigned long lguest_read_cr2(void) 396static unsigned long lguest_read_cr2(void)
@@ -398,7 +398,7 @@ static unsigned long lguest_read_cr2(void)
398 return lguest_data.cr2; 398 return lguest_data.cr2;
399} 399}
400 400
401/* CR3 is the current toplevel pagetable page: the principle is the same as 401/* cr3 is the current toplevel pagetable page: the principle is the same as
402 * cr0. Keep a local copy, and tell the Host when it changes. */ 402 * cr0. Keep a local copy, and tell the Host when it changes. */
403static void lguest_write_cr3(unsigned long cr3) 403static void lguest_write_cr3(unsigned long cr3)
404{ 404{
@@ -411,7 +411,7 @@ static unsigned long lguest_read_cr3(void)
411 return current_cr3; 411 return current_cr3;
412} 412}
413 413
414/* CR4 is used to enable and disable PGE, but we don't care. */ 414/* cr4 is used to enable and disable PGE, but we don't care. */
415static unsigned long lguest_read_cr4(void) 415static unsigned long lguest_read_cr4(void)
416{ 416{
417 return 0; 417 return 0;
@@ -432,7 +432,7 @@ static void lguest_write_cr4(unsigned long val)
432 * maps virtual addresses to physical addresses using "page tables". We could 432 * maps virtual addresses to physical addresses using "page tables". We could
433 * use one huge index of 1 million entries: each address is 4 bytes, so that's 433 * use one huge index of 1 million entries: each address is 4 bytes, so that's
434 * 1024 pages just to hold the page tables. But since most virtual addresses 434 * 1024 pages just to hold the page tables. But since most virtual addresses
435 * are unused, we use a two level index which saves space. The CR3 register 435 * are unused, we use a two level index which saves space. The cr3 register
436 * contains the physical address of the top level "page directory" page, which 436 * contains the physical address of the top level "page directory" page, which
437 * contains physical addresses of up to 1024 second-level pages. Each of these 437 * contains physical addresses of up to 1024 second-level pages. Each of these
438 * second level pages contains up to 1024 physical addresses of actual pages, 438 * second level pages contains up to 1024 physical addresses of actual pages,
@@ -440,7 +440,7 @@ static void lguest_write_cr4(unsigned long val)
440 * 440 *
441 * Here's a diagram, where arrows indicate physical addresses: 441 * Here's a diagram, where arrows indicate physical addresses:
442 * 442 *
443 * CR3 ---> +---------+ 443 * cr3 ---> +---------+
444 * | --------->+---------+ 444 * | --------->+---------+
445 * | | | PADDR1 | 445 * | | | PADDR1 |
446 * Top-level | | PADDR2 | 446 * Top-level | | PADDR2 |
@@ -498,8 +498,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
498 * 498 *
499 * ... except in early boot when the kernel sets up the initial pagetables, 499 * ... except in early boot when the kernel sets up the initial pagetables,
500 * which makes booting astonishingly slow. So we don't even tell the Host 500 * which makes booting astonishingly slow. So we don't even tell the Host
501 * anything changed until we've done the first page table switch. 501 * anything changed until we've done the first page table switch. */
502 */
503static void lguest_set_pte(pte_t *ptep, pte_t pteval) 502static void lguest_set_pte(pte_t *ptep, pte_t pteval)
504{ 503{
505 *ptep = pteval; 504 *ptep = pteval;
@@ -720,10 +719,10 @@ static void lguest_time_init(void)
720 /* Set up the timer interrupt (0) to go to our simple timer routine */ 719 /* Set up the timer interrupt (0) to go to our simple timer routine */
721 set_irq_handler(0, lguest_time_irq); 720 set_irq_handler(0, lguest_time_irq);
722 721
723 /* Our clock structure look like arch/i386/kernel/tsc.c if we can use 722 /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can
724 * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either 723 * use the TSC, otherwise it's a dumb nanosecond-resolution clock.
725 * way, the "rating" is initialized so high that it's always chosen 724 * Either way, the "rating" is set so high that it's always chosen over
726 * over any other clocksource. */ 725 * any other clocksource. */
727 if (lguest_data.tsc_khz) 726 if (lguest_data.tsc_khz)
728 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, 727 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
729 lguest_clock.shift); 728 lguest_clock.shift);
@@ -749,7 +748,7 @@ static void lguest_time_init(void)
749 * to work. They're pretty simple. 748 * to work. They're pretty simple.
750 */ 749 */
751 750
752/* The Guest needs to tell the host what stack it expects traps to use. For 751/* The Guest needs to tell the Host what stack it expects traps to use. For
753 * native hardware, this is part of the Task State Segment mentioned above in 752 * native hardware, this is part of the Task State Segment mentioned above in
754 * lguest_load_tr_desc(), but to help hypervisors there's this special call. 753 * lguest_load_tr_desc(), but to help hypervisors there's this special call.
755 * 754 *
@@ -850,13 +849,16 @@ static __init char *lguest_memory_setup(void)
850 return "LGUEST"; 849 return "LGUEST";
851} 850}
852 851
853/* Before virtqueues are set up, we use LHCALL_NOTIFY on normal memory to 852/* We will eventually use the virtio console device to produce console output,
854 * produce console output. */ 853 * but before that is set up we use LHCALL_NOTIFY on normal memory to produce
854 * console output. */
855static __init int early_put_chars(u32 vtermno, const char *buf, int count) 855static __init int early_put_chars(u32 vtermno, const char *buf, int count)
856{ 856{
857 char scratch[17]; 857 char scratch[17];
858 unsigned int len = count; 858 unsigned int len = count;
859 859
860 /* We use a nul-terminated string, so we have to make a copy. Icky,
861 * huh? */
860 if (len > sizeof(scratch) - 1) 862 if (len > sizeof(scratch) - 1)
861 len = sizeof(scratch) - 1; 863 len = sizeof(scratch) - 1;
862 scratch[len] = '\0'; 864 scratch[len] = '\0';
@@ -883,7 +885,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
883 * Our current solution is to allow the paravirt back end to optionally patch 885 * Our current solution is to allow the paravirt back end to optionally patch
884 * over the indirect calls to replace them with something more efficient. We 886 * over the indirect calls to replace them with something more efficient. We
885 * patch the four most commonly called functions: disable interrupts, enable 887 * patch the four most commonly called functions: disable interrupts, enable
886 * interrupts, restore interrupts and save interrupts. We usually have 10 888 * interrupts, restore interrupts and save interrupts. We usually have 6 or 10
887 * bytes to patch into: the Guest versions of these operations are small enough 889 * bytes to patch into: the Guest versions of these operations are small enough
888 * that we can fit comfortably. 890 * that we can fit comfortably.
889 * 891 *
@@ -1015,7 +1017,7 @@ __init void lguest_init(void)
1015 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); 1017 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
1016 1018
1017 /* The Host uses the top of the Guest's virtual address space for the 1019 /* The Host uses the top of the Guest's virtual address space for the
1018 * Host<->Guest Switcher, and it tells us how much it needs in 1020 * Host<->Guest Switcher, and it tells us how big that is in
1019 * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */ 1021 * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
1020 reserve_top_address(lguest_data.reserve_mem); 1022 reserve_top_address(lguest_data.reserve_mem);
1021 1023
@@ -1065,6 +1067,6 @@ __init void lguest_init(void)
1065/* 1067/*
1066 * This marks the end of stage II of our journey, The Guest. 1068 * This marks the end of stage II of our journey, The Guest.
1067 * 1069 *
1068 * It is now time for us to explore the nooks and crannies of the three Guest 1070 * It is now time for us to explore the layer of virtual drivers and complete
1069 * devices and complete our understanding of the Guest in "make Drivers". 1071 * our understanding of the Guest in "make Drivers".
1070 */ 1072 */
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index ebc6ac733899..95b6fbcded63 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -6,7 +6,7 @@
6#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7 7
8/*G:020 This is where we begin: head.S notes that the boot header's platform 8/*G:020 This is where we begin: head.S notes that the boot header's platform
9 * type field is "1" (lguest), so calls us here. The boot header is in %esi. 9 * type field is "1" (lguest), so calls us here.
10 * 10 *
11 * WARNING: be very careful here! We're running at addresses equal to physical 11 * WARNING: be very careful here! We're running at addresses equal to physical
12 * addesses (around 0), not above PAGE_OFFSET as most code expectes 12 * addesses (around 0), not above PAGE_OFFSET as most code expectes
@@ -17,13 +17,15 @@
17 * boot. */ 17 * boot. */
18.section .init.text, "ax", @progbits 18.section .init.text, "ax", @progbits
19ENTRY(lguest_entry) 19ENTRY(lguest_entry)
20 /* Make initial hypercall now, so we can set up the pagetables. */ 20 /* We make the "initialization" hypercall now to tell the Host about
21 * us, and also find out where it put our page tables. */
21 movl $LHCALL_LGUEST_INIT, %eax 22 movl $LHCALL_LGUEST_INIT, %eax
22 movl $lguest_data - __PAGE_OFFSET, %edx 23 movl $lguest_data - __PAGE_OFFSET, %edx
23 int $LGUEST_TRAP_ENTRY 24 int $LGUEST_TRAP_ENTRY
24 25
25 /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl 26 /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl
26 * instruction uses %esi implicitly. */ 27 * instruction uses %esi implicitly as the source for the copy we'
28 * about to do. */
27 movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi 29 movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi
28 30
29 /* Copy first 32 entries of page directory to __PAGE_OFFSET entries. 31 /* Copy first 32 entries of page directory to __PAGE_OFFSET entries.