author	Matias Zabaljauregui <zabaljauregui@gmail.com>	2009-06-13 00:27:07 -0400
committer	Rusty Russell <rusty@rustcorp.com.au>	2009-06-12 08:57:08 -0400
commit	acdd0b6292b282c4511897ac2691a47befbf1c6a (patch)
tree	1bfcfc32b11d35e99fec5bbf52b19d6ee038f25e /arch/x86
parent	cefcad1773197523e11e18b669f245e6a8d32058 (diff)
lguest: PAE support
This version requires that host and guest have the same PAE status.
NX cap is not offered to the guest, yet.

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
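Background note, not part of the commit: under PAE a page-table page holds 512 eight-byte entries instead of 1024 four-byte ones, so one PTE page maps only 2 MB. That is why the patch moves SWITCHER_ADDR from -4M to -2M, and why a PAE pte no longer fits in a single hypercall argument (it is passed as pte_low/pte_high via the new four-argument lazy hypercall). A minimal standalone sketch of that arithmetic:

/* Illustrative userspace sketch (not part of the patch): how much memory one
 * PTE page can map with and without PAE, and the resulting switcher address. */
#include <stdio.h>

int main(void)
{
	unsigned long long page_size = 4096;

	/* Without PAE: 1024 four-byte entries per PTE page -> 4 MB. */
	unsigned long long span_nopae = (page_size / 4) * page_size;
	/* With PAE: 512 eight-byte entries per PTE page -> 2 MB. */
	unsigned long long span_pae = (page_size / 8) * page_size;

	printf("no PAE: maps %llu MB, SWITCHER_ADDR = 0x%llX\n",
	       span_nopae >> 20, 0x100000000ULL - span_nopae);
	printf("PAE:    maps %llu MB, SWITCHER_ADDR = 0x%llX\n",
	       span_pae >> 20, 0x100000000ULL - span_pae);
	return 0;
}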
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/lguest.h        7
-rw-r--r--  arch/x86/include/asm/lguest_hcall.h  3
-rw-r--r--  arch/x86/lguest/Kconfig              1
-rw-r--r--  arch/x86/lguest/boot.c              71
4 files changed, 74 insertions, 8 deletions
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index 1caf57628b9c..313389cd50d2 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -17,8 +17,13 @@
 /* Pages for switcher itself, then two pages per cpu */
 #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
 
-/* We map at -4M for ease of mapping into the guest (one PTE page). */
+/* We map at -4M (-2M when PAE is activated) for ease of mapping
+ * into the guest (one PTE page). */
+#ifdef CONFIG_X86_PAE
+#define SWITCHER_ADDR 0xFFE00000
+#else
 #define SWITCHER_ADDR 0xFFC00000
+#endif
 
 /* Found in switcher.S */
 extern unsigned long default_idt_entries[];
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index b14b3552a4db..d31c4a684078 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -12,6 +12,7 @@
 #define LHCALL_TS		8
 #define LHCALL_SET_CLOCKEVENT	9
 #define LHCALL_HALT		10
+#define LHCALL_SET_PMD		13
 #define LHCALL_SET_PTE		14
 #define LHCALL_SET_PGD		15
 #define LHCALL_LOAD_TLS		16
@@ -33,7 +34,7 @@
  * operations? There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * We use the KVM hypercall mechanism. Eighteen hypercalls are
+ * We use the KVM hypercall mechanism. Seventeen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
  * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
  * If a return value makes sense, it's returned in %eax.
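Aside, not from the patch: the register convention described in the comment above is the one the KVM hypercall macros emit. A hedged sketch of a four-argument call such as the PAE version of LHCALL_SET_PTE, assuming the vmcall-based KVM hypercall opcode; real guest code would use kvm_hypercall4() from <asm/kvm_para.h> rather than open-coding this:

/* Sketch only: hypercall number in %eax, arguments in %ebx, %ecx, %edx,
 * %esi, return value (if any) back in %eax.  0f 01 c1 is the vmcall
 * opcode the KVM hypercall macros emit. */
static inline unsigned long example_hcall4(unsigned long call,
					   unsigned long arg1,
					   unsigned long arg2,
					   unsigned long arg3,
					   unsigned long arg4)
{
	unsigned long ret;

	asm volatile(".byte 0x0f,0x01,0xc1"
		     : "=a"(ret)
		     : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
		     : "memory");
	return ret;
}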
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 8dab8f7844d3..38718041efc3 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -2,7 +2,6 @@ config LGUEST_GUEST
 	bool "Lguest guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on !X86_PAE
 	select VIRTIO
 	select VIRTIO_RING
 	select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d12f554e5f6a..7bc65f0f62c4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -167,6 +167,7 @@ static void lazy_hcall3(unsigned long call,
 		async_hcall(call, arg1, arg2, arg3, 0);
 }
 
+#ifdef CONFIG_X86_PAE
 static void lazy_hcall4(unsigned long call,
 			unsigned long arg1,
 			unsigned long arg2,
@@ -178,6 +179,7 @@ static void lazy_hcall4(unsigned long call,
 	else
 		async_hcall(call, arg1, arg2, arg3, arg4);
 }
+#endif
 
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
  * issue the do-nothing hypercall to flush any stored calls. */
@@ -380,8 +382,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 	case 1:	/* Basic feature request. */
 		/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
 		*cx &= 0x00002201;
-		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
-		*dx &= 0x07808111;
+		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */
+		*dx &= 0x07808151;
 		/* The Host can do a nice optimization if it knows that the
 		 * kernel mappings (addresses above 0xC0000000 or whatever
 		 * PAGE_OFFSET is set to) haven't changed. But Linux calls
@@ -400,6 +402,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 		if (*ax > 0x80000008)
 			*ax = 0x80000008;
 		break;
+	case 0x80000001:
+		/* Here we should fix nx cap depending on host. */
+		/* For this version of PAE, we just clear NX bit. */
+		*dx &= ~(1 << 20);
+		break;
 	}
 }
 
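Aside, not part of the patch: the two CPUID masks touched above are plain bit arithmetic. Bit 6 of leaf 1 %edx is the PAE feature flag (0x07808111 | (1 << 6) == 0x07808151), and bit 20 of leaf 0x80000001 %edx is NX, which this version simply clears. A small standalone check of that arithmetic:

/* Illustrative sketch: verify the feature-bit arithmetic used above. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int old_mask = 0x07808111;	/* leaf 1 %edx mask before the patch */
	unsigned int pae_bit = 1u << 6;		/* CPUID leaf 1 %edx: PAE */
	unsigned int nx_bit = 1u << 20;		/* CPUID leaf 0x80000001 %edx: NX */

	assert((old_mask | pae_bit) == 0x07808151);	/* new mask also offers PAE */

	unsigned int host_edx = 0xffffffff;		/* pretend the host sets every bit */
	printf("guest 0x80000001 edx without NX: 0x%08x\n", host_edx & ~nx_bit);
	return 0;
}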
@@ -533,7 +540,12 @@ static void lguest_write_cr4(unsigned long val)
 static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
 			       pte_t *ptep)
 {
+#ifdef CONFIG_X86_PAE
+	lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr,
+		    ptep->pte_low, ptep->pte_high);
+#else
 	lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
+#endif
 }
 
 static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -543,15 +555,37 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	lguest_pte_update(mm, addr, ptep);
 }
 
-/* The Guest calls this to set a top-level entry. Again, we set the entry then
- * tell the Host which top-level page we changed, and the index of the entry we
- * changed. */
+/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd
+ * to set a middle-level entry when PAE is activated.
+ * Again, we set the entry then tell the Host which page we changed,
+ * and the index of the entry we changed. */
+#ifdef CONFIG_X86_PAE
+static void lguest_set_pud(pud_t *pudp, pud_t pudval)
+{
+	native_set_pud(pudp, pudval);
+
+	/* 32 bytes aligned pdpt address and the index. */
+	lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0,
+		    (__pa(pudp) & 0x1F) / sizeof(pud_t));
+}
+
+static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	native_set_pmd(pmdp, pmdval);
+	lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
+		    (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
+}
+#else
+
+/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not
+ * activated. */
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 	native_set_pmd(pmdp, pmdval);
 	lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
 		    (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
 }
+#endif
 
 /* There are a couple of legacy places where the kernel sets a PTE, but we
  * don't know the top level any more. This is useless for us, since we don't
@@ -569,6 +603,26 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
+#ifdef CONFIG_X86_PAE
+static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	native_set_pte_atomic(ptep, pte);
+	if (cr3_changed)
+		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
+}
+
+void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	native_pte_clear(mm, addr, ptep);
+	lguest_pte_update(mm, addr, ptep);
+}
+
+void lguest_pmd_clear(pmd_t *pmdp)
+{
+	lguest_set_pmd(pmdp, __pmd(0));
+}
+#endif
+
 /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
  * native page table operations. On native hardware you can set a new page
  * table entry whenever you want, but if you want to remove one you have to do
@@ -1035,6 +1089,7 @@ __init void lguest_init(void)
 	pv_info.name = "lguest";
 	pv_info.paravirt_enabled = 1;
 	pv_info.kernel_rpl = 1;
+	pv_info.shared_kernel_pmd = 1;
 
 	/* We set up all the lguest overrides for sensitive operations. These
 	 * are detailed with the operations themselves. */
@@ -1080,6 +1135,12 @@ __init void lguest_init(void)
 	pv_mmu_ops.set_pte = lguest_set_pte;
 	pv_mmu_ops.set_pte_at = lguest_set_pte_at;
 	pv_mmu_ops.set_pmd = lguest_set_pmd;
+#ifdef CONFIG_X86_PAE
+	pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
+	pv_mmu_ops.pte_clear = lguest_pte_clear;
+	pv_mmu_ops.pmd_clear = lguest_pmd_clear;
+	pv_mmu_ops.set_pud = lguest_set_pud;
+#endif
 	pv_mmu_ops.read_cr2 = lguest_read_cr2;
 	pv_mmu_ops.read_cr3 = lguest_read_cr3;
 	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;