about summary refs log tree commit diff stats
path: root/arch
diff options
context:
space:
mode:
author Nitin A Kamble <nitin.a.kamble@intel.com> 2009-06-08 14:34:16 -0400
committer Avi Kivity <avi@redhat.com> 2009-09-10 01:32:49 -0400
commit 3a624e29c7587b79abab60e279f9d1a62a3d4716 (patch)
tree c6cfd94b21572b4992b76a51d563b5ec3550473b /arch
parent fa40a8214bb9bcae8d49c234c19d8b4a6c1f37ff (diff)
KVM: VMX: Support Unrestricted Guest feature
The "Unrestricted Guest" feature has been added to the VMX specification. Intel Westmere and later processors will support this feature. It allows KVM guests to run real-mode and unpaged-mode code natively in VMX mode when EPT is turned on. With unrestricted guest there is no need to emulate the guest's real-mode code in the vm86 container or in the emulator. Guest big-real-mode code also works as it does natively. The attached patch enhances KVM to use the unrestricted guest feature if it is available on the processor. It also adds a new kernel/module parameter to disable the unrestricted guest feature at boot time.
Signed-off-by: Nitin A Kamble <nitin.a.kamble@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86/include/asm/kvm_host.h 12
-rw-r--r-- arch/x86/include/asm/vmx.h 1
-rw-r--r-- arch/x86/kvm/vmx.c 60
3 files changed, 62 insertions, 11 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1cc901ec4ba5..a1a96a57bb9d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -37,12 +37,14 @@
37#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 37#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
38 0xFFFFFF0000000000ULL) 38 0xFFFFFF0000000000ULL)
39 39
40#define KVM_GUEST_CR0_MASK \ 40#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
41 (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \ 41 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
42 | X86_CR0_NW | X86_CR0_CD) 42#define KVM_GUEST_CR0_MASK \
43 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
44#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
45 (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
43#define KVM_VM_CR0_ALWAYS_ON \ 46#define KVM_VM_CR0_ALWAYS_ON \
44 (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \ 47 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
45 | X86_CR0_MP)
46#define KVM_GUEST_CR4_MASK \ 48#define KVM_GUEST_CR4_MASK \
47 (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE) 49 (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
48#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) 50#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 11be5ad2e0e9..e7927a639d69 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -55,6 +55,7 @@
55#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 55#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
56#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 56#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
57#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 57#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
58#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
58 59
59 60
60#define PIN_BASED_EXT_INTR_MASK 0x00000001 61#define PIN_BASED_EXT_INTR_MASK 0x00000001
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 959cb59cfaeb..f0f9773f0b0f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,10 @@ module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
51static int __read_mostly enable_ept = 1; 51static int __read_mostly enable_ept = 1;
52module_param_named(ept, enable_ept, bool, S_IRUGO); 52module_param_named(ept, enable_ept, bool, S_IRUGO);
53 53
54static int __read_mostly enable_unrestricted_guest = 1;
55module_param_named(unrestricted_guest,
56 enable_unrestricted_guest, bool, S_IRUGO);
57
54static int __read_mostly emulate_invalid_guest_state = 0; 58static int __read_mostly emulate_invalid_guest_state = 0;
55module_param(emulate_invalid_guest_state, bool, S_IRUGO); 59module_param(emulate_invalid_guest_state, bool, S_IRUGO);
56 60
@@ -279,6 +283,12 @@ static inline int cpu_has_vmx_ept(void)
279 SECONDARY_EXEC_ENABLE_EPT; 283 SECONDARY_EXEC_ENABLE_EPT;
280} 284}
281 285
286static inline int cpu_has_vmx_unrestricted_guest(void)
287{
288 return vmcs_config.cpu_based_2nd_exec_ctrl &
289 SECONDARY_EXEC_UNRESTRICTED_GUEST;
290}
291
282static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 292static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
283{ 293{
284 return flexpriority_enabled && 294 return flexpriority_enabled &&
@@ -1210,7 +1220,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1210 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 1220 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
1211 SECONDARY_EXEC_WBINVD_EXITING | 1221 SECONDARY_EXEC_WBINVD_EXITING |
1212 SECONDARY_EXEC_ENABLE_VPID | 1222 SECONDARY_EXEC_ENABLE_VPID |
1213 SECONDARY_EXEC_ENABLE_EPT; 1223 SECONDARY_EXEC_ENABLE_EPT |
1224 SECONDARY_EXEC_UNRESTRICTED_GUEST;
1214 if (adjust_vmx_controls(min2, opt2, 1225 if (adjust_vmx_controls(min2, opt2,
1215 MSR_IA32_VMX_PROCBASED_CTLS2, 1226 MSR_IA32_VMX_PROCBASED_CTLS2,
1216 &_cpu_based_2nd_exec_control) < 0) 1227 &_cpu_based_2nd_exec_control) < 0)
@@ -1340,8 +1351,13 @@ static __init int hardware_setup(void)
1340 if (!cpu_has_vmx_vpid()) 1351 if (!cpu_has_vmx_vpid())
1341 enable_vpid = 0; 1352 enable_vpid = 0;
1342 1353
1343 if (!cpu_has_vmx_ept()) 1354 if (!cpu_has_vmx_ept()) {
1344 enable_ept = 0; 1355 enable_ept = 0;
1356 enable_unrestricted_guest = 0;
1357 }
1358
1359 if (!cpu_has_vmx_unrestricted_guest())
1360 enable_unrestricted_guest = 0;
1345 1361
1346 if (!cpu_has_vmx_flexpriority()) 1362 if (!cpu_has_vmx_flexpriority())
1347 flexpriority_enabled = 0; 1363 flexpriority_enabled = 0;
@@ -1440,6 +1456,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1440 unsigned long flags; 1456 unsigned long flags;
1441 struct vcpu_vmx *vmx = to_vmx(vcpu); 1457 struct vcpu_vmx *vmx = to_vmx(vcpu);
1442 1458
1459 if (enable_unrestricted_guest)
1460 return;
1461
1443 vmx->emulation_required = 1; 1462 vmx->emulation_required = 1;
1444 vcpu->arch.rmode.vm86_active = 1; 1463 vcpu->arch.rmode.vm86_active = 1;
1445 1464
@@ -1593,7 +1612,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1593 CPU_BASED_CR3_STORE_EXITING)); 1612 CPU_BASED_CR3_STORE_EXITING));
1594 vcpu->arch.cr0 = cr0; 1613 vcpu->arch.cr0 = cr0;
1595 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1614 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1596 *hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
1597 *hw_cr0 &= ~X86_CR0_WP; 1615 *hw_cr0 &= ~X86_CR0_WP;
1598 } else if (!is_paging(vcpu)) { 1616 } else if (!is_paging(vcpu)) {
1599 /* From nonpaging to paging */ 1617 /* From nonpaging to paging */
@@ -1620,8 +1638,13 @@ static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
1620 1638
1621static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1639static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1622{ 1640{
1623 unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | 1641 unsigned long hw_cr0;
1624 KVM_VM_CR0_ALWAYS_ON; 1642
1643 if (enable_unrestricted_guest)
1644 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST)
1645 | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
1646 else
1647 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
1625 1648
1626 vmx_fpu_deactivate(vcpu); 1649 vmx_fpu_deactivate(vcpu);
1627 1650
@@ -1786,6 +1809,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
1786 ar = 0xf3; 1809 ar = 0xf3;
1787 } else 1810 } else
1788 ar = vmx_segment_access_rights(var); 1811 ar = vmx_segment_access_rights(var);
1812
1813 /*
1814 * Fix the "Accessed" bit in AR field of segment registers for older
1815 * qemu binaries.
1816 * IA32 arch specifies that at the time of processor reset the
1817 * "Accessed" bit in the AR field of segment registers is 1. And qemu
1818 * is setting it to 0 in the userland code. This causes invalid guest
1819 * state vmexit when "unrestricted guest" mode is turned on.
1820 * Fix for this setup issue in cpu_reset is being pushed in the qemu
1821 * tree. Newer qemu binaries with that qemu fix would not need this
1822 * kvm hack.
1823 */
1824 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
1825 ar |= 0x1; /* Accessed */
1826
1789 vmcs_write32(sf->ar_bytes, ar); 1827 vmcs_write32(sf->ar_bytes, ar);
1790} 1828}
1791 1829
@@ -2082,11 +2120,19 @@ out:
2082static void seg_setup(int seg) 2120static void seg_setup(int seg)
2083{ 2121{
2084 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2122 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2123 unsigned int ar;
2085 2124
2086 vmcs_write16(sf->selector, 0); 2125 vmcs_write16(sf->selector, 0);
2087 vmcs_writel(sf->base, 0); 2126 vmcs_writel(sf->base, 0);
2088 vmcs_write32(sf->limit, 0xffff); 2127 vmcs_write32(sf->limit, 0xffff);
2089 vmcs_write32(sf->ar_bytes, 0xf3); 2128 if (enable_unrestricted_guest) {
2129 ar = 0x93;
2130 if (seg == VCPU_SREG_CS)
2131 ar |= 0x08; /* code segment */
2132 } else
2133 ar = 0xf3;
2134
2135 vmcs_write32(sf->ar_bytes, ar);
2090} 2136}
2091 2137
2092static int alloc_apic_access_page(struct kvm *kvm) 2138static int alloc_apic_access_page(struct kvm *kvm)
@@ -2229,6 +2275,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2229 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; 2275 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
2230 if (!enable_ept) 2276 if (!enable_ept)
2231 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; 2277 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
2278 if (!enable_unrestricted_guest)
2279 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
2232 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 2280 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
2233 } 2281 }
2234 2282