aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx.c
diff options
context:
space:
mode:
authorNadav Har'El <nyh@il.ibm.com>2011-05-25 16:04:25 -0400
committerAvi Kivity <avi@redhat.com>2011-07-12 04:45:11 -0400
commitb87a51ae2893a5907f796eadb4beb60747a69209 (patch)
tree0dfe98e545b1e69268d37121eb9ad42726a6b8d4 /arch/x86/kvm/vmx.c
parenta9d30f33dd21b67b2f4db09f3dfe63a7c390d1b3 (diff)
KVM: nVMX: Implement reading and writing of VMX MSRs
When the guest can use VMX instructions (when the "nested" module option is on), it should also be able to read and write VMX MSRs, e.g., to query about VMX capabilities. This patch adds this support. Signed-off-by: Nadav Har'El <nyh@il.ibm.com>. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>.
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--arch/x86/kvm/vmx.c219
1 files changed, 219 insertions, 0 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 914dc4e9b37f..487952b20217 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1396,6 +1396,218 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
1396} 1396}
1397 1397
1398/* 1398/*
1399 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
1400 * returned for the various VMX controls MSRs when nested VMX is enabled.
1401 * The same values should also be used to verify that vmcs12 control fields are
1402 * valid during nested entry from L1 to L2.
1403 * Each of these control msrs has a low and high 32-bit half: A low bit is on
1404 * if the corresponding bit in the (32-bit) control field *must* be on, and a
1405 * bit in the high half is on if the corresponding bit in the control field
1406 * may be on. See also vmx_control_verify().
1407 * TODO: allow these variables to be modified (downgraded) by module options
1408 * or other means.
1409 */
1410static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high;
1411static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
1412static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
1413static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
1414static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
1415static __init void nested_vmx_setup_ctls_msrs(void)
1416{
1417 /*
1418 * Note that as a general rule, the high half of the MSRs (bits in
1419 * the control fields which may be 1) should be initialized by the
1420 * intersection of the underlying hardware's MSR (i.e., features which
1421 * can be supported) and the list of features we want to expose -
1422 * because they are known to be properly supported in our code.
1423 * Also, usually, the low half of the MSRs (bits which must be 1) can
1424 * be set to 0, meaning that L1 may turn off any of these bits. The
1425 * reason is that if one of these bits is necessary, it will appear
1426 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
1427 * fields of vmcs01 and vmcs02, will turn these bits off - and
1428 * nested_vmx_exit_handled() will not pass related exits to L1.
1429 * These rules have exceptions below.
1430 */
1431
1432 /* pin-based controls */
1433 /*
1434 * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
1435 * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
1436 */
1437 nested_vmx_pinbased_ctls_low = 0x16 ;
1438 nested_vmx_pinbased_ctls_high = 0x16 |
1439 PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
1440 PIN_BASED_VIRTUAL_NMIS;
1441
1442 /* exit controls */
1443 nested_vmx_exit_ctls_low = 0;
1444#ifdef CONFIG_X86_64
1445 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
1446#else
1447 nested_vmx_exit_ctls_high = 0;
1448#endif
1449
1450 /* entry controls */
1451 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
1452 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
1453 nested_vmx_entry_ctls_low = 0;
1454 nested_vmx_entry_ctls_high &=
1455 VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
1456
1457 /* cpu-based controls */
1458 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
1459 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
1460 nested_vmx_procbased_ctls_low = 0;
1461 nested_vmx_procbased_ctls_high &=
1462 CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING |
1463 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
1464 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
1465 CPU_BASED_CR3_STORE_EXITING |
1466#ifdef CONFIG_X86_64
1467 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
1468#endif
1469 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
1470 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
1471 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
1472 /*
1473 * We can allow some features even when not supported by the
1474 * hardware. For example, L1 can specify an MSR bitmap - and we
1475 * can use it to avoid exits to L1 - even when L0 runs L2
1476 * without MSR bitmaps.
1477 */
1478 nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS;
1479
1480 /* secondary cpu-based controls */
1481 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
1482 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
1483 nested_vmx_secondary_ctls_low = 0;
1484 nested_vmx_secondary_ctls_high &=
1485 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1486}
1487
1488static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
1489{
1490 /*
1491 * Bits 0 in high must be 0, and bits 1 in low must be 1.
1492 */
1493 return ((control & high) | low) == control;
1494}
1495
1496static inline u64 vmx_control_msr(u32 low, u32 high)
1497{
1498 return low | ((u64)high << 32);
1499}
1500
1501/*
1502 * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
1503 * also let it use VMX-specific MSRs.
1504 * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
1505 * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
1506 * like all other MSRs).
1507 */
1508static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1509{
1510 if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
1511 msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
1512 /*
1513 * According to the spec, processors which do not support VMX
1514 * should throw a #GP(0) when VMX capability MSRs are read.
1515 */
1516 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
1517 return 1;
1518 }
1519
1520 switch (msr_index) {
1521 case MSR_IA32_FEATURE_CONTROL:
1522 *pdata = 0;
1523 break;
1524 case MSR_IA32_VMX_BASIC:
1525 /*
1526 * This MSR reports some information about VMX support. We
1527 * should return information about the VMX we emulate for the
1528 * guest, and the VMCS structure we give it - not about the
1529 * VMX support of the underlying hardware.
1530 */
1531 *pdata = VMCS12_REVISION |
1532 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
1533 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
1534 break;
1535 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1536 case MSR_IA32_VMX_PINBASED_CTLS:
1537 *pdata = vmx_control_msr(nested_vmx_pinbased_ctls_low,
1538 nested_vmx_pinbased_ctls_high);
1539 break;
1540 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1541 case MSR_IA32_VMX_PROCBASED_CTLS:
1542 *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low,
1543 nested_vmx_procbased_ctls_high);
1544 break;
1545 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1546 case MSR_IA32_VMX_EXIT_CTLS:
1547 *pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
1548 nested_vmx_exit_ctls_high);
1549 break;
1550 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1551 case MSR_IA32_VMX_ENTRY_CTLS:
1552 *pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
1553 nested_vmx_entry_ctls_high);
1554 break;
1555 case MSR_IA32_VMX_MISC:
1556 *pdata = 0;
1557 break;
1558 /*
1559 * These MSRs specify bits which the guest must keep fixed (on or off)
1560 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
1561 * We picked the standard core2 setting.
1562 */
1563#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
1564#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
1565 case MSR_IA32_VMX_CR0_FIXED0:
1566 *pdata = VMXON_CR0_ALWAYSON;
1567 break;
1568 case MSR_IA32_VMX_CR0_FIXED1:
1569 *pdata = -1ULL;
1570 break;
1571 case MSR_IA32_VMX_CR4_FIXED0:
1572 *pdata = VMXON_CR4_ALWAYSON;
1573 break;
1574 case MSR_IA32_VMX_CR4_FIXED1:
1575 *pdata = -1ULL;
1576 break;
1577 case MSR_IA32_VMX_VMCS_ENUM:
1578 *pdata = 0x1f;
1579 break;
1580 case MSR_IA32_VMX_PROCBASED_CTLS2:
1581 *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low,
1582 nested_vmx_secondary_ctls_high);
1583 break;
1584 case MSR_IA32_VMX_EPT_VPID_CAP:
1585 /* Currently, no nested ept or nested vpid */
1586 *pdata = 0;
1587 break;
1588 default:
1589 return 0;
1590 }
1591
1592 return 1;
1593}
1594
1595static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1596{
1597 if (!nested_vmx_allowed(vcpu))
1598 return 0;
1599
1600 if (msr_index == MSR_IA32_FEATURE_CONTROL)
1601 /* TODO: the right thing. */
1602 return 1;
1603 /*
1604 * No need to treat VMX capability MSRs specially: If we don't handle
1605 * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
1606 */
1607 return 0;
1608}
1609
1610/*
1399 * Reads an msr value (of 'msr_index') into 'pdata'. 1611 * Reads an msr value (of 'msr_index') into 'pdata'.
1400 * Returns 0 on success, non-0 otherwise. 1612 * Returns 0 on success, non-0 otherwise.
1401 * Assumes vcpu_load() was already called. 1613 * Assumes vcpu_load() was already called.
@@ -1443,6 +1655,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1443 /* Otherwise falls through */ 1655 /* Otherwise falls through */
1444 default: 1656 default:
1445 vmx_load_host_state(to_vmx(vcpu)); 1657 vmx_load_host_state(to_vmx(vcpu));
1658 if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
1659 return 0;
1446 msr = find_msr_entry(to_vmx(vcpu), msr_index); 1660 msr = find_msr_entry(to_vmx(vcpu), msr_index);
1447 if (msr) { 1661 if (msr) {
1448 vmx_load_host_state(to_vmx(vcpu)); 1662 vmx_load_host_state(to_vmx(vcpu));
@@ -1514,6 +1728,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1514 return 1; 1728 return 1;
1515 /* Otherwise falls through */ 1729 /* Otherwise falls through */
1516 default: 1730 default:
1731 if (vmx_set_vmx_msr(vcpu, msr_index, data))
1732 break;
1517 msr = find_msr_entry(vmx, msr_index); 1733 msr = find_msr_entry(vmx, msr_index);
1518 if (msr) { 1734 if (msr) {
1519 vmx_load_host_state(vmx); 1735 vmx_load_host_state(vmx);
@@ -1902,6 +2118,9 @@ static __init int hardware_setup(void)
1902 if (!cpu_has_vmx_ple()) 2118 if (!cpu_has_vmx_ple())
1903 ple_gap = 0; 2119 ple_gap = 0;
1904 2120
2121 if (nested)
2122 nested_vmx_setup_ctls_msrs();
2123
1905 return alloc_kvm_area(); 2124 return alloc_kvm_area();
1906} 2125}
1907 2126