diff options
author | Huang Ying <ying.huang@intel.com> | 2009-05-11 04:48:15 -0400 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2009-09-10 01:32:39 -0400 |
commit | 890ca9aefa78f7831f8f633cab9e4803636dffe4 (patch) | |
tree | 1a66ca576b6b0c396de79214e2262dc218ec5d56 /arch/x86/kvm/x86.c | |
parent | af24a4e4aec77ef16c1971cf4465f767ba946034 (diff) |
KVM: Add MCE support
The related MSRs are emulated. MCE capability is exported via the
extension KVM_CAP_MCE and the ioctl KVM_X86_GET_MCE_CAP_SUPPORTED. A new
vcpu ioctl command, KVM_X86_SETUP_MCE, is used to set up MCE emulation
state such as mcg_cap. An MCE is injected via the vcpu ioctl command
KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are
not implemented.
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 220 |
1 files changed, 196 insertions, 24 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6d46079a901c..55a9dd182de8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <asm/msr.h> | 42 | #include <asm/msr.h> |
43 | #include <asm/desc.h> | 43 | #include <asm/desc.h> |
44 | #include <asm/mtrr.h> | 44 | #include <asm/mtrr.h> |
45 | #include <asm/mce.h> | ||
45 | 46 | ||
46 | #define MAX_IO_MSRS 256 | 47 | #define MAX_IO_MSRS 256 |
47 | #define CR0_RESERVED_BITS \ | 48 | #define CR0_RESERVED_BITS \ |
@@ -55,6 +56,10 @@ | |||
55 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | 56 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) |
56 | 57 | ||
57 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | 58 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) |
59 | |||
60 | #define KVM_MAX_MCE_BANKS 32 | ||
61 | #define KVM_MCE_CAP_SUPPORTED MCG_CTL_P | ||
62 | |||
58 | /* EFER defaults: | 63 | /* EFER defaults: |
59 | * - enable syscall per default because its emulated by KVM | 64 | * - enable syscall per default because its emulated by KVM |
60 | * - enable LME and LMA per default on 64 bit KVM | 65 | * - enable LME and LMA per default on 64 bit KVM |
@@ -777,23 +782,43 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
777 | return 0; | 782 | return 0; |
778 | } | 783 | } |
779 | 784 | ||
780 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 785 | static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
781 | { | 786 | { |
787 | u64 mcg_cap = vcpu->arch.mcg_cap; | ||
788 | unsigned bank_num = mcg_cap & 0xff; | ||
789 | |||
782 | switch (msr) { | 790 | switch (msr) { |
783 | case MSR_EFER: | ||
784 | set_efer(vcpu, data); | ||
785 | break; | ||
786 | case MSR_IA32_MC0_STATUS: | ||
787 | pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", | ||
788 | __func__, data); | ||
789 | break; | ||
790 | case MSR_IA32_MCG_STATUS: | 791 | case MSR_IA32_MCG_STATUS: |
791 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", | 792 | vcpu->arch.mcg_status = data; |
792 | __func__, data); | ||
793 | break; | 793 | break; |
794 | case MSR_IA32_MCG_CTL: | 794 | case MSR_IA32_MCG_CTL: |
795 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", | 795 | if (!(mcg_cap & MCG_CTL_P)) |
796 | __func__, data); | 796 | return 1; |
797 | if (data != 0 && data != ~(u64)0) | ||
798 | return -1; | ||
799 | vcpu->arch.mcg_ctl = data; | ||
800 | break; | ||
801 | default: | ||
802 | if (msr >= MSR_IA32_MC0_CTL && | ||
803 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { | ||
804 | u32 offset = msr - MSR_IA32_MC0_CTL; | ||
805 | /* only 0 or all 1s can be written to IA32_MCi_CTL */ | ||
806 | if ((offset & 0x3) == 0 && | ||
807 | data != 0 && data != ~(u64)0) | ||
808 | return -1; | ||
809 | vcpu->arch.mce_banks[offset] = data; | ||
810 | break; | ||
811 | } | ||
812 | return 1; | ||
813 | } | ||
814 | return 0; | ||
815 | } | ||
816 | |||
817 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
818 | { | ||
819 | switch (msr) { | ||
820 | case MSR_EFER: | ||
821 | set_efer(vcpu, data); | ||
797 | break; | 822 | break; |
798 | case MSR_IA32_DEBUGCTLMSR: | 823 | case MSR_IA32_DEBUGCTLMSR: |
799 | if (!data) { | 824 | if (!data) { |
@@ -849,6 +874,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
849 | kvm_request_guest_time_update(vcpu); | 874 | kvm_request_guest_time_update(vcpu); |
850 | break; | 875 | break; |
851 | } | 876 | } |
877 | case MSR_IA32_MCG_CTL: | ||
878 | case MSR_IA32_MCG_STATUS: | ||
879 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | ||
880 | return set_msr_mce(vcpu, msr, data); | ||
852 | default: | 881 | default: |
853 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); | 882 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); |
854 | return 1; | 883 | return 1; |
@@ -904,26 +933,49 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
904 | return 0; | 933 | return 0; |
905 | } | 934 | } |
906 | 935 | ||
907 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 936 | static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
908 | { | 937 | { |
909 | u64 data; | 938 | u64 data; |
939 | u64 mcg_cap = vcpu->arch.mcg_cap; | ||
940 | unsigned bank_num = mcg_cap & 0xff; | ||
910 | 941 | ||
911 | switch (msr) { | 942 | switch (msr) { |
912 | case 0xc0010010: /* SYSCFG */ | ||
913 | case 0xc0010015: /* HWCR */ | ||
914 | case MSR_IA32_PLATFORM_ID: | ||
915 | case MSR_IA32_P5_MC_ADDR: | 943 | case MSR_IA32_P5_MC_ADDR: |
916 | case MSR_IA32_P5_MC_TYPE: | 944 | case MSR_IA32_P5_MC_TYPE: |
917 | case MSR_IA32_MC0_CTL: | 945 | data = 0; |
918 | case MSR_IA32_MCG_STATUS: | 946 | break; |
919 | case MSR_IA32_MCG_CAP: | 947 | case MSR_IA32_MCG_CAP: |
948 | data = vcpu->arch.mcg_cap; | ||
949 | break; | ||
920 | case MSR_IA32_MCG_CTL: | 950 | case MSR_IA32_MCG_CTL: |
921 | case MSR_IA32_MC0_MISC: | 951 | if (!(mcg_cap & MCG_CTL_P)) |
922 | case MSR_IA32_MC0_MISC+4: | 952 | return 1; |
923 | case MSR_IA32_MC0_MISC+8: | 953 | data = vcpu->arch.mcg_ctl; |
924 | case MSR_IA32_MC0_MISC+12: | 954 | break; |
925 | case MSR_IA32_MC0_MISC+16: | 955 | case MSR_IA32_MCG_STATUS: |
926 | case MSR_IA32_MC0_MISC+20: | 956 | data = vcpu->arch.mcg_status; |
957 | break; | ||
958 | default: | ||
959 | if (msr >= MSR_IA32_MC0_CTL && | ||
960 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { | ||
961 | u32 offset = msr - MSR_IA32_MC0_CTL; | ||
962 | data = vcpu->arch.mce_banks[offset]; | ||
963 | break; | ||
964 | } | ||
965 | return 1; | ||
966 | } | ||
967 | *pdata = data; | ||
968 | return 0; | ||
969 | } | ||
970 | |||
971 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
972 | { | ||
973 | u64 data; | ||
974 | |||
975 | switch (msr) { | ||
976 | case 0xc0010010: /* SYSCFG */ | ||
977 | case 0xc0010015: /* HWCR */ | ||
978 | case MSR_IA32_PLATFORM_ID: | ||
927 | case MSR_IA32_UCODE_REV: | 979 | case MSR_IA32_UCODE_REV: |
928 | case MSR_IA32_EBL_CR_POWERON: | 980 | case MSR_IA32_EBL_CR_POWERON: |
929 | case MSR_IA32_DEBUGCTLMSR: | 981 | case MSR_IA32_DEBUGCTLMSR: |
@@ -966,6 +1018,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
966 | case MSR_KVM_SYSTEM_TIME: | 1018 | case MSR_KVM_SYSTEM_TIME: |
967 | data = vcpu->arch.time; | 1019 | data = vcpu->arch.time; |
968 | break; | 1020 | break; |
1021 | case MSR_IA32_P5_MC_ADDR: | ||
1022 | case MSR_IA32_P5_MC_TYPE: | ||
1023 | case MSR_IA32_MCG_CAP: | ||
1024 | case MSR_IA32_MCG_CTL: | ||
1025 | case MSR_IA32_MCG_STATUS: | ||
1026 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | ||
1027 | return get_msr_mce(vcpu, msr, pdata); | ||
969 | default: | 1028 | default: |
970 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1029 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
971 | return 1; | 1030 | return 1; |
@@ -1087,6 +1146,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1087 | case KVM_CAP_IOMMU: | 1146 | case KVM_CAP_IOMMU: |
1088 | r = iommu_found(); | 1147 | r = iommu_found(); |
1089 | break; | 1148 | break; |
1149 | case KVM_CAP_MCE: | ||
1150 | r = KVM_MAX_MCE_BANKS; | ||
1151 | break; | ||
1090 | default: | 1152 | default: |
1091 | r = 0; | 1153 | r = 0; |
1092 | break; | 1154 | break; |
@@ -1146,6 +1208,16 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
1146 | r = 0; | 1208 | r = 0; |
1147 | break; | 1209 | break; |
1148 | } | 1210 | } |
1211 | case KVM_X86_GET_MCE_CAP_SUPPORTED: { | ||
1212 | u64 mce_cap; | ||
1213 | |||
1214 | mce_cap = KVM_MCE_CAP_SUPPORTED; | ||
1215 | r = -EFAULT; | ||
1216 | if (copy_to_user(argp, &mce_cap, sizeof mce_cap)) | ||
1217 | goto out; | ||
1218 | r = 0; | ||
1219 | break; | ||
1220 | } | ||
1149 | default: | 1221 | default: |
1150 | r = -EINVAL; | 1222 | r = -EINVAL; |
1151 | } | 1223 | } |
@@ -1502,6 +1574,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, | |||
1502 | return 0; | 1574 | return 0; |
1503 | } | 1575 | } |
1504 | 1576 | ||
1577 | static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | ||
1578 | u64 mcg_cap) | ||
1579 | { | ||
1580 | int r; | ||
1581 | unsigned bank_num = mcg_cap & 0xff, bank; | ||
1582 | |||
1583 | r = -EINVAL; | ||
1584 | if (!bank_num) | ||
1585 | goto out; | ||
1586 | if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) | ||
1587 | goto out; | ||
1588 | r = 0; | ||
1589 | vcpu->arch.mcg_cap = mcg_cap; | ||
1590 | /* Init IA32_MCG_CTL to all 1s */ | ||
1591 | if (mcg_cap & MCG_CTL_P) | ||
1592 | vcpu->arch.mcg_ctl = ~(u64)0; | ||
1593 | /* Init IA32_MCi_CTL to all 1s */ | ||
1594 | for (bank = 0; bank < bank_num; bank++) | ||
1595 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | ||
1596 | out: | ||
1597 | return r; | ||
1598 | } | ||
1599 | |||
1600 | static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | ||
1601 | struct kvm_x86_mce *mce) | ||
1602 | { | ||
1603 | u64 mcg_cap = vcpu->arch.mcg_cap; | ||
1604 | unsigned bank_num = mcg_cap & 0xff; | ||
1605 | u64 *banks = vcpu->arch.mce_banks; | ||
1606 | |||
1607 | if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL)) | ||
1608 | return -EINVAL; | ||
1609 | /* | ||
1610 | * if IA32_MCG_CTL is not all 1s, the uncorrected error | ||
1611 | * reporting is disabled | ||
1612 | */ | ||
1613 | if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && | ||
1614 | vcpu->arch.mcg_ctl != ~(u64)0) | ||
1615 | return 0; | ||
1616 | banks += 4 * mce->bank; | ||
1617 | /* | ||
1618 | * if IA32_MCi_CTL is not all 1s, the uncorrected error | ||
1619 | * reporting is disabled for the bank | ||
1620 | */ | ||
1621 | if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0) | ||
1622 | return 0; | ||
1623 | if (mce->status & MCI_STATUS_UC) { | ||
1624 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | ||
1625 | !(vcpu->arch.cr4 & X86_CR4_MCE)) { | ||
1626 | printk(KERN_DEBUG "kvm: set_mce: " | ||
1627 | "injects mce exception while " | ||
1628 | "previous one is in progress!\n"); | ||
1629 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
1630 | return 0; | ||
1631 | } | ||
1632 | if (banks[1] & MCI_STATUS_VAL) | ||
1633 | mce->status |= MCI_STATUS_OVER; | ||
1634 | banks[2] = mce->addr; | ||
1635 | banks[3] = mce->misc; | ||
1636 | vcpu->arch.mcg_status = mce->mcg_status; | ||
1637 | banks[1] = mce->status; | ||
1638 | kvm_queue_exception(vcpu, MC_VECTOR); | ||
1639 | } else if (!(banks[1] & MCI_STATUS_VAL) | ||
1640 | || !(banks[1] & MCI_STATUS_UC)) { | ||
1641 | if (banks[1] & MCI_STATUS_VAL) | ||
1642 | mce->status |= MCI_STATUS_OVER; | ||
1643 | banks[2] = mce->addr; | ||
1644 | banks[3] = mce->misc; | ||
1645 | banks[1] = mce->status; | ||
1646 | } else | ||
1647 | banks[1] |= MCI_STATUS_OVER; | ||
1648 | return 0; | ||
1649 | } | ||
1650 | |||
1505 | long kvm_arch_vcpu_ioctl(struct file *filp, | 1651 | long kvm_arch_vcpu_ioctl(struct file *filp, |
1506 | unsigned int ioctl, unsigned long arg) | 1652 | unsigned int ioctl, unsigned long arg) |
1507 | { | 1653 | { |
@@ -1635,6 +1781,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1635 | kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); | 1781 | kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); |
1636 | break; | 1782 | break; |
1637 | } | 1783 | } |
1784 | case KVM_X86_SETUP_MCE: { | ||
1785 | u64 mcg_cap; | ||
1786 | |||
1787 | r = -EFAULT; | ||
1788 | if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap)) | ||
1789 | goto out; | ||
1790 | r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap); | ||
1791 | break; | ||
1792 | } | ||
1793 | case KVM_X86_SET_MCE: { | ||
1794 | struct kvm_x86_mce mce; | ||
1795 | |||
1796 | r = -EFAULT; | ||
1797 | if (copy_from_user(&mce, argp, sizeof mce)) | ||
1798 | goto out; | ||
1799 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | ||
1800 | break; | ||
1801 | } | ||
1638 | default: | 1802 | default: |
1639 | r = -EINVAL; | 1803 | r = -EINVAL; |
1640 | } | 1804 | } |
@@ -4440,6 +4604,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
4440 | goto fail_mmu_destroy; | 4604 | goto fail_mmu_destroy; |
4441 | } | 4605 | } |
4442 | 4606 | ||
4607 | vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, | ||
4608 | GFP_KERNEL); | ||
4609 | if (!vcpu->arch.mce_banks) { | ||
4610 | r = -ENOMEM; | ||
4611 | goto fail_mmu_destroy; | ||
4612 | } | ||
4613 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | ||
4614 | |||
4443 | return 0; | 4615 | return 0; |
4444 | 4616 | ||
4445 | fail_mmu_destroy: | 4617 | fail_mmu_destroy: |