aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Huang Ying <ying.huang@intel.com> 2009-05-11 04:48:15 -0400
committer Avi Kivity <avi@redhat.com> 2009-09-10 01:32:39 -0400
commit 890ca9aefa78f7831f8f633cab9e4803636dffe4 (patch)
tree 1a66ca576b6b0c396de79214e2262dc218ec5d56
parent af24a4e4aec77ef16c1971cf4465f767ba946034 (diff)
KVM: Add MCE support
The related MSRs are emulated. MCE capability is exported via extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED. A new vcpu ioctl command KVM_X86_SETUP_MCE is used to set up MCE emulation such as the mcg_cap. MCE is injected via vcpu ioctl command KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are not implemented.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r-- arch/x86/include/asm/kvm.h | 1
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 5
-rw-r--r-- arch/x86/kvm/x86.c | 220
-rw-r--r-- include/linux/kvm.h | 20
4 files changed, 222 insertions, 24 deletions
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 125be8b19568..708b9c32a5da 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -17,6 +17,7 @@
17#define __KVM_HAVE_USER_NMI 17#define __KVM_HAVE_USER_NMI
18#define __KVM_HAVE_GUEST_DEBUG 18#define __KVM_HAVE_GUEST_DEBUG
19#define __KVM_HAVE_MSIX 19#define __KVM_HAVE_MSIX
20#define __KVM_HAVE_MCE
20 21
21/* Architectural interrupt line count. */ 22/* Architectural interrupt line count. */
22#define KVM_NR_INTERRUPTS 256 23#define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 79561752af97..81c68f630b14 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -373,6 +373,11 @@ struct kvm_vcpu_arch {
373 unsigned long dr6; 373 unsigned long dr6;
374 unsigned long dr7; 374 unsigned long dr7;
375 unsigned long eff_db[KVM_NR_DB_REGS]; 375 unsigned long eff_db[KVM_NR_DB_REGS];
376
377 u64 mcg_cap;
378 u64 mcg_status;
379 u64 mcg_ctl;
380 u64 *mce_banks;
376}; 381};
377 382
378struct kvm_mem_alias { 383struct kvm_mem_alias {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6d46079a901c..55a9dd182de8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
42#include <asm/msr.h> 42#include <asm/msr.h>
43#include <asm/desc.h> 43#include <asm/desc.h>
44#include <asm/mtrr.h> 44#include <asm/mtrr.h>
45#include <asm/mce.h>
45 46
46#define MAX_IO_MSRS 256 47#define MAX_IO_MSRS 256
47#define CR0_RESERVED_BITS \ 48#define CR0_RESERVED_BITS \
@@ -55,6 +56,10 @@
55 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 56 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
56 57
57#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 58#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
59
60#define KVM_MAX_MCE_BANKS 32
61#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
62
58/* EFER defaults: 63/* EFER defaults:
59 * - enable syscall per default because its emulated by KVM 64 * - enable syscall per default because its emulated by KVM
60 * - enable LME and LMA per default on 64 bit KVM 65 * - enable LME and LMA per default on 64 bit KVM
@@ -777,23 +782,43 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
777 return 0; 782 return 0;
778} 783}
779 784
780int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 785static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
781{ 786{
787 u64 mcg_cap = vcpu->arch.mcg_cap;
788 unsigned bank_num = mcg_cap & 0xff;
789
782 switch (msr) { 790 switch (msr) {
783 case MSR_EFER:
784 set_efer(vcpu, data);
785 break;
786 case MSR_IA32_MC0_STATUS:
787 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
788 __func__, data);
789 break;
790 case MSR_IA32_MCG_STATUS: 791 case MSR_IA32_MCG_STATUS:
791 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", 792 vcpu->arch.mcg_status = data;
792 __func__, data);
793 break; 793 break;
794 case MSR_IA32_MCG_CTL: 794 case MSR_IA32_MCG_CTL:
795 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", 795 if (!(mcg_cap & MCG_CTL_P))
796 __func__, data); 796 return 1;
797 if (data != 0 && data != ~(u64)0)
798 return -1;
799 vcpu->arch.mcg_ctl = data;
800 break;
801 default:
802 if (msr >= MSR_IA32_MC0_CTL &&
803 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
804 u32 offset = msr - MSR_IA32_MC0_CTL;
805 /* only 0 or all 1s can be written to IA32_MCi_CTL */
806 if ((offset & 0x3) == 0 &&
807 data != 0 && data != ~(u64)0)
808 return -1;
809 vcpu->arch.mce_banks[offset] = data;
810 break;
811 }
812 return 1;
813 }
814 return 0;
815}
816
817int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
818{
819 switch (msr) {
820 case MSR_EFER:
821 set_efer(vcpu, data);
797 break; 822 break;
798 case MSR_IA32_DEBUGCTLMSR: 823 case MSR_IA32_DEBUGCTLMSR:
799 if (!data) { 824 if (!data) {
@@ -849,6 +874,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
849 kvm_request_guest_time_update(vcpu); 874 kvm_request_guest_time_update(vcpu);
850 break; 875 break;
851 } 876 }
877 case MSR_IA32_MCG_CTL:
878 case MSR_IA32_MCG_STATUS:
879 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
880 return set_msr_mce(vcpu, msr, data);
852 default: 881 default:
853 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); 882 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
854 return 1; 883 return 1;
@@ -904,26 +933,49 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
904 return 0; 933 return 0;
905} 934}
906 935
907int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 936static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
908{ 937{
909 u64 data; 938 u64 data;
939 u64 mcg_cap = vcpu->arch.mcg_cap;
940 unsigned bank_num = mcg_cap & 0xff;
910 941
911 switch (msr) { 942 switch (msr) {
912 case 0xc0010010: /* SYSCFG */
913 case 0xc0010015: /* HWCR */
914 case MSR_IA32_PLATFORM_ID:
915 case MSR_IA32_P5_MC_ADDR: 943 case MSR_IA32_P5_MC_ADDR:
916 case MSR_IA32_P5_MC_TYPE: 944 case MSR_IA32_P5_MC_TYPE:
917 case MSR_IA32_MC0_CTL: 945 data = 0;
918 case MSR_IA32_MCG_STATUS: 946 break;
919 case MSR_IA32_MCG_CAP: 947 case MSR_IA32_MCG_CAP:
948 data = vcpu->arch.mcg_cap;
949 break;
920 case MSR_IA32_MCG_CTL: 950 case MSR_IA32_MCG_CTL:
921 case MSR_IA32_MC0_MISC: 951 if (!(mcg_cap & MCG_CTL_P))
922 case MSR_IA32_MC0_MISC+4: 952 return 1;
923 case MSR_IA32_MC0_MISC+8: 953 data = vcpu->arch.mcg_ctl;
924 case MSR_IA32_MC0_MISC+12: 954 break;
925 case MSR_IA32_MC0_MISC+16: 955 case MSR_IA32_MCG_STATUS:
926 case MSR_IA32_MC0_MISC+20: 956 data = vcpu->arch.mcg_status;
957 break;
958 default:
959 if (msr >= MSR_IA32_MC0_CTL &&
960 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
961 u32 offset = msr - MSR_IA32_MC0_CTL;
962 data = vcpu->arch.mce_banks[offset];
963 break;
964 }
965 return 1;
966 }
967 *pdata = data;
968 return 0;
969}
970
971int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
972{
973 u64 data;
974
975 switch (msr) {
976 case 0xc0010010: /* SYSCFG */
977 case 0xc0010015: /* HWCR */
978 case MSR_IA32_PLATFORM_ID:
927 case MSR_IA32_UCODE_REV: 979 case MSR_IA32_UCODE_REV:
928 case MSR_IA32_EBL_CR_POWERON: 980 case MSR_IA32_EBL_CR_POWERON:
929 case MSR_IA32_DEBUGCTLMSR: 981 case MSR_IA32_DEBUGCTLMSR:
@@ -966,6 +1018,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
966 case MSR_KVM_SYSTEM_TIME: 1018 case MSR_KVM_SYSTEM_TIME:
967 data = vcpu->arch.time; 1019 data = vcpu->arch.time;
968 break; 1020 break;
1021 case MSR_IA32_P5_MC_ADDR:
1022 case MSR_IA32_P5_MC_TYPE:
1023 case MSR_IA32_MCG_CAP:
1024 case MSR_IA32_MCG_CTL:
1025 case MSR_IA32_MCG_STATUS:
1026 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1027 return get_msr_mce(vcpu, msr, pdata);
969 default: 1028 default:
970 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 1029 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
971 return 1; 1030 return 1;
@@ -1087,6 +1146,9 @@ int kvm_dev_ioctl_check_extension(long ext)
1087 case KVM_CAP_IOMMU: 1146 case KVM_CAP_IOMMU:
1088 r = iommu_found(); 1147 r = iommu_found();
1089 break; 1148 break;
1149 case KVM_CAP_MCE:
1150 r = KVM_MAX_MCE_BANKS;
1151 break;
1090 default: 1152 default:
1091 r = 0; 1153 r = 0;
1092 break; 1154 break;
@@ -1146,6 +1208,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
1146 r = 0; 1208 r = 0;
1147 break; 1209 break;
1148 } 1210 }
1211 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
1212 u64 mce_cap;
1213
1214 mce_cap = KVM_MCE_CAP_SUPPORTED;
1215 r = -EFAULT;
1216 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
1217 goto out;
1218 r = 0;
1219 break;
1220 }
1149 default: 1221 default:
1150 r = -EINVAL; 1222 r = -EINVAL;
1151 } 1223 }
@@ -1502,6 +1574,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
1502 return 0; 1574 return 0;
1503} 1575}
1504 1576
1577static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
1578 u64 mcg_cap)
1579{
1580 int r;
1581 unsigned bank_num = mcg_cap & 0xff, bank;
1582
1583 r = -EINVAL;
1584 if (!bank_num)
1585 goto out;
1586 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
1587 goto out;
1588 r = 0;
1589 vcpu->arch.mcg_cap = mcg_cap;
1590 /* Init IA32_MCG_CTL to all 1s */
1591 if (mcg_cap & MCG_CTL_P)
1592 vcpu->arch.mcg_ctl = ~(u64)0;
1593 /* Init IA32_MCi_CTL to all 1s */
1594 for (bank = 0; bank < bank_num; bank++)
1595 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
1596out:
1597 return r;
1598}
1599
1600static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1601 struct kvm_x86_mce *mce)
1602{
1603 u64 mcg_cap = vcpu->arch.mcg_cap;
1604 unsigned bank_num = mcg_cap & 0xff;
1605 u64 *banks = vcpu->arch.mce_banks;
1606
1607 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
1608 return -EINVAL;
1609 /*
1610 * if IA32_MCG_CTL is not all 1s, the uncorrected error
1611 * reporting is disabled
1612 */
1613 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
1614 vcpu->arch.mcg_ctl != ~(u64)0)
1615 return 0;
1616 banks += 4 * mce->bank;
1617 /*
1618 * if IA32_MCi_CTL is not all 1s, the uncorrected error
1619 * reporting is disabled for the bank
1620 */
1621 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
1622 return 0;
1623 if (mce->status & MCI_STATUS_UC) {
1624 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
1625 !(vcpu->arch.cr4 & X86_CR4_MCE)) {
1626 printk(KERN_DEBUG "kvm: set_mce: "
1627 "injects mce exception while "
1628 "previous one is in progress!\n");
1629 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
1630 return 0;
1631 }
1632 if (banks[1] & MCI_STATUS_VAL)
1633 mce->status |= MCI_STATUS_OVER;
1634 banks[2] = mce->addr;
1635 banks[3] = mce->misc;
1636 vcpu->arch.mcg_status = mce->mcg_status;
1637 banks[1] = mce->status;
1638 kvm_queue_exception(vcpu, MC_VECTOR);
1639 } else if (!(banks[1] & MCI_STATUS_VAL)
1640 || !(banks[1] & MCI_STATUS_UC)) {
1641 if (banks[1] & MCI_STATUS_VAL)
1642 mce->status |= MCI_STATUS_OVER;
1643 banks[2] = mce->addr;
1644 banks[3] = mce->misc;
1645 banks[1] = mce->status;
1646 } else
1647 banks[1] |= MCI_STATUS_OVER;
1648 return 0;
1649}
1650
1505long kvm_arch_vcpu_ioctl(struct file *filp, 1651long kvm_arch_vcpu_ioctl(struct file *filp,
1506 unsigned int ioctl, unsigned long arg) 1652 unsigned int ioctl, unsigned long arg)
1507{ 1653{
@@ -1635,6 +1781,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1635 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); 1781 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
1636 break; 1782 break;
1637 } 1783 }
1784 case KVM_X86_SETUP_MCE: {
1785 u64 mcg_cap;
1786
1787 r = -EFAULT;
1788 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
1789 goto out;
1790 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
1791 break;
1792 }
1793 case KVM_X86_SET_MCE: {
1794 struct kvm_x86_mce mce;
1795
1796 r = -EFAULT;
1797 if (copy_from_user(&mce, argp, sizeof mce))
1798 goto out;
1799 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
1800 break;
1801 }
1638 default: 1802 default:
1639 r = -EINVAL; 1803 r = -EINVAL;
1640 } 1804 }
@@ -4440,6 +4604,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
4440 goto fail_mmu_destroy; 4604 goto fail_mmu_destroy;
4441 } 4605 }
4442 4606
4607 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
4608 GFP_KERNEL);
4609 if (!vcpu->arch.mce_banks) {
4610 r = -ENOMEM;
4611 goto fail_mmu_destroy;
4612 }
4613 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
4614
4443 return 0; 4615 return 0;
4444 4616
4445fail_mmu_destroy: 4617fail_mmu_destroy:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 3db5d8d37485..7b17141c47c9 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -415,6 +415,9 @@ struct kvm_trace_rec {
415#define KVM_CAP_ASSIGN_DEV_IRQ 29 415#define KVM_CAP_ASSIGN_DEV_IRQ 29
416/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ 416/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
417#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 417#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
418#ifdef __KVM_HAVE_MCE
419#define KVM_CAP_MCE 31
420#endif
418 421
419#ifdef KVM_CAP_IRQ_ROUTING 422#ifdef KVM_CAP_IRQ_ROUTING
420 423
@@ -454,6 +457,19 @@ struct kvm_irq_routing {
454 457
455#endif 458#endif
456 459
460#ifdef KVM_CAP_MCE
461/* x86 MCE */
462struct kvm_x86_mce {
463 __u64 status;
464 __u64 addr;
465 __u64 misc;
466 __u64 mcg_status;
467 __u8 bank;
468 __u8 pad1[7];
469 __u64 pad2[3];
470};
471#endif
472
457/* 473/*
458 * ioctls for VM fds 474 * ioctls for VM fds
459 */ 475 */
@@ -541,6 +557,10 @@ struct kvm_irq_routing {
541#define KVM_NMI _IO(KVMIO, 0x9a) 557#define KVM_NMI _IO(KVMIO, 0x9a)
542/* Available with KVM_CAP_SET_GUEST_DEBUG */ 558/* Available with KVM_CAP_SET_GUEST_DEBUG */
543#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) 559#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
560/* MCE for x86 */
561#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64)
562#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64)
563#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce)
544 564
545/* 565/*
546 * Deprecated interfaces 566 * Deprecated interfaces