aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 20:43:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 20:43:43 -0400
commit69def9f05dfce3281bb06599057e6b8097385d39 (patch)
tree7d826b22924268ddbfad101993b248996d40e2ec /arch/x86/kvm/x86.c
parent353f6dd2dec992ddd34620a94b051b0f76227379 (diff)
parent8e616fc8d343bd7f0f0a0c22407fdcb77f6d22b1 (diff)
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits) MAINTAINERS: update KVM entry KVM: correct error-handling code KVM: fix compile warnings on s390 KVM: VMX: Check cpl before emulating debug register access KVM: fix misreporting of coalesced interrupts by kvm tracer KVM: x86: drop duplicate kvm_flush_remote_tlb calls KVM: VMX: call vmx_load_host_state() only if msr is cached KVM: VMX: Conditionally reload debug register 6 KVM: Use thread debug register storage instead of kvm specific data KVM guest: do not batch pte updates from interrupt context KVM: Fix coalesced interrupt reporting in IOAPIC KVM guest: fix bogus wallclock physical address calculation KVM: VMX: Fix cr8 exiting control clobbering by EPT KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp KVM: Document KVM_CAP_IRQCHIP KVM: Protect update_cr8_intercept() when running without an apic KVM: VMX: Fix EPT with WP bit change during paging KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors KVM: x86 emulator: Add adc and sbb missing decoder flags KVM: Add missing #include ...
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c815
1 files changed, 576 insertions, 239 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 633ccc7400a..be451ee4424 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -37,11 +37,16 @@
37#include <linux/iommu.h> 37#include <linux/iommu.h>
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h> 39#include <linux/cpufreq.h>
40#include <trace/events/kvm.h>
41#undef TRACE_INCLUDE_FILE
42#define CREATE_TRACE_POINTS
43#include "trace.h"
40 44
41#include <asm/uaccess.h> 45#include <asm/uaccess.h>
42#include <asm/msr.h> 46#include <asm/msr.h>
43#include <asm/desc.h> 47#include <asm/desc.h>
44#include <asm/mtrr.h> 48#include <asm/mtrr.h>
49#include <asm/mce.h>
45 50
46#define MAX_IO_MSRS 256 51#define MAX_IO_MSRS 256
47#define CR0_RESERVED_BITS \ 52#define CR0_RESERVED_BITS \
@@ -55,6 +60,10 @@
55 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 60 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
56 61
57#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 62#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
63
64#define KVM_MAX_MCE_BANKS 32
65#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
66
58/* EFER defaults: 67/* EFER defaults:
59 * - enable syscall per default because it's emulated by KVM 68 * - enable syscall per default because it's emulated by KVM
60 * - enable LME and LMA per default on 64 bit KVM 69 * - enable LME and LMA per default on 64 bit KVM
@@ -68,14 +77,16 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
68#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 77#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
69#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 78#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
70 79
80static void update_cr8_intercept(struct kvm_vcpu *vcpu);
71static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 81static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
72 struct kvm_cpuid_entry2 __user *entries); 82 struct kvm_cpuid_entry2 __user *entries);
73struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
74 u32 function, u32 index);
75 83
76struct kvm_x86_ops *kvm_x86_ops; 84struct kvm_x86_ops *kvm_x86_ops;
77EXPORT_SYMBOL_GPL(kvm_x86_ops); 85EXPORT_SYMBOL_GPL(kvm_x86_ops);
78 86
87int ignore_msrs = 0;
88module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
89
79struct kvm_stats_debugfs_item debugfs_entries[] = { 90struct kvm_stats_debugfs_item debugfs_entries[] = {
80 { "pf_fixed", VCPU_STAT(pf_fixed) }, 91 { "pf_fixed", VCPU_STAT(pf_fixed) },
81 { "pf_guest", VCPU_STAT(pf_guest) }, 92 { "pf_guest", VCPU_STAT(pf_guest) },
@@ -122,18 +133,16 @@ unsigned long segment_base(u16 selector)
122 if (selector == 0) 133 if (selector == 0)
123 return 0; 134 return 0;
124 135
125 asm("sgdt %0" : "=m"(gdt)); 136 kvm_get_gdt(&gdt);
126 table_base = gdt.base; 137 table_base = gdt.base;
127 138
128 if (selector & 4) { /* from ldt */ 139 if (selector & 4) { /* from ldt */
129 u16 ldt_selector; 140 u16 ldt_selector = kvm_read_ldt();
130 141
131 asm("sldt %0" : "=g"(ldt_selector));
132 table_base = segment_base(ldt_selector); 142 table_base = segment_base(ldt_selector);
133 } 143 }
134 d = (struct desc_struct *)(table_base + (selector & ~7)); 144 d = (struct desc_struct *)(table_base + (selector & ~7));
135 v = d->base0 | ((unsigned long)d->base1 << 16) | 145 v = get_desc_base(d);
136 ((unsigned long)d->base2 << 24);
137#ifdef CONFIG_X86_64 146#ifdef CONFIG_X86_64
138 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) 147 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
139 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; 148 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
@@ -176,16 +185,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
176 ++vcpu->stat.pf_guest; 185 ++vcpu->stat.pf_guest;
177 186
178 if (vcpu->arch.exception.pending) { 187 if (vcpu->arch.exception.pending) {
179 if (vcpu->arch.exception.nr == PF_VECTOR) { 188 switch(vcpu->arch.exception.nr) {
180 printk(KERN_DEBUG "kvm: inject_page_fault:" 189 case DF_VECTOR:
181 " double fault 0x%lx\n", addr);
182 vcpu->arch.exception.nr = DF_VECTOR;
183 vcpu->arch.exception.error_code = 0;
184 } else if (vcpu->arch.exception.nr == DF_VECTOR) {
185 /* triple fault -> shutdown */ 190 /* triple fault -> shutdown */
186 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 191 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
192 return;
193 case PF_VECTOR:
194 vcpu->arch.exception.nr = DF_VECTOR;
195 vcpu->arch.exception.error_code = 0;
196 return;
197 default:
198 /* replace previous exception with a new one in a hope
199 that instruction re-execution will regenerate lost
200 exception */
201 vcpu->arch.exception.pending = false;
202 break;
187 } 203 }
188 return;
189 } 204 }
190 vcpu->arch.cr2 = addr; 205 vcpu->arch.cr2 = addr;
191 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 206 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
@@ -207,12 +222,18 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
207} 222}
208EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 223EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
209 224
210static void __queue_exception(struct kvm_vcpu *vcpu) 225/*
226 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue
227 * a #GP and return false.
228 */
229bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
211{ 230{
212 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 231 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
213 vcpu->arch.exception.has_error_code, 232 return true;
214 vcpu->arch.exception.error_code); 233 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
234 return false;
215} 235}
236EXPORT_SYMBOL_GPL(kvm_require_cpl);
216 237
217/* 238/*
218 * Load the pae pdptrs. Return true if they are all valid. 239 * Load the pae pdptrs. Return true if they are all valid.
@@ -232,7 +253,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
232 goto out; 253 goto out;
233 } 254 }
234 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { 255 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
235 if (is_present_pte(pdpte[i]) && 256 if (is_present_gpte(pdpte[i]) &&
236 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { 257 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
237 ret = 0; 258 ret = 0;
238 goto out; 259 goto out;
@@ -241,6 +262,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
241 ret = 1; 262 ret = 1;
242 263
243 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); 264 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
265 __set_bit(VCPU_EXREG_PDPTR,
266 (unsigned long *)&vcpu->arch.regs_avail);
267 __set_bit(VCPU_EXREG_PDPTR,
268 (unsigned long *)&vcpu->arch.regs_dirty);
244out: 269out:
245 270
246 return ret; 271 return ret;
@@ -256,6 +281,10 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
256 if (is_long_mode(vcpu) || !is_pae(vcpu)) 281 if (is_long_mode(vcpu) || !is_pae(vcpu))
257 return false; 282 return false;
258 283
284 if (!test_bit(VCPU_EXREG_PDPTR,
285 (unsigned long *)&vcpu->arch.regs_avail))
286 return true;
287
259 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); 288 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
260 if (r < 0) 289 if (r < 0)
261 goto out; 290 goto out;
@@ -328,9 +357,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
328void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 357void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
329{ 358{
330 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 359 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
331 KVMTRACE_1D(LMSW, vcpu,
332 (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
333 handler);
334} 360}
335EXPORT_SYMBOL_GPL(kvm_lmsw); 361EXPORT_SYMBOL_GPL(kvm_lmsw);
336 362
@@ -466,7 +492,7 @@ static u32 msrs_to_save[] = {
466#ifdef CONFIG_X86_64 492#ifdef CONFIG_X86_64
467 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 493 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
468#endif 494#endif
469 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 495 MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
470 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA 496 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
471}; 497};
472 498
@@ -644,8 +670,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
644 670
645 /* Keep irq disabled to prevent changes to the clock */ 671 /* Keep irq disabled to prevent changes to the clock */
646 local_irq_save(flags); 672 local_irq_save(flags);
647 kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, 673 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
648 &vcpu->hv_clock.tsc_timestamp);
649 ktime_get_ts(&ts); 674 ktime_get_ts(&ts);
650 local_irq_restore(flags); 675 local_irq_restore(flags);
651 676
@@ -778,23 +803,60 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
778 return 0; 803 return 0;
779} 804}
780 805
806static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
807{
808 u64 mcg_cap = vcpu->arch.mcg_cap;
809 unsigned bank_num = mcg_cap & 0xff;
810
811 switch (msr) {
812 case MSR_IA32_MCG_STATUS:
813 vcpu->arch.mcg_status = data;
814 break;
815 case MSR_IA32_MCG_CTL:
816 if (!(mcg_cap & MCG_CTL_P))
817 return 1;
818 if (data != 0 && data != ~(u64)0)
819 return -1;
820 vcpu->arch.mcg_ctl = data;
821 break;
822 default:
823 if (msr >= MSR_IA32_MC0_CTL &&
824 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
825 u32 offset = msr - MSR_IA32_MC0_CTL;
826 /* only 0 or all 1s can be written to IA32_MCi_CTL */
827 if ((offset & 0x3) == 0 &&
828 data != 0 && data != ~(u64)0)
829 return -1;
830 vcpu->arch.mce_banks[offset] = data;
831 break;
832 }
833 return 1;
834 }
835 return 0;
836}
837
781int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 838int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
782{ 839{
783 switch (msr) { 840 switch (msr) {
784 case MSR_EFER: 841 case MSR_EFER:
785 set_efer(vcpu, data); 842 set_efer(vcpu, data);
786 break; 843 break;
787 case MSR_IA32_MC0_STATUS: 844 case MSR_K7_HWCR:
788 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", 845 data &= ~(u64)0x40; /* ignore flush filter disable */
789 __func__, data); 846 if (data != 0) {
847 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
848 data);
849 return 1;
850 }
790 break; 851 break;
791 case MSR_IA32_MCG_STATUS: 852 case MSR_FAM10H_MMIO_CONF_BASE:
792 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", 853 if (data != 0) {
793 __func__, data); 854 pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
855 "0x%llx\n", data);
856 return 1;
857 }
794 break; 858 break;
795 case MSR_IA32_MCG_CTL: 859 case MSR_AMD64_NB_CFG:
796 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
797 __func__, data);
798 break; 860 break;
799 case MSR_IA32_DEBUGCTLMSR: 861 case MSR_IA32_DEBUGCTLMSR:
800 if (!data) { 862 if (!data) {
@@ -811,12 +873,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
811 case MSR_IA32_UCODE_REV: 873 case MSR_IA32_UCODE_REV:
812 case MSR_IA32_UCODE_WRITE: 874 case MSR_IA32_UCODE_WRITE:
813 case MSR_VM_HSAVE_PA: 875 case MSR_VM_HSAVE_PA:
876 case MSR_AMD64_PATCH_LOADER:
814 break; 877 break;
815 case 0x200 ... 0x2ff: 878 case 0x200 ... 0x2ff:
816 return set_msr_mtrr(vcpu, msr, data); 879 return set_msr_mtrr(vcpu, msr, data);
817 case MSR_IA32_APICBASE: 880 case MSR_IA32_APICBASE:
818 kvm_set_apic_base(vcpu, data); 881 kvm_set_apic_base(vcpu, data);
819 break; 882 break;
883 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
884 return kvm_x2apic_msr_write(vcpu, msr, data);
820 case MSR_IA32_MISC_ENABLE: 885 case MSR_IA32_MISC_ENABLE:
821 vcpu->arch.ia32_misc_enable_msr = data; 886 vcpu->arch.ia32_misc_enable_msr = data;
822 break; 887 break;
@@ -850,9 +915,50 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
850 kvm_request_guest_time_update(vcpu); 915 kvm_request_guest_time_update(vcpu);
851 break; 916 break;
852 } 917 }
918 case MSR_IA32_MCG_CTL:
919 case MSR_IA32_MCG_STATUS:
920 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
921 return set_msr_mce(vcpu, msr, data);
922
923 /* Performance counters are not protected by a CPUID bit,
924 * so we should check all of them in the generic path for the sake of
925 * cross vendor migration.
926 * Writing a zero into the event select MSRs disables them,
927 * which we perfectly emulate ;-). Any other value should be at least
928 * reported, some guests depend on them.
929 */
930 case MSR_P6_EVNTSEL0:
931 case MSR_P6_EVNTSEL1:
932 case MSR_K7_EVNTSEL0:
933 case MSR_K7_EVNTSEL1:
934 case MSR_K7_EVNTSEL2:
935 case MSR_K7_EVNTSEL3:
936 if (data != 0)
937 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
938 "0x%x data 0x%llx\n", msr, data);
939 break;
940 /* at least RHEL 4 unconditionally writes to the perfctr registers,
941 * so we ignore writes to make it happy.
942 */
943 case MSR_P6_PERFCTR0:
944 case MSR_P6_PERFCTR1:
945 case MSR_K7_PERFCTR0:
946 case MSR_K7_PERFCTR1:
947 case MSR_K7_PERFCTR2:
948 case MSR_K7_PERFCTR3:
949 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
950 "0x%x data 0x%llx\n", msr, data);
951 break;
853 default: 952 default:
854 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); 953 if (!ignore_msrs) {
855 return 1; 954 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
955 msr, data);
956 return 1;
957 } else {
958 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
959 msr, data);
960 break;
961 }
856 } 962 }
857 return 0; 963 return 0;
858} 964}
@@ -905,26 +1011,47 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
905 return 0; 1011 return 0;
906} 1012}
907 1013
908int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1014static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
909{ 1015{
910 u64 data; 1016 u64 data;
1017 u64 mcg_cap = vcpu->arch.mcg_cap;
1018 unsigned bank_num = mcg_cap & 0xff;
911 1019
912 switch (msr) { 1020 switch (msr) {
913 case 0xc0010010: /* SYSCFG */
914 case 0xc0010015: /* HWCR */
915 case MSR_IA32_PLATFORM_ID:
916 case MSR_IA32_P5_MC_ADDR: 1021 case MSR_IA32_P5_MC_ADDR:
917 case MSR_IA32_P5_MC_TYPE: 1022 case MSR_IA32_P5_MC_TYPE:
918 case MSR_IA32_MC0_CTL: 1023 data = 0;
919 case MSR_IA32_MCG_STATUS: 1024 break;
920 case MSR_IA32_MCG_CAP: 1025 case MSR_IA32_MCG_CAP:
1026 data = vcpu->arch.mcg_cap;
1027 break;
921 case MSR_IA32_MCG_CTL: 1028 case MSR_IA32_MCG_CTL:
922 case MSR_IA32_MC0_MISC: 1029 if (!(mcg_cap & MCG_CTL_P))
923 case MSR_IA32_MC0_MISC+4: 1030 return 1;
924 case MSR_IA32_MC0_MISC+8: 1031 data = vcpu->arch.mcg_ctl;
925 case MSR_IA32_MC0_MISC+12: 1032 break;
926 case MSR_IA32_MC0_MISC+16: 1033 case MSR_IA32_MCG_STATUS:
927 case MSR_IA32_MC0_MISC+20: 1034 data = vcpu->arch.mcg_status;
1035 break;
1036 default:
1037 if (msr >= MSR_IA32_MC0_CTL &&
1038 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1039 u32 offset = msr - MSR_IA32_MC0_CTL;
1040 data = vcpu->arch.mce_banks[offset];
1041 break;
1042 }
1043 return 1;
1044 }
1045 *pdata = data;
1046 return 0;
1047}
1048
1049int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1050{
1051 u64 data;
1052
1053 switch (msr) {
1054 case MSR_IA32_PLATFORM_ID:
928 case MSR_IA32_UCODE_REV: 1055 case MSR_IA32_UCODE_REV:
929 case MSR_IA32_EBL_CR_POWERON: 1056 case MSR_IA32_EBL_CR_POWERON:
930 case MSR_IA32_DEBUGCTLMSR: 1057 case MSR_IA32_DEBUGCTLMSR:
@@ -932,10 +1059,18 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
932 case MSR_IA32_LASTBRANCHTOIP: 1059 case MSR_IA32_LASTBRANCHTOIP:
933 case MSR_IA32_LASTINTFROMIP: 1060 case MSR_IA32_LASTINTFROMIP:
934 case MSR_IA32_LASTINTTOIP: 1061 case MSR_IA32_LASTINTTOIP:
1062 case MSR_K8_SYSCFG:
1063 case MSR_K7_HWCR:
935 case MSR_VM_HSAVE_PA: 1064 case MSR_VM_HSAVE_PA:
1065 case MSR_P6_PERFCTR0:
1066 case MSR_P6_PERFCTR1:
936 case MSR_P6_EVNTSEL0: 1067 case MSR_P6_EVNTSEL0:
937 case MSR_P6_EVNTSEL1: 1068 case MSR_P6_EVNTSEL1:
938 case MSR_K7_EVNTSEL0: 1069 case MSR_K7_EVNTSEL0:
1070 case MSR_K7_PERFCTR0:
1071 case MSR_K8_INT_PENDING_MSG:
1072 case MSR_AMD64_NB_CFG:
1073 case MSR_FAM10H_MMIO_CONF_BASE:
939 data = 0; 1074 data = 0;
940 break; 1075 break;
941 case MSR_MTRRcap: 1076 case MSR_MTRRcap:
@@ -949,6 +1084,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
949 case MSR_IA32_APICBASE: 1084 case MSR_IA32_APICBASE:
950 data = kvm_get_apic_base(vcpu); 1085 data = kvm_get_apic_base(vcpu);
951 break; 1086 break;
1087 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1088 return kvm_x2apic_msr_read(vcpu, msr, pdata);
1089 break;
952 case MSR_IA32_MISC_ENABLE: 1090 case MSR_IA32_MISC_ENABLE:
953 data = vcpu->arch.ia32_misc_enable_msr; 1091 data = vcpu->arch.ia32_misc_enable_msr;
954 break; 1092 break;
@@ -967,9 +1105,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
967 case MSR_KVM_SYSTEM_TIME: 1105 case MSR_KVM_SYSTEM_TIME:
968 data = vcpu->arch.time; 1106 data = vcpu->arch.time;
969 break; 1107 break;
1108 case MSR_IA32_P5_MC_ADDR:
1109 case MSR_IA32_P5_MC_TYPE:
1110 case MSR_IA32_MCG_CAP:
1111 case MSR_IA32_MCG_CTL:
1112 case MSR_IA32_MCG_STATUS:
1113 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1114 return get_msr_mce(vcpu, msr, pdata);
970 default: 1115 default:
971 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 1116 if (!ignore_msrs) {
972 return 1; 1117 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
1118 return 1;
1119 } else {
1120 pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
1121 data = 0;
1122 }
1123 break;
973 } 1124 }
974 *pdata = data; 1125 *pdata = data;
975 return 0; 1126 return 0;
@@ -1068,6 +1219,11 @@ int kvm_dev_ioctl_check_extension(long ext)
1068 case KVM_CAP_REINJECT_CONTROL: 1219 case KVM_CAP_REINJECT_CONTROL:
1069 case KVM_CAP_IRQ_INJECT_STATUS: 1220 case KVM_CAP_IRQ_INJECT_STATUS:
1070 case KVM_CAP_ASSIGN_DEV_IRQ: 1221 case KVM_CAP_ASSIGN_DEV_IRQ:
1222 case KVM_CAP_IRQFD:
1223 case KVM_CAP_IOEVENTFD:
1224 case KVM_CAP_PIT2:
1225 case KVM_CAP_PIT_STATE2:
1226 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
1071 r = 1; 1227 r = 1;
1072 break; 1228 break;
1073 case KVM_CAP_COALESCED_MMIO: 1229 case KVM_CAP_COALESCED_MMIO:
@@ -1088,6 +1244,9 @@ int kvm_dev_ioctl_check_extension(long ext)
1088 case KVM_CAP_IOMMU: 1244 case KVM_CAP_IOMMU:
1089 r = iommu_found(); 1245 r = iommu_found();
1090 break; 1246 break;
1247 case KVM_CAP_MCE:
1248 r = KVM_MAX_MCE_BANKS;
1249 break;
1091 default: 1250 default:
1092 r = 0; 1251 r = 0;
1093 break; 1252 break;
@@ -1147,6 +1306,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
1147 r = 0; 1306 r = 0;
1148 break; 1307 break;
1149 } 1308 }
1309 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
1310 u64 mce_cap;
1311
1312 mce_cap = KVM_MCE_CAP_SUPPORTED;
1313 r = -EFAULT;
1314 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
1315 goto out;
1316 r = 0;
1317 break;
1318 }
1150 default: 1319 default:
1151 r = -EINVAL; 1320 r = -EINVAL;
1152 } 1321 }
@@ -1227,6 +1396,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1227 vcpu->arch.cpuid_nent = cpuid->nent; 1396 vcpu->arch.cpuid_nent = cpuid->nent;
1228 cpuid_fix_nx_cap(vcpu); 1397 cpuid_fix_nx_cap(vcpu);
1229 r = 0; 1398 r = 0;
1399 kvm_apic_set_version(vcpu);
1230 1400
1231out_free: 1401out_free:
1232 vfree(cpuid_entries); 1402 vfree(cpuid_entries);
@@ -1248,6 +1418,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1248 cpuid->nent * sizeof(struct kvm_cpuid_entry2))) 1418 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
1249 goto out; 1419 goto out;
1250 vcpu->arch.cpuid_nent = cpuid->nent; 1420 vcpu->arch.cpuid_nent = cpuid->nent;
1421 kvm_apic_set_version(vcpu);
1251 return 0; 1422 return 0;
1252 1423
1253out: 1424out:
@@ -1290,6 +1461,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1290 u32 index, int *nent, int maxnent) 1461 u32 index, int *nent, int maxnent)
1291{ 1462{
1292 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 1463 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
1464 unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
1293#ifdef CONFIG_X86_64 1465#ifdef CONFIG_X86_64
1294 unsigned f_lm = F(LM); 1466 unsigned f_lm = F(LM);
1295#else 1467#else
@@ -1314,7 +1486,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1314 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1486 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1315 F(PAT) | F(PSE36) | 0 /* Reserved */ | 1487 F(PAT) | F(PSE36) | 0 /* Reserved */ |
1316 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | 1488 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
1317 F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ | 1489 F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
1318 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1490 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1319 /* cpuid 1.ecx */ 1491 /* cpuid 1.ecx */
1320 const u32 kvm_supported_word4_x86_features = 1492 const u32 kvm_supported_word4_x86_features =
@@ -1323,7 +1495,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1323 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | 1495 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
1324 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1496 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
1325 0 /* Reserved, DCA */ | F(XMM4_1) | 1497 0 /* Reserved, DCA */ | F(XMM4_1) |
1326 F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) | 1498 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
1327 0 /* Reserved, XSAVE, OSXSAVE */; 1499 0 /* Reserved, XSAVE, OSXSAVE */;
1328 /* cpuid 0x80000001.ecx */ 1500 /* cpuid 0x80000001.ecx */
1329 const u32 kvm_supported_word6_x86_features = 1501 const u32 kvm_supported_word6_x86_features =
@@ -1344,6 +1516,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1344 case 1: 1516 case 1:
1345 entry->edx &= kvm_supported_word0_x86_features; 1517 entry->edx &= kvm_supported_word0_x86_features;
1346 entry->ecx &= kvm_supported_word4_x86_features; 1518 entry->ecx &= kvm_supported_word4_x86_features;
1519 /* we support x2apic emulation even if host does not support
1520 * it since we emulate x2apic in software */
1521 entry->ecx |= F(X2APIC);
1347 break; 1522 break;
1348 /* function 2 entries are STATEFUL. That is, repeated cpuid commands 1523 /* function 2 entries are STATEFUL. That is, repeated cpuid commands
1349 * may return different values. This forces us to get_cpu() before 1524 * may return different values. This forces us to get_cpu() before
@@ -1435,6 +1610,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1435 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) 1610 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
1436 do_cpuid_ent(&cpuid_entries[nent], func, 0, 1611 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1437 &nent, cpuid->nent); 1612 &nent, cpuid->nent);
1613 r = -E2BIG;
1614 if (nent >= cpuid->nent)
1615 goto out_free;
1616
1438 r = -EFAULT; 1617 r = -EFAULT;
1439 if (copy_to_user(entries, cpuid_entries, 1618 if (copy_to_user(entries, cpuid_entries,
1440 nent * sizeof(struct kvm_cpuid_entry2))) 1619 nent * sizeof(struct kvm_cpuid_entry2)))
@@ -1464,6 +1643,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
1464 vcpu_load(vcpu); 1643 vcpu_load(vcpu);
1465 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); 1644 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
1466 kvm_apic_post_state_restore(vcpu); 1645 kvm_apic_post_state_restore(vcpu);
1646 update_cr8_intercept(vcpu);
1467 vcpu_put(vcpu); 1647 vcpu_put(vcpu);
1468 1648
1469 return 0; 1649 return 0;
@@ -1503,6 +1683,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
1503 return 0; 1683 return 0;
1504} 1684}
1505 1685
1686static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
1687 u64 mcg_cap)
1688{
1689 int r;
1690 unsigned bank_num = mcg_cap & 0xff, bank;
1691
1692 r = -EINVAL;
1693 if (!bank_num)
1694 goto out;
1695 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
1696 goto out;
1697 r = 0;
1698 vcpu->arch.mcg_cap = mcg_cap;
1699 /* Init IA32_MCG_CTL to all 1s */
1700 if (mcg_cap & MCG_CTL_P)
1701 vcpu->arch.mcg_ctl = ~(u64)0;
1702 /* Init IA32_MCi_CTL to all 1s */
1703 for (bank = 0; bank < bank_num; bank++)
1704 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
1705out:
1706 return r;
1707}
1708
1709static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1710 struct kvm_x86_mce *mce)
1711{
1712 u64 mcg_cap = vcpu->arch.mcg_cap;
1713 unsigned bank_num = mcg_cap & 0xff;
1714 u64 *banks = vcpu->arch.mce_banks;
1715
1716 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
1717 return -EINVAL;
1718 /*
1719 * if IA32_MCG_CTL is not all 1s, the uncorrected error
1720 * reporting is disabled
1721 */
1722 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
1723 vcpu->arch.mcg_ctl != ~(u64)0)
1724 return 0;
1725 banks += 4 * mce->bank;
1726 /*
1727 * if IA32_MCi_CTL is not all 1s, the uncorrected error
1728 * reporting is disabled for the bank
1729 */
1730 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
1731 return 0;
1732 if (mce->status & MCI_STATUS_UC) {
1733 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
1734 !(vcpu->arch.cr4 & X86_CR4_MCE)) {
1735 printk(KERN_DEBUG "kvm: set_mce: "
1736 "injects mce exception while "
1737 "previous one is in progress!\n");
1738 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
1739 return 0;
1740 }
1741 if (banks[1] & MCI_STATUS_VAL)
1742 mce->status |= MCI_STATUS_OVER;
1743 banks[2] = mce->addr;
1744 banks[3] = mce->misc;
1745 vcpu->arch.mcg_status = mce->mcg_status;
1746 banks[1] = mce->status;
1747 kvm_queue_exception(vcpu, MC_VECTOR);
1748 } else if (!(banks[1] & MCI_STATUS_VAL)
1749 || !(banks[1] & MCI_STATUS_UC)) {
1750 if (banks[1] & MCI_STATUS_VAL)
1751 mce->status |= MCI_STATUS_OVER;
1752 banks[2] = mce->addr;
1753 banks[3] = mce->misc;
1754 banks[1] = mce->status;
1755 } else
1756 banks[1] |= MCI_STATUS_OVER;
1757 return 0;
1758}
1759
1506long kvm_arch_vcpu_ioctl(struct file *filp, 1760long kvm_arch_vcpu_ioctl(struct file *filp,
1507 unsigned int ioctl, unsigned long arg) 1761 unsigned int ioctl, unsigned long arg)
1508{ 1762{
@@ -1636,6 +1890,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1636 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); 1890 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
1637 break; 1891 break;
1638 } 1892 }
1893 case KVM_X86_SETUP_MCE: {
1894 u64 mcg_cap;
1895
1896 r = -EFAULT;
1897 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
1898 goto out;
1899 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
1900 break;
1901 }
1902 case KVM_X86_SET_MCE: {
1903 struct kvm_x86_mce mce;
1904
1905 r = -EFAULT;
1906 if (copy_from_user(&mce, argp, sizeof mce))
1907 goto out;
1908 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
1909 break;
1910 }
1639 default: 1911 default:
1640 r = -EINVAL; 1912 r = -EINVAL;
1641 } 1913 }
@@ -1654,6 +1926,13 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
1654 return ret; 1926 return ret;
1655} 1927}
1656 1928
1929static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
1930 u64 ident_addr)
1931{
1932 kvm->arch.ept_identity_map_addr = ident_addr;
1933 return 0;
1934}
1935
1657static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, 1936static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
1658 u32 kvm_nr_mmu_pages) 1937 u32 kvm_nr_mmu_pages)
1659{ 1938{
@@ -1775,19 +2054,25 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
1775 r = 0; 2054 r = 0;
1776 switch (chip->chip_id) { 2055 switch (chip->chip_id) {
1777 case KVM_IRQCHIP_PIC_MASTER: 2056 case KVM_IRQCHIP_PIC_MASTER:
2057 spin_lock(&pic_irqchip(kvm)->lock);
1778 memcpy(&pic_irqchip(kvm)->pics[0], 2058 memcpy(&pic_irqchip(kvm)->pics[0],
1779 &chip->chip.pic, 2059 &chip->chip.pic,
1780 sizeof(struct kvm_pic_state)); 2060 sizeof(struct kvm_pic_state));
2061 spin_unlock(&pic_irqchip(kvm)->lock);
1781 break; 2062 break;
1782 case KVM_IRQCHIP_PIC_SLAVE: 2063 case KVM_IRQCHIP_PIC_SLAVE:
2064 spin_lock(&pic_irqchip(kvm)->lock);
1783 memcpy(&pic_irqchip(kvm)->pics[1], 2065 memcpy(&pic_irqchip(kvm)->pics[1],
1784 &chip->chip.pic, 2066 &chip->chip.pic,
1785 sizeof(struct kvm_pic_state)); 2067 sizeof(struct kvm_pic_state));
2068 spin_unlock(&pic_irqchip(kvm)->lock);
1786 break; 2069 break;
1787 case KVM_IRQCHIP_IOAPIC: 2070 case KVM_IRQCHIP_IOAPIC:
2071 mutex_lock(&kvm->irq_lock);
1788 memcpy(ioapic_irqchip(kvm), 2072 memcpy(ioapic_irqchip(kvm),
1789 &chip->chip.ioapic, 2073 &chip->chip.ioapic,
1790 sizeof(struct kvm_ioapic_state)); 2074 sizeof(struct kvm_ioapic_state));
2075 mutex_unlock(&kvm->irq_lock);
1791 break; 2076 break;
1792 default: 2077 default:
1793 r = -EINVAL; 2078 r = -EINVAL;
@@ -1801,7 +2086,9 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1801{ 2086{
1802 int r = 0; 2087 int r = 0;
1803 2088
2089 mutex_lock(&kvm->arch.vpit->pit_state.lock);
1804 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); 2090 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
2091 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
1805 return r; 2092 return r;
1806} 2093}
1807 2094
@@ -1809,8 +2096,39 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1809{ 2096{
1810 int r = 0; 2097 int r = 0;
1811 2098
2099 mutex_lock(&kvm->arch.vpit->pit_state.lock);
1812 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); 2100 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
1813 kvm_pit_load_count(kvm, 0, ps->channels[0].count); 2101 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
2102 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2103 return r;
2104}
2105
2106static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
2107{
2108 int r = 0;
2109
2110 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2111 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
2112 sizeof(ps->channels));
2113 ps->flags = kvm->arch.vpit->pit_state.flags;
2114 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2115 return r;
2116}
2117
2118static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
2119{
2120 int r = 0, start = 0;
2121 u32 prev_legacy, cur_legacy;
2122 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2123 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
2124 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
2125 if (!prev_legacy && cur_legacy)
2126 start = 1;
2127 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
2128 sizeof(kvm->arch.vpit->pit_state.channels));
2129 kvm->arch.vpit->pit_state.flags = ps->flags;
2130 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
2131 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
1814 return r; 2132 return r;
1815} 2133}
1816 2134
@@ -1819,7 +2137,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
1819{ 2137{
1820 if (!kvm->arch.vpit) 2138 if (!kvm->arch.vpit)
1821 return -ENXIO; 2139 return -ENXIO;
2140 mutex_lock(&kvm->arch.vpit->pit_state.lock);
1822 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; 2141 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
2142 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
1823 return 0; 2143 return 0;
1824} 2144}
1825 2145
@@ -1845,7 +2165,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1845 spin_lock(&kvm->mmu_lock); 2165 spin_lock(&kvm->mmu_lock);
1846 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2166 kvm_mmu_slot_remove_write_access(kvm, log->slot);
1847 spin_unlock(&kvm->mmu_lock); 2167 spin_unlock(&kvm->mmu_lock);
1848 kvm_flush_remote_tlbs(kvm);
1849 memslot = &kvm->memslots[log->slot]; 2168 memslot = &kvm->memslots[log->slot];
1850 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2169 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
1851 memset(memslot->dirty_bitmap, 0, n); 2170 memset(memslot->dirty_bitmap, 0, n);
@@ -1869,7 +2188,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
1869 */ 2188 */
1870 union { 2189 union {
1871 struct kvm_pit_state ps; 2190 struct kvm_pit_state ps;
2191 struct kvm_pit_state2 ps2;
1872 struct kvm_memory_alias alias; 2192 struct kvm_memory_alias alias;
2193 struct kvm_pit_config pit_config;
1873 } u; 2194 } u;
1874 2195
1875 switch (ioctl) { 2196 switch (ioctl) {
@@ -1878,6 +2199,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
1878 if (r < 0) 2199 if (r < 0)
1879 goto out; 2200 goto out;
1880 break; 2201 break;
2202 case KVM_SET_IDENTITY_MAP_ADDR: {
2203 u64 ident_addr;
2204
2205 r = -EFAULT;
2206 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
2207 goto out;
2208 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
2209 if (r < 0)
2210 goto out;
2211 break;
2212 }
1881 case KVM_SET_MEMORY_REGION: { 2213 case KVM_SET_MEMORY_REGION: {
1882 struct kvm_memory_region kvm_mem; 2214 struct kvm_memory_region kvm_mem;
1883 struct kvm_userspace_memory_region kvm_userspace_mem; 2215 struct kvm_userspace_memory_region kvm_userspace_mem;
@@ -1930,16 +2262,24 @@ long kvm_arch_vm_ioctl(struct file *filp,
1930 } 2262 }
1931 break; 2263 break;
1932 case KVM_CREATE_PIT: 2264 case KVM_CREATE_PIT:
1933 mutex_lock(&kvm->lock); 2265 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
2266 goto create_pit;
2267 case KVM_CREATE_PIT2:
2268 r = -EFAULT;
2269 if (copy_from_user(&u.pit_config, argp,
2270 sizeof(struct kvm_pit_config)))
2271 goto out;
2272 create_pit:
2273 down_write(&kvm->slots_lock);
1934 r = -EEXIST; 2274 r = -EEXIST;
1935 if (kvm->arch.vpit) 2275 if (kvm->arch.vpit)
1936 goto create_pit_unlock; 2276 goto create_pit_unlock;
1937 r = -ENOMEM; 2277 r = -ENOMEM;
1938 kvm->arch.vpit = kvm_create_pit(kvm); 2278 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
1939 if (kvm->arch.vpit) 2279 if (kvm->arch.vpit)
1940 r = 0; 2280 r = 0;
1941 create_pit_unlock: 2281 create_pit_unlock:
1942 mutex_unlock(&kvm->lock); 2282 up_write(&kvm->slots_lock);
1943 break; 2283 break;
1944 case KVM_IRQ_LINE_STATUS: 2284 case KVM_IRQ_LINE_STATUS:
1945 case KVM_IRQ_LINE: { 2285 case KVM_IRQ_LINE: {
@@ -1950,10 +2290,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
1950 goto out; 2290 goto out;
1951 if (irqchip_in_kernel(kvm)) { 2291 if (irqchip_in_kernel(kvm)) {
1952 __s32 status; 2292 __s32 status;
1953 mutex_lock(&kvm->lock); 2293 mutex_lock(&kvm->irq_lock);
1954 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 2294 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
1955 irq_event.irq, irq_event.level); 2295 irq_event.irq, irq_event.level);
1956 mutex_unlock(&kvm->lock); 2296 mutex_unlock(&kvm->irq_lock);
1957 if (ioctl == KVM_IRQ_LINE_STATUS) { 2297 if (ioctl == KVM_IRQ_LINE_STATUS) {
1958 irq_event.status = status; 2298 irq_event.status = status;
1959 if (copy_to_user(argp, &irq_event, 2299 if (copy_to_user(argp, &irq_event,
@@ -2042,6 +2382,32 @@ long kvm_arch_vm_ioctl(struct file *filp,
2042 r = 0; 2382 r = 0;
2043 break; 2383 break;
2044 } 2384 }
2385 case KVM_GET_PIT2: {
2386 r = -ENXIO;
2387 if (!kvm->arch.vpit)
2388 goto out;
2389 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
2390 if (r)
2391 goto out;
2392 r = -EFAULT;
2393 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
2394 goto out;
2395 r = 0;
2396 break;
2397 }
2398 case KVM_SET_PIT2: {
2399 r = -EFAULT;
2400 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
2401 goto out;
2402 r = -ENXIO;
2403 if (!kvm->arch.vpit)
2404 goto out;
2405 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
2406 if (r)
2407 goto out;
2408 r = 0;
2409 break;
2410 }
2045 case KVM_REINJECT_CONTROL: { 2411 case KVM_REINJECT_CONTROL: {
2046 struct kvm_reinject_control control; 2412 struct kvm_reinject_control control;
2047 r = -EFAULT; 2413 r = -EFAULT;
@@ -2075,35 +2441,23 @@ static void kvm_init_msr_list(void)
2075 num_msrs_to_save = j; 2441 num_msrs_to_save = j;
2076} 2442}
2077 2443
2078/* 2444static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
2079 * Only apic need an MMIO device hook, so shortcut now.. 2445 const void *v)
2080 */
2081static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
2082 gpa_t addr, int len,
2083 int is_write)
2084{ 2446{
2085 struct kvm_io_device *dev; 2447 if (vcpu->arch.apic &&
2448 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
2449 return 0;
2086 2450
2087 if (vcpu->arch.apic) { 2451 return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
2088 dev = &vcpu->arch.apic->dev;
2089 if (dev->in_range(dev, addr, len, is_write))
2090 return dev;
2091 }
2092 return NULL;
2093} 2452}
2094 2453
2095 2454static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2096static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
2097 gpa_t addr, int len,
2098 int is_write)
2099{ 2455{
2100 struct kvm_io_device *dev; 2456 if (vcpu->arch.apic &&
2457 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
2458 return 0;
2101 2459
2102 dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write); 2460 return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
2103 if (dev == NULL)
2104 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
2105 is_write);
2106 return dev;
2107} 2461}
2108 2462
2109static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 2463static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
@@ -2172,11 +2526,12 @@ static int emulator_read_emulated(unsigned long addr,
2172 unsigned int bytes, 2526 unsigned int bytes,
2173 struct kvm_vcpu *vcpu) 2527 struct kvm_vcpu *vcpu)
2174{ 2528{
2175 struct kvm_io_device *mmio_dev;
2176 gpa_t gpa; 2529 gpa_t gpa;
2177 2530
2178 if (vcpu->mmio_read_completed) { 2531 if (vcpu->mmio_read_completed) {
2179 memcpy(val, vcpu->mmio_data, bytes); 2532 memcpy(val, vcpu->mmio_data, bytes);
2533 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
2534 vcpu->mmio_phys_addr, *(u64 *)val);
2180 vcpu->mmio_read_completed = 0; 2535 vcpu->mmio_read_completed = 0;
2181 return X86EMUL_CONTINUE; 2536 return X86EMUL_CONTINUE;
2182 } 2537 }
@@ -2197,14 +2552,12 @@ mmio:
2197 /* 2552 /*
2198 * Is this MMIO handled locally? 2553 * Is this MMIO handled locally?
2199 */ 2554 */
2200 mutex_lock(&vcpu->kvm->lock); 2555 if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
2201 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); 2556 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
2202 if (mmio_dev) {
2203 kvm_iodevice_read(mmio_dev, gpa, bytes, val);
2204 mutex_unlock(&vcpu->kvm->lock);
2205 return X86EMUL_CONTINUE; 2557 return X86EMUL_CONTINUE;
2206 } 2558 }
2207 mutex_unlock(&vcpu->kvm->lock); 2559
2560 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
2208 2561
2209 vcpu->mmio_needed = 1; 2562 vcpu->mmio_needed = 1;
2210 vcpu->mmio_phys_addr = gpa; 2563 vcpu->mmio_phys_addr = gpa;
@@ -2231,7 +2584,6 @@ static int emulator_write_emulated_onepage(unsigned long addr,
2231 unsigned int bytes, 2584 unsigned int bytes,
2232 struct kvm_vcpu *vcpu) 2585 struct kvm_vcpu *vcpu)
2233{ 2586{
2234 struct kvm_io_device *mmio_dev;
2235 gpa_t gpa; 2587 gpa_t gpa;
2236 2588
2237 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 2589 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
@@ -2249,17 +2601,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
2249 return X86EMUL_CONTINUE; 2601 return X86EMUL_CONTINUE;
2250 2602
2251mmio: 2603mmio:
2604 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
2252 /* 2605 /*
2253 * Is this MMIO handled locally? 2606 * Is this MMIO handled locally?
2254 */ 2607 */
2255 mutex_lock(&vcpu->kvm->lock); 2608 if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
2256 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
2257 if (mmio_dev) {
2258 kvm_iodevice_write(mmio_dev, gpa, bytes, val);
2259 mutex_unlock(&vcpu->kvm->lock);
2260 return X86EMUL_CONTINUE; 2609 return X86EMUL_CONTINUE;
2261 }
2262 mutex_unlock(&vcpu->kvm->lock);
2263 2610
2264 vcpu->mmio_needed = 1; 2611 vcpu->mmio_needed = 1;
2265 vcpu->mmio_phys_addr = gpa; 2612 vcpu->mmio_phys_addr = gpa;
@@ -2343,7 +2690,6 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
2343 2690
2344int emulate_clts(struct kvm_vcpu *vcpu) 2691int emulate_clts(struct kvm_vcpu *vcpu)
2345{ 2692{
2346 KVMTRACE_0D(CLTS, vcpu, handler);
2347 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); 2693 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
2348 return X86EMUL_CONTINUE; 2694 return X86EMUL_CONTINUE;
2349} 2695}
@@ -2420,7 +2766,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2420 kvm_clear_exception_queue(vcpu); 2766 kvm_clear_exception_queue(vcpu);
2421 vcpu->arch.mmio_fault_cr2 = cr2; 2767 vcpu->arch.mmio_fault_cr2 = cr2;
2422 /* 2768 /*
2423 * TODO: fix x86_emulate.c to use guest_read/write_register 2769 * TODO: fix emulate.c to use guest_read/write_register
2424 * instead of direct ->regs accesses, can save hundred cycles 2770 * instead of direct ->regs accesses, can save hundred cycles
2425 * on Intel for instructions that don't read/change RSP, for 2771 * on Intel for instructions that don't read/change RSP, for
2426 * for example. 2772 * for example.
@@ -2444,14 +2790,33 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2444 2790
2445 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 2791 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
2446 2792
2447 /* Reject the instructions other than VMCALL/VMMCALL when 2793 /* Only allow emulation of specific instructions on #UD
2448 * try to emulate invalid opcode */ 2794 * (namely VMMCALL, sysenter, sysexit, syscall)*/
2449 c = &vcpu->arch.emulate_ctxt.decode; 2795 c = &vcpu->arch.emulate_ctxt.decode;
2450 if ((emulation_type & EMULTYPE_TRAP_UD) && 2796 if (emulation_type & EMULTYPE_TRAP_UD) {
2451 (!(c->twobyte && c->b == 0x01 && 2797 if (!c->twobyte)
2452 (c->modrm_reg == 0 || c->modrm_reg == 3) && 2798 return EMULATE_FAIL;
2453 c->modrm_mod == 3 && c->modrm_rm == 1))) 2799 switch (c->b) {
2454 return EMULATE_FAIL; 2800 case 0x01: /* VMMCALL */
2801 if (c->modrm_mod != 3 || c->modrm_rm != 1)
2802 return EMULATE_FAIL;
2803 break;
2804 case 0x34: /* sysenter */
2805 case 0x35: /* sysexit */
2806 if (c->modrm_mod != 0 || c->modrm_rm != 0)
2807 return EMULATE_FAIL;
2808 break;
2809 case 0x05: /* syscall */
2810 if (c->modrm_mod != 0 || c->modrm_rm != 0)
2811 return EMULATE_FAIL;
2812 break;
2813 default:
2814 return EMULATE_FAIL;
2815 }
2816
2817 if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
2818 return EMULATE_FAIL;
2819 }
2455 2820
2456 ++vcpu->stat.insn_emulation; 2821 ++vcpu->stat.insn_emulation;
2457 if (r) { 2822 if (r) {
@@ -2571,52 +2936,40 @@ int complete_pio(struct kvm_vcpu *vcpu)
2571 return 0; 2936 return 0;
2572} 2937}
2573 2938
2574static void kernel_pio(struct kvm_io_device *pio_dev, 2939static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
2575 struct kvm_vcpu *vcpu,
2576 void *pd)
2577{ 2940{
2578 /* TODO: String I/O for in kernel device */ 2941 /* TODO: String I/O for in kernel device */
2942 int r;
2579 2943
2580 mutex_lock(&vcpu->kvm->lock);
2581 if (vcpu->arch.pio.in) 2944 if (vcpu->arch.pio.in)
2582 kvm_iodevice_read(pio_dev, vcpu->arch.pio.port, 2945 r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
2583 vcpu->arch.pio.size, 2946 vcpu->arch.pio.size, pd);
2584 pd);
2585 else 2947 else
2586 kvm_iodevice_write(pio_dev, vcpu->arch.pio.port, 2948 r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
2587 vcpu->arch.pio.size, 2949 vcpu->arch.pio.size, pd);
2588 pd); 2950 return r;
2589 mutex_unlock(&vcpu->kvm->lock);
2590} 2951}
2591 2952
2592static void pio_string_write(struct kvm_io_device *pio_dev, 2953static int pio_string_write(struct kvm_vcpu *vcpu)
2593 struct kvm_vcpu *vcpu)
2594{ 2954{
2595 struct kvm_pio_request *io = &vcpu->arch.pio; 2955 struct kvm_pio_request *io = &vcpu->arch.pio;
2596 void *pd = vcpu->arch.pio_data; 2956 void *pd = vcpu->arch.pio_data;
2597 int i; 2957 int i, r = 0;
2598 2958
2599 mutex_lock(&vcpu->kvm->lock);
2600 for (i = 0; i < io->cur_count; i++) { 2959 for (i = 0; i < io->cur_count; i++) {
2601 kvm_iodevice_write(pio_dev, io->port, 2960 if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
2602 io->size, 2961 io->port, io->size, pd)) {
2603 pd); 2962 r = -EOPNOTSUPP;
2963 break;
2964 }
2604 pd += io->size; 2965 pd += io->size;
2605 } 2966 }
2606 mutex_unlock(&vcpu->kvm->lock); 2967 return r;
2607}
2608
2609static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
2610 gpa_t addr, int len,
2611 int is_write)
2612{
2613 return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
2614} 2968}
2615 2969
2616int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 2970int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2617 int size, unsigned port) 2971 int size, unsigned port)
2618{ 2972{
2619 struct kvm_io_device *pio_dev;
2620 unsigned long val; 2973 unsigned long val;
2621 2974
2622 vcpu->run->exit_reason = KVM_EXIT_IO; 2975 vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2630,19 +2983,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2630 vcpu->arch.pio.down = 0; 2983 vcpu->arch.pio.down = 0;
2631 vcpu->arch.pio.rep = 0; 2984 vcpu->arch.pio.rep = 0;
2632 2985
2633 if (vcpu->run->io.direction == KVM_EXIT_IO_IN) 2986 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
2634 KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, 2987 size, 1);
2635 handler);
2636 else
2637 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2638 handler);
2639 2988
2640 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 2989 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
2641 memcpy(vcpu->arch.pio_data, &val, 4); 2990 memcpy(vcpu->arch.pio_data, &val, 4);
2642 2991
2643 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); 2992 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
2644 if (pio_dev) {
2645 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
2646 complete_pio(vcpu); 2993 complete_pio(vcpu);
2647 return 1; 2994 return 1;
2648 } 2995 }
@@ -2656,7 +3003,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2656{ 3003{
2657 unsigned now, in_page; 3004 unsigned now, in_page;
2658 int ret = 0; 3005 int ret = 0;
2659 struct kvm_io_device *pio_dev;
2660 3006
2661 vcpu->run->exit_reason = KVM_EXIT_IO; 3007 vcpu->run->exit_reason = KVM_EXIT_IO;
2662 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3008 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -2669,12 +3015,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2669 vcpu->arch.pio.down = down; 3015 vcpu->arch.pio.down = down;
2670 vcpu->arch.pio.rep = rep; 3016 vcpu->arch.pio.rep = rep;
2671 3017
2672 if (vcpu->run->io.direction == KVM_EXIT_IO_IN) 3018 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
2673 KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, 3019 size, count);
2674 handler);
2675 else
2676 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2677 handler);
2678 3020
2679 if (!count) { 3021 if (!count) {
2680 kvm_x86_ops->skip_emulated_instruction(vcpu); 3022 kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -2704,9 +3046,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2704 3046
2705 vcpu->arch.pio.guest_gva = address; 3047 vcpu->arch.pio.guest_gva = address;
2706 3048
2707 pio_dev = vcpu_find_pio_dev(vcpu, port,
2708 vcpu->arch.pio.cur_count,
2709 !vcpu->arch.pio.in);
2710 if (!vcpu->arch.pio.in) { 3049 if (!vcpu->arch.pio.in) {
2711 /* string PIO write */ 3050 /* string PIO write */
2712 ret = pio_copy_data(vcpu); 3051 ret = pio_copy_data(vcpu);
@@ -2714,16 +3053,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2714 kvm_inject_gp(vcpu, 0); 3053 kvm_inject_gp(vcpu, 0);
2715 return 1; 3054 return 1;
2716 } 3055 }
2717 if (ret == 0 && pio_dev) { 3056 if (ret == 0 && !pio_string_write(vcpu)) {
2718 pio_string_write(pio_dev, vcpu);
2719 complete_pio(vcpu); 3057 complete_pio(vcpu);
2720 if (vcpu->arch.pio.count == 0) 3058 if (vcpu->arch.pio.count == 0)
2721 ret = 1; 3059 ret = 1;
2722 } 3060 }
2723 } else if (pio_dev) 3061 }
2724 pr_unimpl(vcpu, "no string pio read support yet, " 3062 /* no string PIO read support yet */
2725 "port %x size %d count %ld\n",
2726 port, size, count);
2727 3063
2728 return ret; 3064 return ret;
2729} 3065}
@@ -2756,10 +3092,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
2756 3092
2757 spin_lock(&kvm_lock); 3093 spin_lock(&kvm_lock);
2758 list_for_each_entry(kvm, &vm_list, vm_list) { 3094 list_for_each_entry(kvm, &vm_list, vm_list) {
2759 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 3095 kvm_for_each_vcpu(i, vcpu, kvm) {
2760 vcpu = kvm->vcpus[i];
2761 if (!vcpu)
2762 continue;
2763 if (vcpu->cpu != freq->cpu) 3096 if (vcpu->cpu != freq->cpu)
2764 continue; 3097 continue;
2765 if (!kvm_request_guest_time_update(vcpu)) 3098 if (!kvm_request_guest_time_update(vcpu))
@@ -2852,7 +3185,6 @@ void kvm_arch_exit(void)
2852int kvm_emulate_halt(struct kvm_vcpu *vcpu) 3185int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2853{ 3186{
2854 ++vcpu->stat.halt_exits; 3187 ++vcpu->stat.halt_exits;
2855 KVMTRACE_0D(HLT, vcpu, handler);
2856 if (irqchip_in_kernel(vcpu->kvm)) { 3188 if (irqchip_in_kernel(vcpu->kvm)) {
2857 vcpu->arch.mp_state = KVM_MP_STATE_HALTED; 3189 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
2858 return 1; 3190 return 1;
@@ -2883,7 +3215,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2883 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); 3215 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
2884 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); 3216 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
2885 3217
2886 KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); 3218 trace_kvm_hypercall(nr, a0, a1, a2, a3);
2887 3219
2888 if (!is_long_mode(vcpu)) { 3220 if (!is_long_mode(vcpu)) {
2889 nr &= 0xFFFFFFFF; 3221 nr &= 0xFFFFFFFF;
@@ -2893,6 +3225,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2893 a3 &= 0xFFFFFFFF; 3225 a3 &= 0xFFFFFFFF;
2894 } 3226 }
2895 3227
3228 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
3229 ret = -KVM_EPERM;
3230 goto out;
3231 }
3232
2896 switch (nr) { 3233 switch (nr) {
2897 case KVM_HC_VAPIC_POLL_IRQ: 3234 case KVM_HC_VAPIC_POLL_IRQ:
2898 ret = 0; 3235 ret = 0;
@@ -2904,6 +3241,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2904 ret = -KVM_ENOSYS; 3241 ret = -KVM_ENOSYS;
2905 break; 3242 break;
2906 } 3243 }
3244out:
2907 kvm_register_write(vcpu, VCPU_REGS_RAX, ret); 3245 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
2908 ++vcpu->stat.hypercalls; 3246 ++vcpu->stat.hypercalls;
2909 return r; 3247 return r;
@@ -2983,8 +3321,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
2983 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3321 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2984 return 0; 3322 return 0;
2985 } 3323 }
2986 KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
2987 (u32)((u64)value >> 32), handler);
2988 3324
2989 return value; 3325 return value;
2990} 3326}
@@ -2992,9 +3328,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
2992void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, 3328void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
2993 unsigned long *rflags) 3329 unsigned long *rflags)
2994{ 3330{
2995 KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
2996 (u32)((u64)val >> 32), handler);
2997
2998 switch (cr) { 3331 switch (cr) {
2999 case 0: 3332 case 0:
3000 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 3333 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
@@ -3104,11 +3437,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
3104 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); 3437 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
3105 } 3438 }
3106 kvm_x86_ops->skip_emulated_instruction(vcpu); 3439 kvm_x86_ops->skip_emulated_instruction(vcpu);
3107 KVMTRACE_5D(CPUID, vcpu, function, 3440 trace_kvm_cpuid(function,
3108 (u32)kvm_register_read(vcpu, VCPU_REGS_RAX), 3441 kvm_register_read(vcpu, VCPU_REGS_RAX),
3109 (u32)kvm_register_read(vcpu, VCPU_REGS_RBX), 3442 kvm_register_read(vcpu, VCPU_REGS_RBX),
3110 (u32)kvm_register_read(vcpu, VCPU_REGS_RCX), 3443 kvm_register_read(vcpu, VCPU_REGS_RCX),
3111 (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler); 3444 kvm_register_read(vcpu, VCPU_REGS_RDX));
3112} 3445}
3113EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 3446EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
3114 3447
@@ -3174,6 +3507,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
3174 if (!kvm_x86_ops->update_cr8_intercept) 3507 if (!kvm_x86_ops->update_cr8_intercept)
3175 return; 3508 return;
3176 3509
3510 if (!vcpu->arch.apic)
3511 return;
3512
3177 if (!vcpu->arch.apic->vapic_addr) 3513 if (!vcpu->arch.apic->vapic_addr)
3178 max_irr = kvm_lapic_find_highest_irr(vcpu); 3514 max_irr = kvm_lapic_find_highest_irr(vcpu);
3179 else 3515 else
@@ -3187,12 +3523,16 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
3187 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); 3523 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
3188} 3524}
3189 3525
3190static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3526static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3191{ 3527{
3192 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3193 kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
3194
3195 /* try to reinject previous events if any */ 3528 /* try to reinject previous events if any */
3529 if (vcpu->arch.exception.pending) {
3530 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
3531 vcpu->arch.exception.has_error_code,
3532 vcpu->arch.exception.error_code);
3533 return;
3534 }
3535
3196 if (vcpu->arch.nmi_injected) { 3536 if (vcpu->arch.nmi_injected) {
3197 kvm_x86_ops->set_nmi(vcpu); 3537 kvm_x86_ops->set_nmi(vcpu);
3198 return; 3538 return;
@@ -3266,16 +3606,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3266 smp_mb__after_clear_bit(); 3606 smp_mb__after_clear_bit();
3267 3607
3268 if (vcpu->requests || need_resched() || signal_pending(current)) { 3608 if (vcpu->requests || need_resched() || signal_pending(current)) {
3609 set_bit(KVM_REQ_KICK, &vcpu->requests);
3269 local_irq_enable(); 3610 local_irq_enable();
3270 preempt_enable(); 3611 preempt_enable();
3271 r = 1; 3612 r = 1;
3272 goto out; 3613 goto out;
3273 } 3614 }
3274 3615
3275 if (vcpu->arch.exception.pending) 3616 inject_pending_event(vcpu, kvm_run);
3276 __queue_exception(vcpu);
3277 else
3278 inject_pending_irq(vcpu, kvm_run);
3279 3617
3280 /* enable NMI/IRQ window open exits if needed */ 3618 /* enable NMI/IRQ window open exits if needed */
3281 if (vcpu->arch.nmi_pending) 3619 if (vcpu->arch.nmi_pending)
@@ -3292,14 +3630,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3292 3630
3293 kvm_guest_enter(); 3631 kvm_guest_enter();
3294 3632
3295 get_debugreg(vcpu->arch.host_dr6, 6);
3296 get_debugreg(vcpu->arch.host_dr7, 7);
3297 if (unlikely(vcpu->arch.switch_db_regs)) { 3633 if (unlikely(vcpu->arch.switch_db_regs)) {
3298 get_debugreg(vcpu->arch.host_db[0], 0);
3299 get_debugreg(vcpu->arch.host_db[1], 1);
3300 get_debugreg(vcpu->arch.host_db[2], 2);
3301 get_debugreg(vcpu->arch.host_db[3], 3);
3302
3303 set_debugreg(0, 7); 3634 set_debugreg(0, 7);
3304 set_debugreg(vcpu->arch.eff_db[0], 0); 3635 set_debugreg(vcpu->arch.eff_db[0], 0);
3305 set_debugreg(vcpu->arch.eff_db[1], 1); 3636 set_debugreg(vcpu->arch.eff_db[1], 1);
@@ -3307,18 +3638,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3307 set_debugreg(vcpu->arch.eff_db[3], 3); 3638 set_debugreg(vcpu->arch.eff_db[3], 3);
3308 } 3639 }
3309 3640
3310 KVMTRACE_0D(VMENTRY, vcpu, entryexit); 3641 trace_kvm_entry(vcpu->vcpu_id);
3311 kvm_x86_ops->run(vcpu, kvm_run); 3642 kvm_x86_ops->run(vcpu, kvm_run);
3312 3643
3313 if (unlikely(vcpu->arch.switch_db_regs)) { 3644 if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
3314 set_debugreg(0, 7); 3645 set_debugreg(current->thread.debugreg0, 0);
3315 set_debugreg(vcpu->arch.host_db[0], 0); 3646 set_debugreg(current->thread.debugreg1, 1);
3316 set_debugreg(vcpu->arch.host_db[1], 1); 3647 set_debugreg(current->thread.debugreg2, 2);
3317 set_debugreg(vcpu->arch.host_db[2], 2); 3648 set_debugreg(current->thread.debugreg3, 3);
3318 set_debugreg(vcpu->arch.host_db[3], 3); 3649 set_debugreg(current->thread.debugreg6, 6);
3650 set_debugreg(current->thread.debugreg7, 7);
3319 } 3651 }
3320 set_debugreg(vcpu->arch.host_dr6, 6);
3321 set_debugreg(vcpu->arch.host_dr7, 7);
3322 3652
3323 set_bit(KVM_REQ_KICK, &vcpu->requests); 3653 set_bit(KVM_REQ_KICK, &vcpu->requests);
3324 local_irq_enable(); 3654 local_irq_enable();
@@ -3648,11 +3978,8 @@ static void kvm_set_segment(struct kvm_vcpu *vcpu,
3648static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, 3978static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3649 struct kvm_segment *kvm_desct) 3979 struct kvm_segment *kvm_desct)
3650{ 3980{
3651 kvm_desct->base = seg_desc->base0; 3981 kvm_desct->base = get_desc_base(seg_desc);
3652 kvm_desct->base |= seg_desc->base1 << 16; 3982 kvm_desct->limit = get_desc_limit(seg_desc);
3653 kvm_desct->base |= seg_desc->base2 << 24;
3654 kvm_desct->limit = seg_desc->limit0;
3655 kvm_desct->limit |= seg_desc->limit << 16;
3656 if (seg_desc->g) { 3983 if (seg_desc->g) {
3657 kvm_desct->limit <<= 12; 3984 kvm_desct->limit <<= 12;
3658 kvm_desct->limit |= 0xfff; 3985 kvm_desct->limit |= 0xfff;
@@ -3696,7 +4023,6 @@ static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
3696static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4023static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3697 struct desc_struct *seg_desc) 4024 struct desc_struct *seg_desc)
3698{ 4025{
3699 gpa_t gpa;
3700 struct descriptor_table dtable; 4026 struct descriptor_table dtable;
3701 u16 index = selector >> 3; 4027 u16 index = selector >> 3;
3702 4028
@@ -3706,16 +4032,13 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3706 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 4032 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
3707 return 1; 4033 return 1;
3708 } 4034 }
3709 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); 4035 return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
3710 gpa += index * 8;
3711 return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
3712} 4036}
3713 4037
3714/* allowed just for 8 bytes segments */ 4038/* allowed just for 8 bytes segments */
3715static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4039static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3716 struct desc_struct *seg_desc) 4040 struct desc_struct *seg_desc)
3717{ 4041{
3718 gpa_t gpa;
3719 struct descriptor_table dtable; 4042 struct descriptor_table dtable;
3720 u16 index = selector >> 3; 4043 u16 index = selector >> 3;
3721 4044
@@ -3723,19 +4046,13 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3723 4046
3724 if (dtable.limit < index * 8 + 7) 4047 if (dtable.limit < index * 8 + 7)
3725 return 1; 4048 return 1;
3726 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); 4049 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
3727 gpa += index * 8;
3728 return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
3729} 4050}
3730 4051
3731static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, 4052static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
3732 struct desc_struct *seg_desc) 4053 struct desc_struct *seg_desc)
3733{ 4054{
3734 u32 base_addr; 4055 u32 base_addr = get_desc_base(seg_desc);
3735
3736 base_addr = seg_desc->base0;
3737 base_addr |= (seg_desc->base1 << 16);
3738 base_addr |= (seg_desc->base2 << 24);
3739 4056
3740 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); 4057 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
3741} 4058}
@@ -3780,12 +4097,19 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
3780 return 0; 4097 return 0;
3781} 4098}
3782 4099
4100static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4101{
4102 return (seg != VCPU_SREG_LDTR) &&
4103 (seg != VCPU_SREG_TR) &&
4104 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM);
4105}
4106
3783int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4107int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3784 int type_bits, int seg) 4108 int type_bits, int seg)
3785{ 4109{
3786 struct kvm_segment kvm_seg; 4110 struct kvm_segment kvm_seg;
3787 4111
3788 if (!(vcpu->arch.cr0 & X86_CR0_PE)) 4112 if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE))
3789 return kvm_load_realmode_segment(vcpu, selector, seg); 4113 return kvm_load_realmode_segment(vcpu, selector, seg);
3790 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) 4114 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
3791 return 1; 4115 return 1;
@@ -4024,7 +4348,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4024 } 4348 }
4025 } 4349 }
4026 4350
4027 if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) { 4351 if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
4028 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); 4352 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
4029 return 1; 4353 return 1;
4030 } 4354 }
@@ -4094,13 +4418,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4094 4418
4095 vcpu->arch.cr2 = sregs->cr2; 4419 vcpu->arch.cr2 = sregs->cr2;
4096 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; 4420 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
4097 4421 vcpu->arch.cr3 = sregs->cr3;
4098 down_read(&vcpu->kvm->slots_lock);
4099 if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
4100 vcpu->arch.cr3 = sregs->cr3;
4101 else
4102 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
4103 up_read(&vcpu->kvm->slots_lock);
4104 4422
4105 kvm_set_cr8(vcpu, sregs->cr8); 4423 kvm_set_cr8(vcpu, sregs->cr8);
4106 4424
@@ -4142,8 +4460,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4142 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 4460 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
4143 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 4461 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
4144 4462
4463 update_cr8_intercept(vcpu);
4464
4145 /* Older userspace won't unhalt the vcpu on reset. */ 4465 /* Older userspace won't unhalt the vcpu on reset. */
4146 if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 && 4466 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
4147 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && 4467 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
4148 !(vcpu->arch.cr0 & X86_CR0_PE)) 4468 !(vcpu->arch.cr0 & X86_CR0_PE))
4149 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4469 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -4414,7 +4734,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
4414 kvm = vcpu->kvm; 4734 kvm = vcpu->kvm;
4415 4735
4416 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 4736 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
4417 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0) 4737 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
4418 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4738 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4419 else 4739 else
4420 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; 4740 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -4436,6 +4756,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
4436 goto fail_mmu_destroy; 4756 goto fail_mmu_destroy;
4437 } 4757 }
4438 4758
4759 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
4760 GFP_KERNEL);
4761 if (!vcpu->arch.mce_banks) {
4762 r = -ENOMEM;
4763 goto fail_mmu_destroy;
4764 }
4765 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
4766
4439 return 0; 4767 return 0;
4440 4768
4441fail_mmu_destroy: 4769fail_mmu_destroy:
@@ -4483,20 +4811,22 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
4483static void kvm_free_vcpus(struct kvm *kvm) 4811static void kvm_free_vcpus(struct kvm *kvm)
4484{ 4812{
4485 unsigned int i; 4813 unsigned int i;
4814 struct kvm_vcpu *vcpu;
4486 4815
4487 /* 4816 /*
4488 * Unpin any mmu pages first. 4817 * Unpin any mmu pages first.
4489 */ 4818 */
4490 for (i = 0; i < KVM_MAX_VCPUS; ++i) 4819 kvm_for_each_vcpu(i, vcpu, kvm)
4491 if (kvm->vcpus[i]) 4820 kvm_unload_vcpu_mmu(vcpu);
4492 kvm_unload_vcpu_mmu(kvm->vcpus[i]); 4821 kvm_for_each_vcpu(i, vcpu, kvm)
4493 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 4822 kvm_arch_vcpu_free(vcpu);
4494 if (kvm->vcpus[i]) { 4823
4495 kvm_arch_vcpu_free(kvm->vcpus[i]); 4824 mutex_lock(&kvm->lock);
4496 kvm->vcpus[i] = NULL; 4825 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
4497 } 4826 kvm->vcpus[i] = NULL;
4498 }
4499 4827
4828 atomic_set(&kvm->online_vcpus, 0);
4829 mutex_unlock(&kvm->lock);
4500} 4830}
4501 4831
4502void kvm_arch_sync_events(struct kvm *kvm) 4832void kvm_arch_sync_events(struct kvm *kvm)
@@ -4573,7 +4903,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
4573 4903
4574 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 4904 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
4575 spin_unlock(&kvm->mmu_lock); 4905 spin_unlock(&kvm->mmu_lock);
4576 kvm_flush_remote_tlbs(kvm);
4577 4906
4578 return 0; 4907 return 0;
4579} 4908}
@@ -4587,8 +4916,10 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
4587int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4916int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4588{ 4917{
4589 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 4918 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
4590 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED 4919 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
4591 || vcpu->arch.nmi_pending; 4920 || vcpu->arch.nmi_pending ||
4921 (kvm_arch_interrupt_allowed(vcpu) &&
4922 kvm_cpu_has_interrupt(vcpu));
4592} 4923}
4593 4924
4594void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 4925void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -4612,3 +4943,9 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
4612{ 4943{
4613 return kvm_x86_ops->interrupt_allowed(vcpu); 4944 return kvm_x86_ops->interrupt_allowed(vcpu);
4614} 4945}
4946
4947EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
4948EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
4949EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
4950EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
4951EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);