Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c | 1098
1 file changed, 831 insertions(+), 267 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1e1bc9d412d..e46282a56565 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/intel-iommu.h> | 38 | #include <linux/intel-iommu.h> |
39 | #include <linux/cpufreq.h> | 39 | #include <linux/cpufreq.h> |
40 | #include <linux/user-return-notifier.h> | 40 | #include <linux/user-return-notifier.h> |
41 | #include <linux/srcu.h> | ||
41 | #include <trace/events/kvm.h> | 42 | #include <trace/events/kvm.h> |
42 | #undef TRACE_INCLUDE_FILE | 43 | #undef TRACE_INCLUDE_FILE |
43 | #define CREATE_TRACE_POINTS | 44 | #define CREATE_TRACE_POINTS |
@@ -93,16 +94,16 @@ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | |||
93 | 94 | ||
94 | struct kvm_shared_msrs_global { | 95 | struct kvm_shared_msrs_global { |
95 | int nr; | 96 | int nr; |
96 | struct kvm_shared_msr { | 97 | u32 msrs[KVM_NR_SHARED_MSRS]; |
97 | u32 msr; | ||
98 | u64 value; | ||
99 | } msrs[KVM_NR_SHARED_MSRS]; | ||
100 | }; | 98 | }; |
101 | 99 | ||
102 | struct kvm_shared_msrs { | 100 | struct kvm_shared_msrs { |
103 | struct user_return_notifier urn; | 101 | struct user_return_notifier urn; |
104 | bool registered; | 102 | bool registered; |
105 | u64 current_value[KVM_NR_SHARED_MSRS]; | 103 | struct kvm_shared_msr_values { |
104 | u64 host; | ||
105 | u64 curr; | ||
106 | } values[KVM_NR_SHARED_MSRS]; | ||
106 | }; | 107 | }; |
107 | 108 | ||
108 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | 109 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; |
@@ -147,53 +148,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
147 | static void kvm_on_user_return(struct user_return_notifier *urn) | 148 | static void kvm_on_user_return(struct user_return_notifier *urn) |
148 | { | 149 | { |
149 | unsigned slot; | 150 | unsigned slot; |
150 | struct kvm_shared_msr *global; | ||
151 | struct kvm_shared_msrs *locals | 151 | struct kvm_shared_msrs *locals |
152 | = container_of(urn, struct kvm_shared_msrs, urn); | 152 | = container_of(urn, struct kvm_shared_msrs, urn); |
153 | struct kvm_shared_msr_values *values; | ||
153 | 154 | ||
154 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { | 155 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { |
155 | global = &shared_msrs_global.msrs[slot]; | 156 | values = &locals->values[slot]; |
156 | if (global->value != locals->current_value[slot]) { | 157 | if (values->host != values->curr) { |
157 | wrmsrl(global->msr, global->value); | 158 | wrmsrl(shared_msrs_global.msrs[slot], values->host); |
158 | locals->current_value[slot] = global->value; | 159 | values->curr = values->host; |
159 | } | 160 | } |
160 | } | 161 | } |
161 | locals->registered = false; | 162 | locals->registered = false; |
162 | user_return_notifier_unregister(urn); | 163 | user_return_notifier_unregister(urn); |
163 | } | 164 | } |
164 | 165 | ||
165 | void kvm_define_shared_msr(unsigned slot, u32 msr) | 166 | static void shared_msr_update(unsigned slot, u32 msr) |
166 | { | 167 | { |
167 | int cpu; | 168 | struct kvm_shared_msrs *smsr; |
168 | u64 value; | 169 | u64 value; |
169 | 170 | ||
171 | smsr = &__get_cpu_var(shared_msrs); | ||
172 | /* only read, and nobody should modify it at this time, | ||
173 | * so don't need lock */ | ||
174 | if (slot >= shared_msrs_global.nr) { | ||
175 | printk(KERN_ERR "kvm: invalid MSR slot!"); | ||
176 | return; | ||
177 | } | ||
178 | rdmsrl_safe(msr, &value); | ||
179 | smsr->values[slot].host = value; | ||
180 | smsr->values[slot].curr = value; | ||
181 | } | ||
182 | |||
183 | void kvm_define_shared_msr(unsigned slot, u32 msr) | ||
184 | { | ||
170 | if (slot >= shared_msrs_global.nr) | 185 | if (slot >= shared_msrs_global.nr) |
171 | shared_msrs_global.nr = slot + 1; | 186 | shared_msrs_global.nr = slot + 1; |
172 | shared_msrs_global.msrs[slot].msr = msr; | 187 | shared_msrs_global.msrs[slot] = msr; |
173 | rdmsrl_safe(msr, &value); | 188 | /* we need ensured the shared_msr_global have been updated */ |
174 | shared_msrs_global.msrs[slot].value = value; | 189 | smp_wmb(); |
175 | for_each_online_cpu(cpu) | ||
176 | per_cpu(shared_msrs, cpu).current_value[slot] = value; | ||
177 | } | 190 | } |
178 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); | 191 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); |
179 | 192 | ||
180 | static void kvm_shared_msr_cpu_online(void) | 193 | static void kvm_shared_msr_cpu_online(void) |
181 | { | 194 | { |
182 | unsigned i; | 195 | unsigned i; |
183 | struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs); | ||
184 | 196 | ||
185 | for (i = 0; i < shared_msrs_global.nr; ++i) | 197 | for (i = 0; i < shared_msrs_global.nr; ++i) |
186 | locals->current_value[i] = shared_msrs_global.msrs[i].value; | 198 | shared_msr_update(i, shared_msrs_global.msrs[i]); |
187 | } | 199 | } |
188 | 200 | ||
189 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | 201 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) |
190 | { | 202 | { |
191 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | 203 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); |
192 | 204 | ||
193 | if (((value ^ smsr->current_value[slot]) & mask) == 0) | 205 | if (((value ^ smsr->values[slot].curr) & mask) == 0) |
194 | return; | 206 | return; |
195 | smsr->current_value[slot] = value; | 207 | smsr->values[slot].curr = value; |
196 | wrmsrl(shared_msrs_global.msrs[slot].msr, value); | 208 | wrmsrl(shared_msrs_global.msrs[slot], value); |
197 | if (!smsr->registered) { | 209 | if (!smsr->registered) { |
198 | smsr->urn.on_user_return = kvm_on_user_return; | 210 | smsr->urn.on_user_return = kvm_on_user_return; |
199 | user_return_notifier_register(&smsr->urn); | 211 | user_return_notifier_register(&smsr->urn); |
@@ -257,12 +269,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | |||
257 | } | 269 | } |
258 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 270 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
259 | 271 | ||
272 | #define EXCPT_BENIGN 0 | ||
273 | #define EXCPT_CONTRIBUTORY 1 | ||
274 | #define EXCPT_PF 2 | ||
275 | |||
276 | static int exception_class(int vector) | ||
277 | { | ||
278 | switch (vector) { | ||
279 | case PF_VECTOR: | ||
280 | return EXCPT_PF; | ||
281 | case DE_VECTOR: | ||
282 | case TS_VECTOR: | ||
283 | case NP_VECTOR: | ||
284 | case SS_VECTOR: | ||
285 | case GP_VECTOR: | ||
286 | return EXCPT_CONTRIBUTORY; | ||
287 | default: | ||
288 | break; | ||
289 | } | ||
290 | return EXCPT_BENIGN; | ||
291 | } | ||
292 | |||
293 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | ||
294 | unsigned nr, bool has_error, u32 error_code) | ||
295 | { | ||
296 | u32 prev_nr; | ||
297 | int class1, class2; | ||
298 | |||
299 | if (!vcpu->arch.exception.pending) { | ||
300 | queue: | ||
301 | vcpu->arch.exception.pending = true; | ||
302 | vcpu->arch.exception.has_error_code = has_error; | ||
303 | vcpu->arch.exception.nr = nr; | ||
304 | vcpu->arch.exception.error_code = error_code; | ||
305 | return; | ||
306 | } | ||
307 | |||
308 | /* to check exception */ | ||
309 | prev_nr = vcpu->arch.exception.nr; | ||
310 | if (prev_nr == DF_VECTOR) { | ||
311 | /* triple fault -> shutdown */ | ||
312 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
313 | return; | ||
314 | } | ||
315 | class1 = exception_class(prev_nr); | ||
316 | class2 = exception_class(nr); | ||
317 | if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) | ||
318 | || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { | ||
319 | /* generate double fault per SDM Table 5-5 */ | ||
320 | vcpu->arch.exception.pending = true; | ||
321 | vcpu->arch.exception.has_error_code = true; | ||
322 | vcpu->arch.exception.nr = DF_VECTOR; | ||
323 | vcpu->arch.exception.error_code = 0; | ||
324 | } else | ||
325 | /* replace previous exception with a new one in a hope | ||
326 | that instruction re-execution will regenerate lost | ||
327 | exception */ | ||
328 | goto queue; | ||
329 | } | ||
330 | |||
260 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 331 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
261 | { | 332 | { |
262 | WARN_ON(vcpu->arch.exception.pending); | 333 | kvm_multiple_exception(vcpu, nr, false, 0); |
263 | vcpu->arch.exception.pending = true; | ||
264 | vcpu->arch.exception.has_error_code = false; | ||
265 | vcpu->arch.exception.nr = nr; | ||
266 | } | 334 | } |
267 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 335 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
268 | 336 | ||
@@ -270,25 +338,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | |||
270 | u32 error_code) | 338 | u32 error_code) |
271 | { | 339 | { |
272 | ++vcpu->stat.pf_guest; | 340 | ++vcpu->stat.pf_guest; |
273 | |||
274 | if (vcpu->arch.exception.pending) { | ||
275 | switch(vcpu->arch.exception.nr) { | ||
276 | case DF_VECTOR: | ||
277 | /* triple fault -> shutdown */ | ||
278 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
279 | return; | ||
280 | case PF_VECTOR: | ||
281 | vcpu->arch.exception.nr = DF_VECTOR; | ||
282 | vcpu->arch.exception.error_code = 0; | ||
283 | return; | ||
284 | default: | ||
285 | /* replace previous exception with a new one in a hope | ||
286 | that instruction re-execution will regenerate lost | ||
287 | exception */ | ||
288 | vcpu->arch.exception.pending = false; | ||
289 | break; | ||
290 | } | ||
291 | } | ||
292 | vcpu->arch.cr2 = addr; | 341 | vcpu->arch.cr2 = addr; |
293 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); | 342 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); |
294 | } | 343 | } |
@@ -301,11 +350,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
301 | 350 | ||
302 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 351 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
303 | { | 352 | { |
304 | WARN_ON(vcpu->arch.exception.pending); | 353 | kvm_multiple_exception(vcpu, nr, true, error_code); |
305 | vcpu->arch.exception.pending = true; | ||
306 | vcpu->arch.exception.has_error_code = true; | ||
307 | vcpu->arch.exception.nr = nr; | ||
308 | vcpu->arch.exception.error_code = error_code; | ||
309 | } | 354 | } |
310 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 355 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
311 | 356 | ||
@@ -383,12 +428,18 @@ out: | |||
383 | 428 | ||
384 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 429 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
385 | { | 430 | { |
386 | if (cr0 & CR0_RESERVED_BITS) { | 431 | cr0 |= X86_CR0_ET; |
432 | |||
433 | #ifdef CONFIG_X86_64 | ||
434 | if (cr0 & 0xffffffff00000000UL) { | ||
387 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", | 435 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", |
388 | cr0, vcpu->arch.cr0); | 436 | cr0, kvm_read_cr0(vcpu)); |
389 | kvm_inject_gp(vcpu, 0); | 437 | kvm_inject_gp(vcpu, 0); |
390 | return; | 438 | return; |
391 | } | 439 | } |
440 | #endif | ||
441 | |||
442 | cr0 &= ~CR0_RESERVED_BITS; | ||
392 | 443 | ||
393 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { | 444 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { |
394 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); | 445 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); |
@@ -405,7 +456,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
405 | 456 | ||
406 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 457 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
407 | #ifdef CONFIG_X86_64 | 458 | #ifdef CONFIG_X86_64 |
408 | if ((vcpu->arch.shadow_efer & EFER_LME)) { | 459 | if ((vcpu->arch.efer & EFER_LME)) { |
409 | int cs_db, cs_l; | 460 | int cs_db, cs_l; |
410 | 461 | ||
411 | if (!is_pae(vcpu)) { | 462 | if (!is_pae(vcpu)) { |
@@ -443,13 +494,13 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
443 | 494 | ||
444 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 495 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
445 | { | 496 | { |
446 | kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); | 497 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); |
447 | } | 498 | } |
448 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 499 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
449 | 500 | ||
450 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 501 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
451 | { | 502 | { |
452 | unsigned long old_cr4 = vcpu->arch.cr4; | 503 | unsigned long old_cr4 = kvm_read_cr4(vcpu); |
453 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | 504 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; |
454 | 505 | ||
455 | if (cr4 & CR4_RESERVED_BITS) { | 506 | if (cr4 & CR4_RESERVED_BITS) { |
@@ -575,9 +626,11 @@ static inline u32 bit(int bitno) | |||
575 | * kvm-specific. Those are put in the beginning of the list. | 626 | * kvm-specific. Those are put in the beginning of the list. |
576 | */ | 627 | */ |
577 | 628 | ||
578 | #define KVM_SAVE_MSRS_BEGIN 2 | 629 | #define KVM_SAVE_MSRS_BEGIN 5 |
579 | static u32 msrs_to_save[] = { | 630 | static u32 msrs_to_save[] = { |
580 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 631 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
632 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | ||
633 | HV_X64_MSR_APIC_ASSIST_PAGE, | ||
581 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 634 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
582 | MSR_K6_STAR, | 635 | MSR_K6_STAR, |
583 | #ifdef CONFIG_X86_64 | 636 | #ifdef CONFIG_X86_64 |
@@ -602,7 +655,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
602 | } | 655 | } |
603 | 656 | ||
604 | if (is_paging(vcpu) | 657 | if (is_paging(vcpu) |
605 | && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { | 658 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { |
606 | printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); | 659 | printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); |
607 | kvm_inject_gp(vcpu, 0); | 660 | kvm_inject_gp(vcpu, 0); |
608 | return; | 661 | return; |
@@ -633,9 +686,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
633 | kvm_x86_ops->set_efer(vcpu, efer); | 686 | kvm_x86_ops->set_efer(vcpu, efer); |
634 | 687 | ||
635 | efer &= ~EFER_LMA; | 688 | efer &= ~EFER_LMA; |
636 | efer |= vcpu->arch.shadow_efer & EFER_LMA; | 689 | efer |= vcpu->arch.efer & EFER_LMA; |
637 | 690 | ||
638 | vcpu->arch.shadow_efer = efer; | 691 | vcpu->arch.efer = efer; |
639 | 692 | ||
640 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 693 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
641 | kvm_mmu_reset_context(vcpu); | 694 | kvm_mmu_reset_context(vcpu); |
@@ -957,6 +1010,100 @@ out: | |||
957 | return r; | 1010 | return r; |
958 | } | 1011 | } |
959 | 1012 | ||
1013 | static bool kvm_hv_hypercall_enabled(struct kvm *kvm) | ||
1014 | { | ||
1015 | return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; | ||
1016 | } | ||
1017 | |||
1018 | static bool kvm_hv_msr_partition_wide(u32 msr) | ||
1019 | { | ||
1020 | bool r = false; | ||
1021 | switch (msr) { | ||
1022 | case HV_X64_MSR_GUEST_OS_ID: | ||
1023 | case HV_X64_MSR_HYPERCALL: | ||
1024 | r = true; | ||
1025 | break; | ||
1026 | } | ||
1027 | |||
1028 | return r; | ||
1029 | } | ||
1030 | |||
1031 | static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1032 | { | ||
1033 | struct kvm *kvm = vcpu->kvm; | ||
1034 | |||
1035 | switch (msr) { | ||
1036 | case HV_X64_MSR_GUEST_OS_ID: | ||
1037 | kvm->arch.hv_guest_os_id = data; | ||
1038 | /* setting guest os id to zero disables hypercall page */ | ||
1039 | if (!kvm->arch.hv_guest_os_id) | ||
1040 | kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; | ||
1041 | break; | ||
1042 | case HV_X64_MSR_HYPERCALL: { | ||
1043 | u64 gfn; | ||
1044 | unsigned long addr; | ||
1045 | u8 instructions[4]; | ||
1046 | |||
1047 | /* if guest os id is not set hypercall should remain disabled */ | ||
1048 | if (!kvm->arch.hv_guest_os_id) | ||
1049 | break; | ||
1050 | if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { | ||
1051 | kvm->arch.hv_hypercall = data; | ||
1052 | break; | ||
1053 | } | ||
1054 | gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; | ||
1055 | addr = gfn_to_hva(kvm, gfn); | ||
1056 | if (kvm_is_error_hva(addr)) | ||
1057 | return 1; | ||
1058 | kvm_x86_ops->patch_hypercall(vcpu, instructions); | ||
1059 | ((unsigned char *)instructions)[3] = 0xc3; /* ret */ | ||
1060 | if (copy_to_user((void __user *)addr, instructions, 4)) | ||
1061 | return 1; | ||
1062 | kvm->arch.hv_hypercall = data; | ||
1063 | break; | ||
1064 | } | ||
1065 | default: | ||
1066 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | ||
1067 | "data 0x%llx\n", msr, data); | ||
1068 | return 1; | ||
1069 | } | ||
1070 | return 0; | ||
1071 | } | ||
1072 | |||
1073 | static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1074 | { | ||
1075 | switch (msr) { | ||
1076 | case HV_X64_MSR_APIC_ASSIST_PAGE: { | ||
1077 | unsigned long addr; | ||
1078 | |||
1079 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { | ||
1080 | vcpu->arch.hv_vapic = data; | ||
1081 | break; | ||
1082 | } | ||
1083 | addr = gfn_to_hva(vcpu->kvm, data >> | ||
1084 | HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); | ||
1085 | if (kvm_is_error_hva(addr)) | ||
1086 | return 1; | ||
1087 | if (clear_user((void __user *)addr, PAGE_SIZE)) | ||
1088 | return 1; | ||
1089 | vcpu->arch.hv_vapic = data; | ||
1090 | break; | ||
1091 | } | ||
1092 | case HV_X64_MSR_EOI: | ||
1093 | return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); | ||
1094 | case HV_X64_MSR_ICR: | ||
1095 | return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); | ||
1096 | case HV_X64_MSR_TPR: | ||
1097 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); | ||
1098 | default: | ||
1099 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | ||
1100 | "data 0x%llx\n", msr, data); | ||
1101 | return 1; | ||
1102 | } | ||
1103 | |||
1104 | return 0; | ||
1105 | } | ||
1106 | |||
960 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1107 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
961 | { | 1108 | { |
962 | switch (msr) { | 1109 | switch (msr) { |
@@ -1071,6 +1218,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1071 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1218 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
1072 | "0x%x data 0x%llx\n", msr, data); | 1219 | "0x%x data 0x%llx\n", msr, data); |
1073 | break; | 1220 | break; |
1221 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | ||
1222 | if (kvm_hv_msr_partition_wide(msr)) { | ||
1223 | int r; | ||
1224 | mutex_lock(&vcpu->kvm->lock); | ||
1225 | r = set_msr_hyperv_pw(vcpu, msr, data); | ||
1226 | mutex_unlock(&vcpu->kvm->lock); | ||
1227 | return r; | ||
1228 | } else | ||
1229 | return set_msr_hyperv(vcpu, msr, data); | ||
1230 | break; | ||
1074 | default: | 1231 | default: |
1075 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1232 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1076 | return xen_hvm_config(vcpu, data); | 1233 | return xen_hvm_config(vcpu, data); |
@@ -1170,6 +1327,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1170 | return 0; | 1327 | return 0; |
1171 | } | 1328 | } |
1172 | 1329 | ||
1330 | static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
1331 | { | ||
1332 | u64 data = 0; | ||
1333 | struct kvm *kvm = vcpu->kvm; | ||
1334 | |||
1335 | switch (msr) { | ||
1336 | case HV_X64_MSR_GUEST_OS_ID: | ||
1337 | data = kvm->arch.hv_guest_os_id; | ||
1338 | break; | ||
1339 | case HV_X64_MSR_HYPERCALL: | ||
1340 | data = kvm->arch.hv_hypercall; | ||
1341 | break; | ||
1342 | default: | ||
1343 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | ||
1344 | return 1; | ||
1345 | } | ||
1346 | |||
1347 | *pdata = data; | ||
1348 | return 0; | ||
1349 | } | ||
1350 | |||
1351 | static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
1352 | { | ||
1353 | u64 data = 0; | ||
1354 | |||
1355 | switch (msr) { | ||
1356 | case HV_X64_MSR_VP_INDEX: { | ||
1357 | int r; | ||
1358 | struct kvm_vcpu *v; | ||
1359 | kvm_for_each_vcpu(r, v, vcpu->kvm) | ||
1360 | if (v == vcpu) | ||
1361 | data = r; | ||
1362 | break; | ||
1363 | } | ||
1364 | case HV_X64_MSR_EOI: | ||
1365 | return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); | ||
1366 | case HV_X64_MSR_ICR: | ||
1367 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); | ||
1368 | case HV_X64_MSR_TPR: | ||
1369 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); | ||
1370 | default: | ||
1371 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | ||
1372 | return 1; | ||
1373 | } | ||
1374 | *pdata = data; | ||
1375 | return 0; | ||
1376 | } | ||
1377 | |||
1173 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 1378 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
1174 | { | 1379 | { |
1175 | u64 data; | 1380 | u64 data; |
@@ -1221,7 +1426,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1221 | data |= (((uint64_t)4ULL) << 40); | 1426 | data |= (((uint64_t)4ULL) << 40); |
1222 | break; | 1427 | break; |
1223 | case MSR_EFER: | 1428 | case MSR_EFER: |
1224 | data = vcpu->arch.shadow_efer; | 1429 | data = vcpu->arch.efer; |
1225 | break; | 1430 | break; |
1226 | case MSR_KVM_WALL_CLOCK: | 1431 | case MSR_KVM_WALL_CLOCK: |
1227 | data = vcpu->kvm->arch.wall_clock; | 1432 | data = vcpu->kvm->arch.wall_clock; |
@@ -1236,6 +1441,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1236 | case MSR_IA32_MCG_STATUS: | 1441 | case MSR_IA32_MCG_STATUS: |
1237 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1442 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
1238 | return get_msr_mce(vcpu, msr, pdata); | 1443 | return get_msr_mce(vcpu, msr, pdata); |
1444 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | ||
1445 | if (kvm_hv_msr_partition_wide(msr)) { | ||
1446 | int r; | ||
1447 | mutex_lock(&vcpu->kvm->lock); | ||
1448 | r = get_msr_hyperv_pw(vcpu, msr, pdata); | ||
1449 | mutex_unlock(&vcpu->kvm->lock); | ||
1450 | return r; | ||
1451 | } else | ||
1452 | return get_msr_hyperv(vcpu, msr, pdata); | ||
1453 | break; | ||
1239 | default: | 1454 | default: |
1240 | if (!ignore_msrs) { | 1455 | if (!ignore_msrs) { |
1241 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1456 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
@@ -1261,15 +1476,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |||
1261 | int (*do_msr)(struct kvm_vcpu *vcpu, | 1476 | int (*do_msr)(struct kvm_vcpu *vcpu, |
1262 | unsigned index, u64 *data)) | 1477 | unsigned index, u64 *data)) |
1263 | { | 1478 | { |
1264 | int i; | 1479 | int i, idx; |
1265 | 1480 | ||
1266 | vcpu_load(vcpu); | 1481 | vcpu_load(vcpu); |
1267 | 1482 | ||
1268 | down_read(&vcpu->kvm->slots_lock); | 1483 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
1269 | for (i = 0; i < msrs->nmsrs; ++i) | 1484 | for (i = 0; i < msrs->nmsrs; ++i) |
1270 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | 1485 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) |
1271 | break; | 1486 | break; |
1272 | up_read(&vcpu->kvm->slots_lock); | 1487 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
1273 | 1488 | ||
1274 | vcpu_put(vcpu); | 1489 | vcpu_put(vcpu); |
1275 | 1490 | ||
@@ -1351,6 +1566,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1351 | case KVM_CAP_XEN_HVM: | 1566 | case KVM_CAP_XEN_HVM: |
1352 | case KVM_CAP_ADJUST_CLOCK: | 1567 | case KVM_CAP_ADJUST_CLOCK: |
1353 | case KVM_CAP_VCPU_EVENTS: | 1568 | case KVM_CAP_VCPU_EVENTS: |
1569 | case KVM_CAP_HYPERV: | ||
1570 | case KVM_CAP_HYPERV_VAPIC: | ||
1571 | case KVM_CAP_HYPERV_SPIN: | ||
1572 | case KVM_CAP_PCI_SEGMENT: | ||
1573 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | ||
1354 | r = 1; | 1574 | r = 1; |
1355 | break; | 1575 | break; |
1356 | case KVM_CAP_COALESCED_MMIO: | 1576 | case KVM_CAP_COALESCED_MMIO: |
@@ -1464,8 +1684,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1464 | 1684 | ||
1465 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 1685 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
1466 | { | 1686 | { |
1467 | kvm_x86_ops->vcpu_put(vcpu); | ||
1468 | kvm_put_guest_fpu(vcpu); | 1687 | kvm_put_guest_fpu(vcpu); |
1688 | kvm_x86_ops->vcpu_put(vcpu); | ||
1469 | } | 1689 | } |
1470 | 1690 | ||
1471 | static int is_efer_nx(void) | 1691 | static int is_efer_nx(void) |
@@ -1530,6 +1750,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1530 | cpuid_fix_nx_cap(vcpu); | 1750 | cpuid_fix_nx_cap(vcpu); |
1531 | r = 0; | 1751 | r = 0; |
1532 | kvm_apic_set_version(vcpu); | 1752 | kvm_apic_set_version(vcpu); |
1753 | kvm_x86_ops->cpuid_update(vcpu); | ||
1533 | 1754 | ||
1534 | out_free: | 1755 | out_free: |
1535 | vfree(cpuid_entries); | 1756 | vfree(cpuid_entries); |
@@ -1552,6 +1773,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
1552 | goto out; | 1773 | goto out; |
1553 | vcpu->arch.cpuid_nent = cpuid->nent; | 1774 | vcpu->arch.cpuid_nent = cpuid->nent; |
1554 | kvm_apic_set_version(vcpu); | 1775 | kvm_apic_set_version(vcpu); |
1776 | kvm_x86_ops->cpuid_update(vcpu); | ||
1555 | return 0; | 1777 | return 0; |
1556 | 1778 | ||
1557 | out: | 1779 | out: |
@@ -1594,12 +1816,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1594 | u32 index, int *nent, int maxnent) | 1816 | u32 index, int *nent, int maxnent) |
1595 | { | 1817 | { |
1596 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | 1818 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; |
1597 | unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0; | ||
1598 | #ifdef CONFIG_X86_64 | 1819 | #ifdef CONFIG_X86_64 |
1820 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
1821 | ? F(GBPAGES) : 0; | ||
1599 | unsigned f_lm = F(LM); | 1822 | unsigned f_lm = F(LM); |
1600 | #else | 1823 | #else |
1824 | unsigned f_gbpages = 0; | ||
1601 | unsigned f_lm = 0; | 1825 | unsigned f_lm = 0; |
1602 | #endif | 1826 | #endif |
1827 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
1603 | 1828 | ||
1604 | /* cpuid 1.edx */ | 1829 | /* cpuid 1.edx */ |
1605 | const u32 kvm_supported_word0_x86_features = | 1830 | const u32 kvm_supported_word0_x86_features = |
@@ -1619,7 +1844,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1619 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | 1844 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | |
1620 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | 1845 | F(PAT) | F(PSE36) | 0 /* Reserved */ | |
1621 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | 1846 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | |
1622 | F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | | 1847 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | |
1623 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | 1848 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); |
1624 | /* cpuid 1.ecx */ | 1849 | /* cpuid 1.ecx */ |
1625 | const u32 kvm_supported_word4_x86_features = | 1850 | const u32 kvm_supported_word4_x86_features = |
@@ -1866,7 +2091,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
1866 | return 0; | 2091 | return 0; |
1867 | if (mce->status & MCI_STATUS_UC) { | 2092 | if (mce->status & MCI_STATUS_UC) { |
1868 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | 2093 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || |
1869 | !(vcpu->arch.cr4 & X86_CR4_MCE)) { | 2094 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { |
1870 | printk(KERN_DEBUG "kvm: set_mce: " | 2095 | printk(KERN_DEBUG "kvm: set_mce: " |
1871 | "injects mce exception while " | 2096 | "injects mce exception while " |
1872 | "previous one is in progress!\n"); | 2097 | "previous one is in progress!\n"); |
@@ -2160,14 +2385,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
2160 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) | 2385 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) |
2161 | return -EINVAL; | 2386 | return -EINVAL; |
2162 | 2387 | ||
2163 | down_write(&kvm->slots_lock); | 2388 | mutex_lock(&kvm->slots_lock); |
2164 | spin_lock(&kvm->mmu_lock); | 2389 | spin_lock(&kvm->mmu_lock); |
2165 | 2390 | ||
2166 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 2391 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
2167 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 2392 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
2168 | 2393 | ||
2169 | spin_unlock(&kvm->mmu_lock); | 2394 | spin_unlock(&kvm->mmu_lock); |
2170 | up_write(&kvm->slots_lock); | 2395 | mutex_unlock(&kvm->slots_lock); |
2171 | return 0; | 2396 | return 0; |
2172 | } | 2397 | } |
2173 | 2398 | ||
@@ -2176,13 +2401,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |||
2176 | return kvm->arch.n_alloc_mmu_pages; | 2401 | return kvm->arch.n_alloc_mmu_pages; |
2177 | } | 2402 | } |
2178 | 2403 | ||
2404 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | ||
2405 | { | ||
2406 | int i; | ||
2407 | struct kvm_mem_alias *alias; | ||
2408 | struct kvm_mem_aliases *aliases; | ||
2409 | |||
2410 | aliases = rcu_dereference(kvm->arch.aliases); | ||
2411 | |||
2412 | for (i = 0; i < aliases->naliases; ++i) { | ||
2413 | alias = &aliases->aliases[i]; | ||
2414 | if (alias->flags & KVM_ALIAS_INVALID) | ||
2415 | continue; | ||
2416 | if (gfn >= alias->base_gfn | ||
2417 | && gfn < alias->base_gfn + alias->npages) | ||
2418 | return alias->target_gfn + gfn - alias->base_gfn; | ||
2419 | } | ||
2420 | return gfn; | ||
2421 | } | ||
2422 | |||
2179 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | 2423 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) |
2180 | { | 2424 | { |
2181 | int i; | 2425 | int i; |
2182 | struct kvm_mem_alias *alias; | 2426 | struct kvm_mem_alias *alias; |
2427 | struct kvm_mem_aliases *aliases; | ||
2183 | 2428 | ||
2184 | for (i = 0; i < kvm->arch.naliases; ++i) { | 2429 | aliases = rcu_dereference(kvm->arch.aliases); |
2185 | alias = &kvm->arch.aliases[i]; | 2430 | |
2431 | for (i = 0; i < aliases->naliases; ++i) { | ||
2432 | alias = &aliases->aliases[i]; | ||
2186 | if (gfn >= alias->base_gfn | 2433 | if (gfn >= alias->base_gfn |
2187 | && gfn < alias->base_gfn + alias->npages) | 2434 | && gfn < alias->base_gfn + alias->npages) |
2188 | return alias->target_gfn + gfn - alias->base_gfn; | 2435 | return alias->target_gfn + gfn - alias->base_gfn; |
@@ -2200,6 +2447,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
2200 | { | 2447 | { |
2201 | int r, n; | 2448 | int r, n; |
2202 | struct kvm_mem_alias *p; | 2449 | struct kvm_mem_alias *p; |
2450 | struct kvm_mem_aliases *aliases, *old_aliases; | ||
2203 | 2451 | ||
2204 | r = -EINVAL; | 2452 | r = -EINVAL; |
2205 | /* General sanity checks */ | 2453 | /* General sanity checks */ |
@@ -2216,26 +2464,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
2216 | < alias->target_phys_addr) | 2464 | < alias->target_phys_addr) |
2217 | goto out; | 2465 | goto out; |
2218 | 2466 | ||
2219 | down_write(&kvm->slots_lock); | 2467 | r = -ENOMEM; |
2220 | spin_lock(&kvm->mmu_lock); | 2468 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); |
2469 | if (!aliases) | ||
2470 | goto out; | ||
2471 | |||
2472 | mutex_lock(&kvm->slots_lock); | ||
2221 | 2473 | ||
2222 | p = &kvm->arch.aliases[alias->slot]; | 2474 | /* invalidate any gfn reference in case of deletion/shrinking */ |
2475 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2476 | aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; | ||
2477 | old_aliases = kvm->arch.aliases; | ||
2478 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2479 | synchronize_srcu_expedited(&kvm->srcu); | ||
2480 | kvm_mmu_zap_all(kvm); | ||
2481 | kfree(old_aliases); | ||
2482 | |||
2483 | r = -ENOMEM; | ||
2484 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
2485 | if (!aliases) | ||
2486 | goto out_unlock; | ||
2487 | |||
2488 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2489 | |||
2490 | p = &aliases->aliases[alias->slot]; | ||
2223 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 2491 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
2224 | p->npages = alias->memory_size >> PAGE_SHIFT; | 2492 | p->npages = alias->memory_size >> PAGE_SHIFT; |
2225 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | 2493 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; |
2494 | p->flags &= ~(KVM_ALIAS_INVALID); | ||
2226 | 2495 | ||
2227 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | 2496 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) |
2228 | if (kvm->arch.aliases[n - 1].npages) | 2497 | if (aliases->aliases[n - 1].npages) |
2229 | break; | 2498 | break; |
2230 | kvm->arch.naliases = n; | 2499 | aliases->naliases = n; |
2231 | 2500 | ||
2232 | spin_unlock(&kvm->mmu_lock); | 2501 | old_aliases = kvm->arch.aliases; |
2233 | kvm_mmu_zap_all(kvm); | 2502 | rcu_assign_pointer(kvm->arch.aliases, aliases); |
2234 | 2503 | synchronize_srcu_expedited(&kvm->srcu); | |
2235 | up_write(&kvm->slots_lock); | 2504 | kfree(old_aliases); |
2236 | 2505 | r = 0; | |
2237 | return 0; | ||
2238 | 2506 | ||
2507 | out_unlock: | ||
2508 | mutex_unlock(&kvm->slots_lock); | ||
2239 | out: | 2509 | out: |
2240 | return r; | 2510 | return r; |
2241 | } | 2511 | } |
@@ -2273,18 +2543,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2273 | r = 0; | 2543 | r = 0; |
2274 | switch (chip->chip_id) { | 2544 | switch (chip->chip_id) { |
2275 | case KVM_IRQCHIP_PIC_MASTER: | 2545 | case KVM_IRQCHIP_PIC_MASTER: |
2276 | spin_lock(&pic_irqchip(kvm)->lock); | 2546 | raw_spin_lock(&pic_irqchip(kvm)->lock); |
2277 | memcpy(&pic_irqchip(kvm)->pics[0], | 2547 | memcpy(&pic_irqchip(kvm)->pics[0], |
2278 | &chip->chip.pic, | 2548 | &chip->chip.pic, |
2279 | sizeof(struct kvm_pic_state)); | 2549 | sizeof(struct kvm_pic_state)); |
2280 | spin_unlock(&pic_irqchip(kvm)->lock); | 2550 | raw_spin_unlock(&pic_irqchip(kvm)->lock); |
2281 | break; | 2551 | break; |
2282 | case KVM_IRQCHIP_PIC_SLAVE: | 2552 | case KVM_IRQCHIP_PIC_SLAVE: |
2283 | spin_lock(&pic_irqchip(kvm)->lock); | 2553 | raw_spin_lock(&pic_irqchip(kvm)->lock); |
2284 | memcpy(&pic_irqchip(kvm)->pics[1], | 2554 | memcpy(&pic_irqchip(kvm)->pics[1], |
2285 | &chip->chip.pic, | 2555 | &chip->chip.pic, |
2286 | sizeof(struct kvm_pic_state)); | 2556 | sizeof(struct kvm_pic_state)); |
2287 | spin_unlock(&pic_irqchip(kvm)->lock); | 2557 | raw_spin_unlock(&pic_irqchip(kvm)->lock); |
2288 | break; | 2558 | break; |
2289 | case KVM_IRQCHIP_IOAPIC: | 2559 | case KVM_IRQCHIP_IOAPIC: |
2290 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); | 2560 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
@@ -2364,29 +2634,62 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
2364 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 2634 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
2365 | struct kvm_dirty_log *log) | 2635 | struct kvm_dirty_log *log) |
2366 | { | 2636 | { |
2367 | int r; | 2637 | int r, n, i; |
2368 | int n; | ||
2369 | struct kvm_memory_slot *memslot; | 2638 | struct kvm_memory_slot *memslot; |
2370 | int is_dirty = 0; | 2639 | unsigned long is_dirty = 0; |
2640 | unsigned long *dirty_bitmap = NULL; | ||
2371 | 2641 | ||
2372 | down_write(&kvm->slots_lock); | 2642 | mutex_lock(&kvm->slots_lock); |
2373 | 2643 | ||
2374 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 2644 | r = -EINVAL; |
2375 | if (r) | 2645 | if (log->slot >= KVM_MEMORY_SLOTS) |
2646 | goto out; | ||
2647 | |||
2648 | memslot = &kvm->memslots->memslots[log->slot]; | ||
2649 | r = -ENOENT; | ||
2650 | if (!memslot->dirty_bitmap) | ||
2651 | goto out; | ||
2652 | |||
2653 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | ||
2654 | |||
2655 | r = -ENOMEM; | ||
2656 | dirty_bitmap = vmalloc(n); | ||
2657 | if (!dirty_bitmap) | ||
2376 | goto out; | 2658 | goto out; |
2659 | memset(dirty_bitmap, 0, n); | ||
2660 | |||
2661 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | ||
2662 | is_dirty = memslot->dirty_bitmap[i]; | ||
2377 | 2663 | ||
2378 | /* If nothing is dirty, don't bother messing with page tables. */ | 2664 | /* If nothing is dirty, don't bother messing with page tables. */ |
2379 | if (is_dirty) { | 2665 | if (is_dirty) { |
2666 | struct kvm_memslots *slots, *old_slots; | ||
2667 | |||
2380 | spin_lock(&kvm->mmu_lock); | 2668 | spin_lock(&kvm->mmu_lock); |
2381 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 2669 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
2382 | spin_unlock(&kvm->mmu_lock); | 2670 | spin_unlock(&kvm->mmu_lock); |
2383 | memslot = &kvm->memslots[log->slot]; | 2671 | |
2384 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 2672 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
2385 | memset(memslot->dirty_bitmap, 0, n); | 2673 | if (!slots) |
2674 | goto out_free; | ||
2675 | |||
2676 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
2677 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | ||
2678 | |||
2679 | old_slots = kvm->memslots; | ||
2680 | rcu_assign_pointer(kvm->memslots, slots); | ||
2681 | synchronize_srcu_expedited(&kvm->srcu); | ||
2682 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | ||
2683 | kfree(old_slots); | ||
2386 | } | 2684 | } |
2685 | |||
2387 | r = 0; | 2686 | r = 0; |
2687 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | ||
2688 | r = -EFAULT; | ||
2689 | out_free: | ||
2690 | vfree(dirty_bitmap); | ||
2388 | out: | 2691 | out: |
2389 | up_write(&kvm->slots_lock); | 2692 | mutex_unlock(&kvm->slots_lock); |
2390 | return r; | 2693 | return r; |
2391 | } | 2694 | } |
2392 | 2695 | ||
@@ -2469,6 +2772,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2469 | if (vpic) { | 2772 | if (vpic) { |
2470 | r = kvm_ioapic_init(kvm); | 2773 | r = kvm_ioapic_init(kvm); |
2471 | if (r) { | 2774 | if (r) { |
2775 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
2776 | &vpic->dev); | ||
2472 | kfree(vpic); | 2777 | kfree(vpic); |
2473 | goto create_irqchip_unlock; | 2778 | goto create_irqchip_unlock; |
2474 | } | 2779 | } |
@@ -2480,10 +2785,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2480 | r = kvm_setup_default_irq_routing(kvm); | 2785 | r = kvm_setup_default_irq_routing(kvm); |
2481 | if (r) { | 2786 | if (r) { |
2482 | mutex_lock(&kvm->irq_lock); | 2787 | mutex_lock(&kvm->irq_lock); |
2483 | kfree(kvm->arch.vpic); | 2788 | kvm_ioapic_destroy(kvm); |
2484 | kfree(kvm->arch.vioapic); | 2789 | kvm_destroy_pic(kvm); |
2485 | kvm->arch.vpic = NULL; | ||
2486 | kvm->arch.vioapic = NULL; | ||
2487 | mutex_unlock(&kvm->irq_lock); | 2790 | mutex_unlock(&kvm->irq_lock); |
2488 | } | 2791 | } |
2489 | create_irqchip_unlock: | 2792 | create_irqchip_unlock: |
@@ -2499,7 +2802,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2499 | sizeof(struct kvm_pit_config))) | 2802 | sizeof(struct kvm_pit_config))) |
2500 | goto out; | 2803 | goto out; |
2501 | create_pit: | 2804 | create_pit: |
2502 | down_write(&kvm->slots_lock); | 2805 | mutex_lock(&kvm->slots_lock); |
2503 | r = -EEXIST; | 2806 | r = -EEXIST; |
2504 | if (kvm->arch.vpit) | 2807 | if (kvm->arch.vpit) |
2505 | goto create_pit_unlock; | 2808 | goto create_pit_unlock; |
@@ -2508,7 +2811,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2508 | if (kvm->arch.vpit) | 2811 | if (kvm->arch.vpit) |
2509 | r = 0; | 2812 | r = 0; |
2510 | create_pit_unlock: | 2813 | create_pit_unlock: |
2511 | up_write(&kvm->slots_lock); | 2814 | mutex_unlock(&kvm->slots_lock); |
2512 | break; | 2815 | break; |
2513 | case KVM_IRQ_LINE_STATUS: | 2816 | case KVM_IRQ_LINE_STATUS: |
2514 | case KVM_IRQ_LINE: { | 2817 | case KVM_IRQ_LINE: { |
@@ -2725,7 +3028,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | |||
2725 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) | 3028 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) |
2726 | return 0; | 3029 | return 0; |
2727 | 3030 | ||
2728 | return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); | 3031 | return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
2729 | } | 3032 | } |
2730 | 3033 | ||
2731 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | 3034 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) |
@@ -2734,17 +3037,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
2734 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) | 3037 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) |
2735 | return 0; | 3038 | return 0; |
2736 | 3039 | ||
2737 | return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); | 3040 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
2738 | } | 3041 | } |
2739 | 3042 | ||
2740 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3043 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
2741 | struct kvm_vcpu *vcpu) | 3044 | { |
3045 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3046 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3047 | } | ||
3048 | |||
3049 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3050 | { | ||
3051 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3052 | access |= PFERR_FETCH_MASK; | ||
3053 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3054 | } | ||
3055 | |||
3056 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3057 | { | ||
3058 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3059 | access |= PFERR_WRITE_MASK; | ||
3060 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3061 | } | ||
3062 | |||
3063 | /* uses this to access any guest's mapped memory without checking CPL */ | ||
3064 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3065 | { | ||
3066 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); | ||
3067 | } | ||
3068 | |||
3069 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | ||
3070 | struct kvm_vcpu *vcpu, u32 access, | ||
3071 | u32 *error) | ||
2742 | { | 3072 | { |
2743 | void *data = val; | 3073 | void *data = val; |
2744 | int r = X86EMUL_CONTINUE; | 3074 | int r = X86EMUL_CONTINUE; |
2745 | 3075 | ||
2746 | while (bytes) { | 3076 | while (bytes) { |
2747 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3077 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); |
2748 | unsigned offset = addr & (PAGE_SIZE-1); | 3078 | unsigned offset = addr & (PAGE_SIZE-1); |
2749 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); | 3079 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); |
2750 | int ret; | 3080 | int ret; |
@@ -2767,14 +3097,37 @@ out: | |||
2767 | return r; | 3097 | return r; |
2768 | } | 3098 | } |
2769 | 3099 | ||
3100 | /* used for instruction fetching */ | ||
3101 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | ||
3102 | struct kvm_vcpu *vcpu, u32 *error) | ||
3103 | { | ||
3104 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3105 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | ||
3106 | access | PFERR_FETCH_MASK, error); | ||
3107 | } | ||
3108 | |||
3109 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | ||
3110 | struct kvm_vcpu *vcpu, u32 *error) | ||
3111 | { | ||
3112 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3113 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | ||
3114 | error); | ||
3115 | } | ||
3116 | |||
3117 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | ||
3118 | struct kvm_vcpu *vcpu, u32 *error) | ||
3119 | { | ||
3120 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | ||
3121 | } | ||
3122 | |||
2770 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3123 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, |
2771 | struct kvm_vcpu *vcpu) | 3124 | struct kvm_vcpu *vcpu, u32 *error) |
2772 | { | 3125 | { |
2773 | void *data = val; | 3126 | void *data = val; |
2774 | int r = X86EMUL_CONTINUE; | 3127 | int r = X86EMUL_CONTINUE; |
2775 | 3128 | ||
2776 | while (bytes) { | 3129 | while (bytes) { |
2777 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3130 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); |
2778 | unsigned offset = addr & (PAGE_SIZE-1); | 3131 | unsigned offset = addr & (PAGE_SIZE-1); |
2779 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3132 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
2780 | int ret; | 3133 | int ret; |
@@ -2804,6 +3157,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
2804 | struct kvm_vcpu *vcpu) | 3157 | struct kvm_vcpu *vcpu) |
2805 | { | 3158 | { |
2806 | gpa_t gpa; | 3159 | gpa_t gpa; |
3160 | u32 error_code; | ||
2807 | 3161 | ||
2808 | if (vcpu->mmio_read_completed) { | 3162 | if (vcpu->mmio_read_completed) { |
2809 | memcpy(val, vcpu->mmio_data, bytes); | 3163 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -2813,17 +3167,20 @@ static int emulator_read_emulated(unsigned long addr, | |||
2813 | return X86EMUL_CONTINUE; | 3167 | return X86EMUL_CONTINUE; |
2814 | } | 3168 | } |
2815 | 3169 | ||
2816 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3170 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); |
3171 | |||
3172 | if (gpa == UNMAPPED_GVA) { | ||
3173 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
3174 | return X86EMUL_PROPAGATE_FAULT; | ||
3175 | } | ||
2817 | 3176 | ||
2818 | /* For APIC access vmexit */ | 3177 | /* For APIC access vmexit */ |
2819 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3178 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
2820 | goto mmio; | 3179 | goto mmio; |
2821 | 3180 | ||
2822 | if (kvm_read_guest_virt(addr, val, bytes, vcpu) | 3181 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) |
2823 | == X86EMUL_CONTINUE) | 3182 | == X86EMUL_CONTINUE) |
2824 | return X86EMUL_CONTINUE; | 3183 | return X86EMUL_CONTINUE; |
2825 | if (gpa == UNMAPPED_GVA) | ||
2826 | return X86EMUL_PROPAGATE_FAULT; | ||
2827 | 3184 | ||
2828 | mmio: | 3185 | mmio: |
2829 | /* | 3186 | /* |
@@ -2862,11 +3219,12 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
2862 | struct kvm_vcpu *vcpu) | 3219 | struct kvm_vcpu *vcpu) |
2863 | { | 3220 | { |
2864 | gpa_t gpa; | 3221 | gpa_t gpa; |
3222 | u32 error_code; | ||
2865 | 3223 | ||
2866 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3224 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); |
2867 | 3225 | ||
2868 | if (gpa == UNMAPPED_GVA) { | 3226 | if (gpa == UNMAPPED_GVA) { |
2869 | kvm_inject_page_fault(vcpu, addr, 2); | 3227 | kvm_inject_page_fault(vcpu, addr, error_code); |
2870 | return X86EMUL_PROPAGATE_FAULT; | 3228 | return X86EMUL_PROPAGATE_FAULT; |
2871 | } | 3229 | } |
2872 | 3230 | ||
@@ -2930,7 +3288,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
2930 | char *kaddr; | 3288 | char *kaddr; |
2931 | u64 val; | 3289 | u64 val; |
2932 | 3290 | ||
2933 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3291 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
2934 | 3292 | ||
2935 | if (gpa == UNMAPPED_GVA || | 3293 | if (gpa == UNMAPPED_GVA || |
2936 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3294 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -2967,35 +3325,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
2967 | 3325 | ||
2968 | int emulate_clts(struct kvm_vcpu *vcpu) | 3326 | int emulate_clts(struct kvm_vcpu *vcpu) |
2969 | { | 3327 | { |
2970 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); | 3328 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
3329 | kvm_x86_ops->fpu_activate(vcpu); | ||
2971 | return X86EMUL_CONTINUE; | 3330 | return X86EMUL_CONTINUE; |
2972 | } | 3331 | } |
2973 | 3332 | ||
2974 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3333 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
2975 | { | 3334 | { |
2976 | struct kvm_vcpu *vcpu = ctxt->vcpu; | 3335 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); |
2977 | |||
2978 | switch (dr) { | ||
2979 | case 0 ... 3: | ||
2980 | *dest = kvm_x86_ops->get_dr(vcpu, dr); | ||
2981 | return X86EMUL_CONTINUE; | ||
2982 | default: | ||
2983 | pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr); | ||
2984 | return X86EMUL_UNHANDLEABLE; | ||
2985 | } | ||
2986 | } | 3336 | } |
2987 | 3337 | ||
2988 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3338 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
2989 | { | 3339 | { |
2990 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3340 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
2991 | int exception; | ||
2992 | 3341 | ||
2993 | kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); | 3342 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); |
2994 | if (exception) { | ||
2995 | /* FIXME: better handling */ | ||
2996 | return X86EMUL_UNHANDLEABLE; | ||
2997 | } | ||
2998 | return X86EMUL_CONTINUE; | ||
2999 | } | 3343 | } |
3000 | 3344 | ||
3001 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3345 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
@@ -3009,7 +3353,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3009 | 3353 | ||
3010 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | 3354 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); |
3011 | 3355 | ||
3012 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); | 3356 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); |
3013 | 3357 | ||
3014 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", | 3358 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", |
3015 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | 3359 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); |
@@ -3017,7 +3361,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3017 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3361 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
3018 | 3362 | ||
3019 | static struct x86_emulate_ops emulate_ops = { | 3363 | static struct x86_emulate_ops emulate_ops = { |
3020 | .read_std = kvm_read_guest_virt, | 3364 | .read_std = kvm_read_guest_virt_system, |
3365 | .fetch = kvm_fetch_guest_virt, | ||
3021 | .read_emulated = emulator_read_emulated, | 3366 | .read_emulated = emulator_read_emulated, |
3022 | .write_emulated = emulator_write_emulated, | 3367 | .write_emulated = emulator_write_emulated, |
3023 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3368 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
@@ -3060,8 +3405,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3060 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3405 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
3061 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3406 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
3062 | vcpu->arch.emulate_ctxt.mode = | 3407 | vcpu->arch.emulate_ctxt.mode = |
3408 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
3063 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3409 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
3064 | ? X86EMUL_MODE_REAL : cs_l | 3410 | ? X86EMUL_MODE_VM86 : cs_l |
3065 | ? X86EMUL_MODE_PROT64 : cs_db | 3411 | ? X86EMUL_MODE_PROT64 : cs_db |
3066 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3412 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3067 | 3413 | ||
@@ -3153,12 +3499,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) | |||
3153 | gva_t q = vcpu->arch.pio.guest_gva; | 3499 | gva_t q = vcpu->arch.pio.guest_gva; |
3154 | unsigned bytes; | 3500 | unsigned bytes; |
3155 | int ret; | 3501 | int ret; |
3502 | u32 error_code; | ||
3156 | 3503 | ||
3157 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | 3504 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; |
3158 | if (vcpu->arch.pio.in) | 3505 | if (vcpu->arch.pio.in) |
3159 | ret = kvm_write_guest_virt(q, p, bytes, vcpu); | 3506 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); |
3160 | else | 3507 | else |
3161 | ret = kvm_read_guest_virt(q, p, bytes, vcpu); | 3508 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); |
3509 | |||
3510 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3511 | kvm_inject_page_fault(vcpu, q, error_code); | ||
3512 | |||
3162 | return ret; | 3513 | return ret; |
3163 | } | 3514 | } |
3164 | 3515 | ||
@@ -3179,7 +3530,7 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
3179 | if (io->in) { | 3530 | if (io->in) { |
3180 | r = pio_copy_data(vcpu); | 3531 | r = pio_copy_data(vcpu); |
3181 | if (r) | 3532 | if (r) |
3182 | return r; | 3533 | goto out; |
3183 | } | 3534 | } |
3184 | 3535 | ||
3185 | delta = 1; | 3536 | delta = 1; |
@@ -3206,7 +3557,7 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
3206 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | 3557 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); |
3207 | } | 3558 | } |
3208 | } | 3559 | } |
3209 | 3560 | out: | |
3210 | io->count -= io->cur_count; | 3561 | io->count -= io->cur_count; |
3211 | io->cur_count = 0; | 3562 | io->cur_count = 0; |
3212 | 3563 | ||
@@ -3219,11 +3570,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
3219 | int r; | 3570 | int r; |
3220 | 3571 | ||
3221 | if (vcpu->arch.pio.in) | 3572 | if (vcpu->arch.pio.in) |
3222 | r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, | 3573 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, |
3223 | vcpu->arch.pio.size, pd); | 3574 | vcpu->arch.pio.size, pd); |
3224 | else | 3575 | else |
3225 | r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, | 3576 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, |
3226 | vcpu->arch.pio.size, pd); | 3577 | vcpu->arch.pio.port, vcpu->arch.pio.size, |
3578 | pd); | ||
3227 | return r; | 3579 | return r; |
3228 | } | 3580 | } |
3229 | 3581 | ||
@@ -3234,7 +3586,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) | |||
3234 | int i, r = 0; | 3586 | int i, r = 0; |
3235 | 3587 | ||
3236 | for (i = 0; i < io->cur_count; i++) { | 3588 | for (i = 0; i < io->cur_count; i++) { |
3237 | if (kvm_io_bus_write(&vcpu->kvm->pio_bus, | 3589 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, |
3238 | io->port, io->size, pd)) { | 3590 | io->port, io->size, pd)) { |
3239 | r = -EOPNOTSUPP; | 3591 | r = -EOPNOTSUPP; |
3240 | break; | 3592 | break; |
@@ -3248,6 +3600,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | |||
3248 | { | 3600 | { |
3249 | unsigned long val; | 3601 | unsigned long val; |
3250 | 3602 | ||
3603 | trace_kvm_pio(!in, port, size, 1); | ||
3604 | |||
3251 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3605 | vcpu->run->exit_reason = KVM_EXIT_IO; |
3252 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 3606 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
3253 | vcpu->run->io.size = vcpu->arch.pio.size = size; | 3607 | vcpu->run->io.size = vcpu->arch.pio.size = size; |
@@ -3259,11 +3613,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | |||
3259 | vcpu->arch.pio.down = 0; | 3613 | vcpu->arch.pio.down = 0; |
3260 | vcpu->arch.pio.rep = 0; | 3614 | vcpu->arch.pio.rep = 0; |
3261 | 3615 | ||
3262 | trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, | 3616 | if (!vcpu->arch.pio.in) { |
3263 | size, 1); | 3617 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3264 | 3618 | memcpy(vcpu->arch.pio_data, &val, 4); | |
3265 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 3619 | } |
3266 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
3267 | 3620 | ||
3268 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3621 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { |
3269 | complete_pio(vcpu); | 3622 | complete_pio(vcpu); |
@@ -3280,6 +3633,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3280 | unsigned now, in_page; | 3633 | unsigned now, in_page; |
3281 | int ret = 0; | 3634 | int ret = 0; |
3282 | 3635 | ||
3636 | trace_kvm_pio(!in, port, size, count); | ||
3637 | |||
3283 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3638 | vcpu->run->exit_reason = KVM_EXIT_IO; |
3284 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 3639 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
3285 | vcpu->run->io.size = vcpu->arch.pio.size = size; | 3640 | vcpu->run->io.size = vcpu->arch.pio.size = size; |
@@ -3291,9 +3646,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3291 | vcpu->arch.pio.down = down; | 3646 | vcpu->arch.pio.down = down; |
3292 | vcpu->arch.pio.rep = rep; | 3647 | vcpu->arch.pio.rep = rep; |
3293 | 3648 | ||
3294 | trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, | ||
3295 | size, count); | ||
3296 | |||
3297 | if (!count) { | 3649 | if (!count) { |
3298 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 3650 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
3299 | return 1; | 3651 | return 1; |
@@ -3325,10 +3677,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3325 | if (!vcpu->arch.pio.in) { | 3677 | if (!vcpu->arch.pio.in) { |
3326 | /* string PIO write */ | 3678 | /* string PIO write */ |
3327 | ret = pio_copy_data(vcpu); | 3679 | ret = pio_copy_data(vcpu); |
3328 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 3680 | if (ret == X86EMUL_PROPAGATE_FAULT) |
3329 | kvm_inject_gp(vcpu, 0); | ||
3330 | return 1; | 3681 | return 1; |
3331 | } | ||
3332 | if (ret == 0 && !pio_string_write(vcpu)) { | 3682 | if (ret == 0 && !pio_string_write(vcpu)) { |
3333 | complete_pio(vcpu); | 3683 | complete_pio(vcpu); |
3334 | if (vcpu->arch.pio.count == 0) | 3684 | if (vcpu->arch.pio.count == 0) |
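
When kernel_pio() or pio_string_write() cannot claim the port on the in-kernel I/O bus, the exit_reason/io fields set up above are handed to userspace as a KVM_EXIT_IO exit. A minimal sketch of the userspace side, assuming the standard struct kvm_run layout (the emulate_port_*() device-model helpers are hypothetical):

/* Sketch only: VMM handling of a KVM_EXIT_IO exit after KVM_RUN returns. */
static void handle_pio_exit(struct kvm_run *run)
{
	/* PIO data lives inside the kvm_run mapping, at io.data_offset. */
	unsigned char *data = (unsigned char *)run + run->io.data_offset;
	unsigned int i;

	for (i = 0; i < run->io.count; i++, data += run->io.size) {
		if (run->io.direction == KVM_EXIT_IO_OUT)
			emulate_port_write(run->io.port, data, run->io.size);	/* hypothetical device model */
		else
			emulate_port_read(run->io.port, data, run->io.size);	/* hypothetical device model */
	}
}
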
@@ -3487,11 +3837,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, | |||
3487 | return a0 | ((gpa_t)a1 << 32); | 3837 | return a0 | ((gpa_t)a1 << 32); |
3488 | } | 3838 | } |
3489 | 3839 | ||
3840 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | ||
3841 | { | ||
3842 | u64 param, ingpa, outgpa, ret; | ||
3843 | uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; | ||
3844 | bool fast, longmode; | ||
3845 | int cs_db, cs_l; | ||
3846 | |||
3847 | /* | ||
3848 | * hypercall generates #UD from a non-zero CPL or from real mode, | ||
3849 | * per the Hyper-V spec | ||
3850 | */ | ||
3851 | if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { | ||
3852 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3853 | return 0; | ||
3854 | } | ||
3855 | |||
3856 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | ||
3857 | longmode = is_long_mode(vcpu) && cs_l == 1; | ||
3858 | |||
3859 | if (!longmode) { | ||
3860 | param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | | ||
3861 | (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); | ||
3862 | ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | | ||
3863 | (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); | ||
3864 | outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | | ||
3865 | (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); | ||
3866 | } | ||
3867 | #ifdef CONFIG_X86_64 | ||
3868 | else { | ||
3869 | param = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
3870 | ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
3871 | outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); | ||
3872 | } | ||
3873 | #endif | ||
3874 | |||
3875 | code = param & 0xffff; | ||
3876 | fast = (param >> 16) & 0x1; | ||
3877 | rep_cnt = (param >> 32) & 0xfff; | ||
3878 | rep_idx = (param >> 48) & 0xfff; | ||
3879 | |||
3880 | trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); | ||
3881 | |||
3882 | switch (code) { | ||
3883 | case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: | ||
3884 | kvm_vcpu_on_spin(vcpu); | ||
3885 | break; | ||
3886 | default: | ||
3887 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | ||
3888 | break; | ||
3889 | } | ||
3890 | |||
3891 | ret = res | (((u64)rep_done & 0xfff) << 32); | ||
3892 | if (longmode) { | ||
3893 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); | ||
3894 | } else { | ||
3895 | kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); | ||
3896 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); | ||
3897 | } | ||
3898 | |||
3899 | return 1; | ||
3900 | } | ||
3901 | |||
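
For reference, the register decode above follows the Hyper-V hypercall input-value layout; a minimal restatement as extraction macros (the macro names are illustrative, not part of the patch):

/* Sketch only: bit layout of the hypercall input value decoded above. */
#define HV_HCALL_CODE(p)	((u16)((p) & 0xffff))		/* bits 15:0  - call code */
#define HV_HCALL_FAST(p)	(((p) >> 16) & 0x1)		/* bit  16    - fast (register-based) calling convention */
#define HV_HCALL_REP_CNT(p)	((u16)(((p) >> 32) & 0xfff))	/* bits 43:32 - rep count */
#define HV_HCALL_REP_IDX(p)	((u16)(((p) >> 48) & 0xfff))	/* bits 59:48 - rep start index */
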
3490 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 3902 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
3491 | { | 3903 | { |
3492 | unsigned long nr, a0, a1, a2, a3, ret; | 3904 | unsigned long nr, a0, a1, a2, a3, ret; |
3493 | int r = 1; | 3905 | int r = 1; |
3494 | 3906 | ||
3907 | if (kvm_hv_hypercall_enabled(vcpu->kvm)) | ||
3908 | return kvm_hv_hypercall(vcpu); | ||
3909 | |||
3495 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); | 3910 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3496 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); | 3911 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); |
3497 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); | 3912 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); |
@@ -3534,10 +3949,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); | |||
3534 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | 3949 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) |
3535 | { | 3950 | { |
3536 | char instruction[3]; | 3951 | char instruction[3]; |
3537 | int ret = 0; | ||
3538 | unsigned long rip = kvm_rip_read(vcpu); | 3952 | unsigned long rip = kvm_rip_read(vcpu); |
3539 | 3953 | ||
3540 | |||
3541 | /* | 3954 | /* |
3542 | * Blow out the MMU so that no other VCPU has an active mapping, which | 3955 | * Blow out the MMU so that no other VCPU has an active mapping, which |
3543 | * ensures that the updated hypercall appears atomically across all | 3956 | * ensures that the updated hypercall appears atomically across all |
@@ -3546,11 +3959,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
3546 | kvm_mmu_zap_all(vcpu->kvm); | 3959 | kvm_mmu_zap_all(vcpu->kvm); |
3547 | 3960 | ||
3548 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 3961 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
3549 | if (emulator_write_emulated(rip, instruction, 3, vcpu) | ||
3550 | != X86EMUL_CONTINUE) | ||
3551 | ret = -EFAULT; | ||
3552 | 3962 | ||
3553 | return ret; | 3963 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
3554 | } | 3964 | } |
3555 | 3965 | ||
3556 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 3966 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
@@ -3583,10 +3993,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | |||
3583 | { | 3993 | { |
3584 | unsigned long value; | 3994 | unsigned long value; |
3585 | 3995 | ||
3586 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | ||
3587 | switch (cr) { | 3996 | switch (cr) { |
3588 | case 0: | 3997 | case 0: |
3589 | value = vcpu->arch.cr0; | 3998 | value = kvm_read_cr0(vcpu); |
3590 | break; | 3999 | break; |
3591 | case 2: | 4000 | case 2: |
3592 | value = vcpu->arch.cr2; | 4001 | value = vcpu->arch.cr2; |
@@ -3595,7 +4004,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | |||
3595 | value = vcpu->arch.cr3; | 4004 | value = vcpu->arch.cr3; |
3596 | break; | 4005 | break; |
3597 | case 4: | 4006 | case 4: |
3598 | value = vcpu->arch.cr4; | 4007 | value = kvm_read_cr4(vcpu); |
3599 | break; | 4008 | break; |
3600 | case 8: | 4009 | case 8: |
3601 | value = kvm_get_cr8(vcpu); | 4010 | value = kvm_get_cr8(vcpu); |
@@ -3613,7 +4022,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3613 | { | 4022 | { |
3614 | switch (cr) { | 4023 | switch (cr) { |
3615 | case 0: | 4024 | case 0: |
3616 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 4025 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); |
3617 | *rflags = kvm_get_rflags(vcpu); | 4026 | *rflags = kvm_get_rflags(vcpu); |
3618 | break; | 4027 | break; |
3619 | case 2: | 4028 | case 2: |
@@ -3623,7 +4032,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3623 | kvm_set_cr3(vcpu, val); | 4032 | kvm_set_cr3(vcpu, val); |
3624 | break; | 4033 | break; |
3625 | case 4: | 4034 | case 4: |
3626 | kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); | 4035 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
3627 | break; | 4036 | break; |
3628 | case 8: | 4037 | case 8: |
3629 | kvm_set_cr8(vcpu, val & 0xfUL); | 4038 | kvm_set_cr8(vcpu, val & 0xfUL); |
@@ -3690,6 +4099,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | |||
3690 | } | 4099 | } |
3691 | return best; | 4100 | return best; |
3692 | } | 4101 | } |
4102 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
3693 | 4103 | ||
3694 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | 4104 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) |
3695 | { | 4105 | { |
@@ -3773,14 +4183,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu) | |||
3773 | static void vapic_exit(struct kvm_vcpu *vcpu) | 4183 | static void vapic_exit(struct kvm_vcpu *vcpu) |
3774 | { | 4184 | { |
3775 | struct kvm_lapic *apic = vcpu->arch.apic; | 4185 | struct kvm_lapic *apic = vcpu->arch.apic; |
4186 | int idx; | ||
3776 | 4187 | ||
3777 | if (!apic || !apic->vapic_addr) | 4188 | if (!apic || !apic->vapic_addr) |
3778 | return; | 4189 | return; |
3779 | 4190 | ||
3780 | down_read(&vcpu->kvm->slots_lock); | 4191 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
3781 | kvm_release_page_dirty(apic->vapic_page); | 4192 | kvm_release_page_dirty(apic->vapic_page); |
3782 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 4193 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
3783 | up_read(&vcpu->kvm->slots_lock); | 4194 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
3784 | } | 4195 | } |
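
The pattern above is applied throughout the remainder of the patch: every down_read()/up_read() pair on kvm->slots_lock around memslot or guest-memory access becomes an SRCU read-side critical section. A minimal sketch of the idiom (the access in the middle is illustrative):

/* Sketch only: SRCU read-side section replacing slots_lock readers. */
int idx;

idx = srcu_read_lock(&vcpu->kvm->srcu);
/* ... dereference memslots or touch guest memory here ... */
srcu_read_unlock(&vcpu->kvm->srcu, idx);
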
3785 | 4196 | ||
3786 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | 4197 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) |
@@ -3876,12 +4287,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3876 | r = 0; | 4287 | r = 0; |
3877 | goto out; | 4288 | goto out; |
3878 | } | 4289 | } |
4290 | if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { | ||
4291 | vcpu->fpu_active = 0; | ||
4292 | kvm_x86_ops->fpu_deactivate(vcpu); | ||
4293 | } | ||
3879 | } | 4294 | } |
3880 | 4295 | ||
3881 | preempt_disable(); | 4296 | preempt_disable(); |
3882 | 4297 | ||
3883 | kvm_x86_ops->prepare_guest_switch(vcpu); | 4298 | kvm_x86_ops->prepare_guest_switch(vcpu); |
3884 | kvm_load_guest_fpu(vcpu); | 4299 | if (vcpu->fpu_active) |
4300 | kvm_load_guest_fpu(vcpu); | ||
3885 | 4301 | ||
3886 | local_irq_disable(); | 4302 | local_irq_disable(); |
3887 | 4303 | ||
@@ -3909,7 +4325,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3909 | kvm_lapic_sync_to_vapic(vcpu); | 4325 | kvm_lapic_sync_to_vapic(vcpu); |
3910 | } | 4326 | } |
3911 | 4327 | ||
3912 | up_read(&vcpu->kvm->slots_lock); | 4328 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
3913 | 4329 | ||
3914 | kvm_guest_enter(); | 4330 | kvm_guest_enter(); |
3915 | 4331 | ||
@@ -3951,7 +4367,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3951 | 4367 | ||
3952 | preempt_enable(); | 4368 | preempt_enable(); |
3953 | 4369 | ||
3954 | down_read(&vcpu->kvm->slots_lock); | 4370 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
3955 | 4371 | ||
3956 | /* | 4372 | /* |
3957 | * Profile KVM exit RIPs: | 4373 | * Profile KVM exit RIPs: |
@@ -3973,6 +4389,7 @@ out: | |||
3973 | static int __vcpu_run(struct kvm_vcpu *vcpu) | 4389 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
3974 | { | 4390 | { |
3975 | int r; | 4391 | int r; |
4392 | struct kvm *kvm = vcpu->kvm; | ||
3976 | 4393 | ||
3977 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | 4394 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { |
3978 | pr_debug("vcpu %d received sipi with vector # %x\n", | 4395 | pr_debug("vcpu %d received sipi with vector # %x\n", |
@@ -3984,7 +4401,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
3984 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 4401 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
3985 | } | 4402 | } |
3986 | 4403 | ||
3987 | down_read(&vcpu->kvm->slots_lock); | 4404 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3988 | vapic_enter(vcpu); | 4405 | vapic_enter(vcpu); |
3989 | 4406 | ||
3990 | r = 1; | 4407 | r = 1; |
@@ -3992,9 +4409,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
3992 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 4409 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
3993 | r = vcpu_enter_guest(vcpu); | 4410 | r = vcpu_enter_guest(vcpu); |
3994 | else { | 4411 | else { |
3995 | up_read(&vcpu->kvm->slots_lock); | 4412 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
3996 | kvm_vcpu_block(vcpu); | 4413 | kvm_vcpu_block(vcpu); |
3997 | down_read(&vcpu->kvm->slots_lock); | 4414 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3998 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 4415 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) |
3999 | { | 4416 | { |
4000 | switch(vcpu->arch.mp_state) { | 4417 | switch(vcpu->arch.mp_state) { |
@@ -4029,13 +4446,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4029 | ++vcpu->stat.signal_exits; | 4446 | ++vcpu->stat.signal_exits; |
4030 | } | 4447 | } |
4031 | if (need_resched()) { | 4448 | if (need_resched()) { |
4032 | up_read(&vcpu->kvm->slots_lock); | 4449 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4033 | kvm_resched(vcpu); | 4450 | kvm_resched(vcpu); |
4034 | down_read(&vcpu->kvm->slots_lock); | 4451 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
4035 | } | 4452 | } |
4036 | } | 4453 | } |
4037 | 4454 | ||
4038 | up_read(&vcpu->kvm->slots_lock); | 4455 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4039 | post_kvm_run_save(vcpu); | 4456 | post_kvm_run_save(vcpu); |
4040 | 4457 | ||
4041 | vapic_exit(vcpu); | 4458 | vapic_exit(vcpu); |
@@ -4074,10 +4491,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4074 | vcpu->mmio_read_completed = 1; | 4491 | vcpu->mmio_read_completed = 1; |
4075 | vcpu->mmio_needed = 0; | 4492 | vcpu->mmio_needed = 0; |
4076 | 4493 | ||
4077 | down_read(&vcpu->kvm->slots_lock); | 4494 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4078 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4495 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, |
4079 | EMULTYPE_NO_DECODE); | 4496 | EMULTYPE_NO_DECODE); |
4080 | up_read(&vcpu->kvm->slots_lock); | 4497 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4081 | if (r == EMULATE_DO_MMIO) { | 4498 | if (r == EMULATE_DO_MMIO) { |
4082 | /* | 4499 | /* |
4083 | * Read-modify-write. Back to userspace. | 4500 | * Read-modify-write. Back to userspace. |
@@ -4204,13 +4621,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4204 | sregs->gdt.limit = dt.limit; | 4621 | sregs->gdt.limit = dt.limit; |
4205 | sregs->gdt.base = dt.base; | 4622 | sregs->gdt.base = dt.base; |
4206 | 4623 | ||
4207 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 4624 | sregs->cr0 = kvm_read_cr0(vcpu); |
4208 | sregs->cr0 = vcpu->arch.cr0; | ||
4209 | sregs->cr2 = vcpu->arch.cr2; | 4625 | sregs->cr2 = vcpu->arch.cr2; |
4210 | sregs->cr3 = vcpu->arch.cr3; | 4626 | sregs->cr3 = vcpu->arch.cr3; |
4211 | sregs->cr4 = vcpu->arch.cr4; | 4627 | sregs->cr4 = kvm_read_cr4(vcpu); |
4212 | sregs->cr8 = kvm_get_cr8(vcpu); | 4628 | sregs->cr8 = kvm_get_cr8(vcpu); |
4213 | sregs->efer = vcpu->arch.shadow_efer; | 4629 | sregs->efer = vcpu->arch.efer; |
4214 | sregs->apic_base = kvm_get_apic_base(vcpu); | 4630 | sregs->apic_base = kvm_get_apic_base(vcpu); |
4215 | 4631 | ||
4216 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); | 4632 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); |
@@ -4298,14 +4714,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4298 | { | 4714 | { |
4299 | struct descriptor_table dtable; | 4715 | struct descriptor_table dtable; |
4300 | u16 index = selector >> 3; | 4716 | u16 index = selector >> 3; |
4717 | int ret; | ||
4718 | u32 err; | ||
4719 | gva_t addr; | ||
4301 | 4720 | ||
4302 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | 4721 | get_segment_descriptor_dtable(vcpu, selector, &dtable); |
4303 | 4722 | ||
4304 | if (dtable.limit < index * 8 + 7) { | 4723 | if (dtable.limit < index * 8 + 7) { |
4305 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 4724 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
4306 | return 1; | 4725 | return X86EMUL_PROPAGATE_FAULT; |
4307 | } | 4726 | } |
4308 | return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4727 | addr = dtable.base + index * 8; |
4728 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
4729 | vcpu, &err); | ||
4730 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
4731 | kvm_inject_page_fault(vcpu, addr, err); | ||
4732 | |||
4733 | return ret; | ||
4309 | } | 4734 | } |
4310 | 4735 | ||
4311 | /* allowed just for 8 bytes segments */ | 4736 | /* allowed just for 8 bytes segments */ |
@@ -4319,15 +4744,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4319 | 4744 | ||
4320 | if (dtable.limit < index * 8 + 7) | 4745 | if (dtable.limit < index * 8 + 7) |
4321 | return 1; | 4746 | return 1; |
4322 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4747 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); |
4748 | } | ||
4749 | |||
4750 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
4751 | struct desc_struct *seg_desc) | ||
4752 | { | ||
4753 | u32 base_addr = get_desc_base(seg_desc); | ||
4754 | |||
4755 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
4323 | } | 4756 | } |
4324 | 4757 | ||
4325 | static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, | 4758 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, |
4326 | struct desc_struct *seg_desc) | 4759 | struct desc_struct *seg_desc) |
4327 | { | 4760 | { |
4328 | u32 base_addr = get_desc_base(seg_desc); | 4761 | u32 base_addr = get_desc_base(seg_desc); |
4329 | 4762 | ||
4330 | return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); | 4763 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); |
4331 | } | 4764 | } |
4332 | 4765 | ||
4333 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | 4766 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) |
@@ -4338,18 +4771,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | |||
4338 | return kvm_seg.selector; | 4771 | return kvm_seg.selector; |
4339 | } | 4772 | } |
4340 | 4773 | ||
4341 | static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, | ||
4342 | u16 selector, | ||
4343 | struct kvm_segment *kvm_seg) | ||
4344 | { | ||
4345 | struct desc_struct seg_desc; | ||
4346 | |||
4347 | if (load_guest_segment_descriptor(vcpu, selector, &seg_desc)) | ||
4348 | return 1; | ||
4349 | seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg); | ||
4350 | return 0; | ||
4351 | } | ||
4352 | |||
4353 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | 4774 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) |
4354 | { | 4775 | { |
4355 | struct kvm_segment segvar = { | 4776 | struct kvm_segment segvar = { |
@@ -4367,7 +4788,7 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se | |||
4367 | .unusable = 0, | 4788 | .unusable = 0, |
4368 | }; | 4789 | }; |
4369 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | 4790 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); |
4370 | return 0; | 4791 | return X86EMUL_CONTINUE; |
4371 | } | 4792 | } |
4372 | 4793 | ||
4373 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | 4794 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) |
@@ -4377,24 +4798,112 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | |||
4377 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | 4798 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); |
4378 | } | 4799 | } |
4379 | 4800 | ||
4380 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 4801 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) |
4381 | int type_bits, int seg) | ||
4382 | { | 4802 | { |
4383 | struct kvm_segment kvm_seg; | 4803 | struct kvm_segment kvm_seg; |
4804 | struct desc_struct seg_desc; | ||
4805 | u8 dpl, rpl, cpl; | ||
4806 | unsigned err_vec = GP_VECTOR; | ||
4807 | u32 err_code = 0; | ||
4808 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
4809 | int ret; | ||
4384 | 4810 | ||
4385 | if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) | 4811 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) |
4386 | return kvm_load_realmode_segment(vcpu, selector, seg); | 4812 | return kvm_load_realmode_segment(vcpu, selector, seg); |
4387 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) | ||
4388 | return 1; | ||
4389 | kvm_seg.type |= type_bits; | ||
4390 | 4813 | ||
4391 | if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && | 4814 | /* NULL selector is not valid for TR, CS and SS */ |
4392 | seg != VCPU_SREG_LDTR) | 4815 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) |
4393 | if (!kvm_seg.s) | 4816 | && null_selector) |
4394 | kvm_seg.unusable = 1; | 4817 | goto exception; |
4818 | |||
4819 | /* TR should be in GDT only */ | ||
4820 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
4821 | goto exception; | ||
4822 | |||
4823 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4824 | if (ret) | ||
4825 | return ret; | ||
4826 | |||
4827 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
4828 | |||
4829 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
4830 | kvm_seg.unusable = 1; | ||
4831 | goto load; | ||
4832 | } | ||
4833 | |||
4834 | err_code = selector & 0xfffc; | ||
4835 | err_vec = GP_VECTOR; | ||
4395 | 4836 | ||
4837 | /* can't load a system descriptor into an ordinary segment register */ | ||
4838 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
4839 | goto exception; | ||
4840 | |||
4841 | if (!kvm_seg.present) { | ||
4842 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
4843 | goto exception; | ||
4844 | } | ||
4845 | |||
4846 | rpl = selector & 3; | ||
4847 | dpl = kvm_seg.dpl; | ||
4848 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
4849 | |||
4850 | switch (seg) { | ||
4851 | case VCPU_SREG_SS: | ||
4852 | /* | ||
4853 | * segment is not a writable data segment, or segment | ||
4854 | * selector's RPL != CPL, or segment descriptor's DPL != CPL | ||
4855 | */ | ||
4856 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
4857 | goto exception; | ||
4858 | break; | ||
4859 | case VCPU_SREG_CS: | ||
4860 | if (!(kvm_seg.type & 8)) | ||
4861 | goto exception; | ||
4862 | |||
4863 | if (kvm_seg.type & 4) { | ||
4864 | /* conforming */ | ||
4865 | if (dpl > cpl) | ||
4866 | goto exception; | ||
4867 | } else { | ||
4868 | /* nonconforming */ | ||
4869 | if (rpl > cpl || dpl != cpl) | ||
4870 | goto exception; | ||
4871 | } | ||
4872 | /* CS(RPL) <- CPL */ | ||
4873 | selector = (selector & 0xfffc) | cpl; | ||
4874 | break; | ||
4875 | case VCPU_SREG_TR: | ||
4876 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
4877 | goto exception; | ||
4878 | break; | ||
4879 | case VCPU_SREG_LDTR: | ||
4880 | if (kvm_seg.s || kvm_seg.type != 2) | ||
4881 | goto exception; | ||
4882 | break; | ||
4883 | default: /* DS, ES, FS, or GS */ | ||
4884 | /* | ||
4885 | * segment is not a data or readable code segment or | ||
4886 | * ((segment is a data or nonconforming code segment) | ||
4887 | * and (both RPL and CPL > DPL)) | ||
4888 | */ | ||
4889 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
4890 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
4891 | goto exception; | ||
4892 | break; | ||
4893 | } | ||
4894 | |||
4895 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
4896 | /* mark segment as accessed */ | ||
4897 | kvm_seg.type |= 1; | ||
4898 | seg_desc.type |= 1; | ||
4899 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4900 | } | ||
4901 | load: | ||
4396 | kvm_set_segment(vcpu, &kvm_seg, seg); | 4902 | kvm_set_segment(vcpu, &kvm_seg, seg); |
4397 | return 0; | 4903 | return X86EMUL_CONTINUE; |
4904 | exception: | ||
4905 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
4906 | return X86EMUL_PROPAGATE_FAULT; | ||
4398 | } | 4907 | } |
4399 | 4908 | ||
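
The privilege checks above operate on the three fields of a 16-bit selector; a minimal sketch of that decomposition, matching the masks used in this function and in load_guest_segment_descriptor() (the helper names are illustrative):

/* Sketch only: selector fields used by the checks above. */
static inline u8 selector_rpl(u16 sel)		{ return sel & 3; }		/* requested privilege level */
static inline bool selector_in_ldt(u16 sel)	{ return sel & (1 << 2); }	/* table indicator: 1 = LDT, 0 = GDT */
static inline u16 selector_index(u16 sel)	{ return sel >> 3; }		/* descriptor table index */
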
4400 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | 4909 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, |
@@ -4420,6 +4929,14 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
4420 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 4929 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
4421 | } | 4930 | } |
4422 | 4931 | ||
4932 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
4933 | { | ||
4934 | struct kvm_segment kvm_seg; | ||
4935 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4936 | kvm_seg.selector = sel; | ||
4937 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4938 | } | ||
4939 | |||
4423 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | 4940 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, |
4424 | struct tss_segment_32 *tss) | 4941 | struct tss_segment_32 *tss) |
4425 | { | 4942 | { |
@@ -4437,25 +4954,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
4437 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | 4954 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); |
4438 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | 4955 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); |
4439 | 4956 | ||
4440 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) | 4957 | /* |
4958 | * SDM says that segment selectors are loaded before segment | ||
4959 | * descriptors | ||
4960 | */ | ||
4961 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
4962 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
4963 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
4964 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
4965 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
4966 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
4967 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
4968 | |||
4969 | /* | ||
4970 | * Now load segment descriptors. If a fault happens at this stage | ||
4971 | * it is handled in the context of the new task | ||
4972 | */ | ||
4973 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
4441 | return 1; | 4974 | return 1; |
4442 | 4975 | ||
4443 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 4976 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) |
4444 | return 1; | 4977 | return 1; |
4445 | 4978 | ||
4446 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 4979 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) |
4447 | return 1; | 4980 | return 1; |
4448 | 4981 | ||
4449 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 4982 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) |
4450 | return 1; | 4983 | return 1; |
4451 | 4984 | ||
4452 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 4985 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) |
4453 | return 1; | 4986 | return 1; |
4454 | 4987 | ||
4455 | if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) | 4988 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) |
4456 | return 1; | 4989 | return 1; |
4457 | 4990 | ||
4458 | if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) | 4991 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) |
4459 | return 1; | 4992 | return 1; |
4460 | return 0; | 4993 | return 0; |
4461 | } | 4994 | } |
@@ -4495,19 +5028,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
4495 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | 5028 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); |
4496 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | 5029 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); |
4497 | 5030 | ||
4498 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) | 5031 | /* |
5032 | * SDM says that segment selectors are loaded before segment | ||
5033 | * descriptors | ||
5034 | */ | ||
5035 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
5036 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5037 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5038 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5039 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5040 | |||
5041 | /* | ||
5042 | * Now load segment descriptors. If a fault happens at this stage | ||
5043 | * it is handled in the context of the new task | ||
5044 | */ | ||
5045 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
4499 | return 1; | 5046 | return 1; |
4500 | 5047 | ||
4501 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 5048 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) |
4502 | return 1; | 5049 | return 1; |
4503 | 5050 | ||
4504 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 5051 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) |
4505 | return 1; | 5052 | return 1; |
4506 | 5053 | ||
4507 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 5054 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) |
4508 | return 1; | 5055 | return 1; |
4509 | 5056 | ||
4510 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 5057 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) |
4511 | return 1; | 5058 | return 1; |
4512 | return 0; | 5059 | return 0; |
4513 | } | 5060 | } |
@@ -4529,7 +5076,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4529 | sizeof tss_segment_16)) | 5076 | sizeof tss_segment_16)) |
4530 | goto out; | 5077 | goto out; |
4531 | 5078 | ||
4532 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | 5079 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), |
4533 | &tss_segment_16, sizeof tss_segment_16)) | 5080 | &tss_segment_16, sizeof tss_segment_16)) |
4534 | goto out; | 5081 | goto out; |
4535 | 5082 | ||
@@ -4537,7 +5084,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4537 | tss_segment_16.prev_task_link = old_tss_sel; | 5084 | tss_segment_16.prev_task_link = old_tss_sel; |
4538 | 5085 | ||
4539 | if (kvm_write_guest(vcpu->kvm, | 5086 | if (kvm_write_guest(vcpu->kvm, |
4540 | get_tss_base_addr(vcpu, nseg_desc), | 5087 | get_tss_base_addr_write(vcpu, nseg_desc), |
4541 | &tss_segment_16.prev_task_link, | 5088 | &tss_segment_16.prev_task_link, |
4542 | sizeof tss_segment_16.prev_task_link)) | 5089 | sizeof tss_segment_16.prev_task_link)) |
4543 | goto out; | 5090 | goto out; |
@@ -4568,7 +5115,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4568 | sizeof tss_segment_32)) | 5115 | sizeof tss_segment_32)) |
4569 | goto out; | 5116 | goto out; |
4570 | 5117 | ||
4571 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | 5118 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), |
4572 | &tss_segment_32, sizeof tss_segment_32)) | 5119 | &tss_segment_32, sizeof tss_segment_32)) |
4573 | goto out; | 5120 | goto out; |
4574 | 5121 | ||
@@ -4576,7 +5123,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4576 | tss_segment_32.prev_task_link = old_tss_sel; | 5123 | tss_segment_32.prev_task_link = old_tss_sel; |
4577 | 5124 | ||
4578 | if (kvm_write_guest(vcpu->kvm, | 5125 | if (kvm_write_guest(vcpu->kvm, |
4579 | get_tss_base_addr(vcpu, nseg_desc), | 5126 | get_tss_base_addr_write(vcpu, nseg_desc), |
4580 | &tss_segment_32.prev_task_link, | 5127 | &tss_segment_32.prev_task_link, |
4581 | sizeof tss_segment_32.prev_task_link)) | 5128 | sizeof tss_segment_32.prev_task_link)) |
4582 | goto out; | 5129 | goto out; |
@@ -4599,7 +5146,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4599 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | 5146 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); |
4600 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | 5147 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); |
4601 | 5148 | ||
4602 | old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); | 5149 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); |
4603 | 5150 | ||
4604 | /* FIXME: Handle errors. Failure to read either TSS or their | 5151 | /* FIXME: Handle errors. Failure to read either TSS or their |
4605 | * descriptors should generate a pagefault. | 5152 | * descriptors should generate a pagefault. |
@@ -4658,7 +5205,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4658 | &nseg_desc); | 5205 | &nseg_desc); |
4659 | } | 5206 | } |
4660 | 5207 | ||
4661 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); | 5208 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); |
4662 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | 5209 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); |
4663 | tr_seg.type = 11; | 5210 | tr_seg.type = 11; |
4664 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 5211 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); |
@@ -4689,17 +5236,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4689 | 5236 | ||
4690 | kvm_set_cr8(vcpu, sregs->cr8); | 5237 | kvm_set_cr8(vcpu, sregs->cr8); |
4691 | 5238 | ||
4692 | mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; | 5239 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; |
4693 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 5240 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
4694 | kvm_set_apic_base(vcpu, sregs->apic_base); | 5241 | kvm_set_apic_base(vcpu, sregs->apic_base); |
4695 | 5242 | ||
4696 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 5243 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; |
4697 | |||
4698 | mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0; | ||
4699 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 5244 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
4700 | vcpu->arch.cr0 = sregs->cr0; | 5245 | vcpu->arch.cr0 = sregs->cr0; |
4701 | 5246 | ||
4702 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; | 5247 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
4703 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5248 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
4704 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5249 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
4705 | load_pdptrs(vcpu, vcpu->arch.cr3); | 5250 | load_pdptrs(vcpu, vcpu->arch.cr3); |
@@ -4734,7 +5279,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4734 | /* Older userspace won't unhalt the vcpu on reset. */ | 5279 | /* Older userspace won't unhalt the vcpu on reset. */ |
4735 | if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && | 5280 | if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && |
4736 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && | 5281 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && |
4737 | !(vcpu->arch.cr0 & X86_CR0_PE)) | 5282 | !is_protmode(vcpu)) |
4738 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5283 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
4739 | 5284 | ||
4740 | vcpu_put(vcpu); | 5285 | vcpu_put(vcpu); |
@@ -4832,11 +5377,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
4832 | { | 5377 | { |
4833 | unsigned long vaddr = tr->linear_address; | 5378 | unsigned long vaddr = tr->linear_address; |
4834 | gpa_t gpa; | 5379 | gpa_t gpa; |
5380 | int idx; | ||
4835 | 5381 | ||
4836 | vcpu_load(vcpu); | 5382 | vcpu_load(vcpu); |
4837 | down_read(&vcpu->kvm->slots_lock); | 5383 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
4838 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); | 5384 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); |
4839 | up_read(&vcpu->kvm->slots_lock); | 5385 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
4840 | tr->physical_address = gpa; | 5386 | tr->physical_address = gpa; |
4841 | tr->valid = gpa != UNMAPPED_GVA; | 5387 | tr->valid = gpa != UNMAPPED_GVA; |
4842 | tr->writeable = 1; | 5388 | tr->writeable = 1; |
@@ -4917,14 +5463,14 @@ EXPORT_SYMBOL_GPL(fx_init); | |||
4917 | 5463 | ||
4918 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 5464 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
4919 | { | 5465 | { |
4920 | if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) | 5466 | if (vcpu->guest_fpu_loaded) |
4921 | return; | 5467 | return; |
4922 | 5468 | ||
4923 | vcpu->guest_fpu_loaded = 1; | 5469 | vcpu->guest_fpu_loaded = 1; |
4924 | kvm_fx_save(&vcpu->arch.host_fx_image); | 5470 | kvm_fx_save(&vcpu->arch.host_fx_image); |
4925 | kvm_fx_restore(&vcpu->arch.guest_fx_image); | 5471 | kvm_fx_restore(&vcpu->arch.guest_fx_image); |
5472 | trace_kvm_fpu(1); | ||
4926 | } | 5473 | } |
4927 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); | ||
4928 | 5474 | ||
4929 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | 5475 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) |
4930 | { | 5476 | { |
@@ -4935,8 +5481,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
4935 | kvm_fx_save(&vcpu->arch.guest_fx_image); | 5481 | kvm_fx_save(&vcpu->arch.guest_fx_image); |
4936 | kvm_fx_restore(&vcpu->arch.host_fx_image); | 5482 | kvm_fx_restore(&vcpu->arch.host_fx_image); |
4937 | ++vcpu->stat.fpu_reload; | 5483 | ++vcpu->stat.fpu_reload; |
5484 | set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); | ||
5485 | trace_kvm_fpu(0); | ||
4938 | } | 5486 | } |
4939 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | ||
4940 | 5487 | ||
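
Rather than deactivating the FPU directly, kvm_put_guest_fpu() now only records a request; the deactivation itself is performed by the request-processing code added to vcpu_enter_guest() earlier in this patch, before the next guest entry. The consumer side of that request, repeated here for clarity:

/* From vcpu_enter_guest() above: the deferred deactivation the request triggers. */
if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
	vcpu->fpu_active = 0;
	kvm_x86_ops->fpu_deactivate(vcpu);
}
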
4941 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 5488 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
4942 | { | 5489 | { |
@@ -5088,11 +5635,13 @@ fail: | |||
5088 | 5635 | ||
5089 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | 5636 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) |
5090 | { | 5637 | { |
5638 | int idx; | ||
5639 | |||
5091 | kfree(vcpu->arch.mce_banks); | 5640 | kfree(vcpu->arch.mce_banks); |
5092 | kvm_free_lapic(vcpu); | 5641 | kvm_free_lapic(vcpu); |
5093 | down_read(&vcpu->kvm->slots_lock); | 5642 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
5094 | kvm_mmu_destroy(vcpu); | 5643 | kvm_mmu_destroy(vcpu); |
5095 | up_read(&vcpu->kvm->slots_lock); | 5644 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
5096 | free_page((unsigned long)vcpu->arch.pio_data); | 5645 | free_page((unsigned long)vcpu->arch.pio_data); |
5097 | } | 5646 | } |
5098 | 5647 | ||
@@ -5103,6 +5652,12 @@ struct kvm *kvm_arch_create_vm(void) | |||
5103 | if (!kvm) | 5652 | if (!kvm) |
5104 | return ERR_PTR(-ENOMEM); | 5653 | return ERR_PTR(-ENOMEM); |
5105 | 5654 | ||
5655 | kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
5656 | if (!kvm->arch.aliases) { | ||
5657 | kfree(kvm); | ||
5658 | return ERR_PTR(-ENOMEM); | ||
5659 | } | ||
5660 | |||
5106 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 5661 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
5107 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 5662 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
5108 | 5663 | ||
@@ -5159,16 +5714,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
5159 | put_page(kvm->arch.apic_access_page); | 5714 | put_page(kvm->arch.apic_access_page); |
5160 | if (kvm->arch.ept_identity_pagetable) | 5715 | if (kvm->arch.ept_identity_pagetable) |
5161 | put_page(kvm->arch.ept_identity_pagetable); | 5716 | put_page(kvm->arch.ept_identity_pagetable); |
5717 | cleanup_srcu_struct(&kvm->srcu); | ||
5718 | kfree(kvm->arch.aliases); | ||
5162 | kfree(kvm); | 5719 | kfree(kvm); |
5163 | } | 5720 | } |
5164 | 5721 | ||
5165 | int kvm_arch_set_memory_region(struct kvm *kvm, | 5722 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
5166 | struct kvm_userspace_memory_region *mem, | 5723 | struct kvm_memory_slot *memslot, |
5167 | struct kvm_memory_slot old, | 5724 | struct kvm_memory_slot old, |
5725 | struct kvm_userspace_memory_region *mem, | ||
5168 | int user_alloc) | 5726 | int user_alloc) |
5169 | { | 5727 | { |
5170 | int npages = mem->memory_size >> PAGE_SHIFT; | 5728 | int npages = memslot->npages; |
5171 | struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; | ||
5172 | 5729 | ||
5173 | /*To keep backward compatibility with older userspace, | 5730 | /*To keep backward compatibility with older userspace, |
5174 | *x86 needs to handle the !user_alloc case. | 5731 | *x86 needs to handle the !user_alloc case. |
@@ -5188,26 +5745,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
5188 | if (IS_ERR((void *)userspace_addr)) | 5745 | if (IS_ERR((void *)userspace_addr)) |
5189 | return PTR_ERR((void *)userspace_addr); | 5746 | return PTR_ERR((void *)userspace_addr); |
5190 | 5747 | ||
5191 | /* set userspace_addr atomically for kvm_hva_to_rmapp */ | ||
5192 | spin_lock(&kvm->mmu_lock); | ||
5193 | memslot->userspace_addr = userspace_addr; | 5748 | memslot->userspace_addr = userspace_addr; |
5194 | spin_unlock(&kvm->mmu_lock); | ||
5195 | } else { | ||
5196 | if (!old.user_alloc && old.rmap) { | ||
5197 | int ret; | ||
5198 | |||
5199 | down_write(¤t->mm->mmap_sem); | ||
5200 | ret = do_munmap(current->mm, old.userspace_addr, | ||
5201 | old.npages * PAGE_SIZE); | ||
5202 | up_write(¤t->mm->mmap_sem); | ||
5203 | if (ret < 0) | ||
5204 | printk(KERN_WARNING | ||
5205 | "kvm_vm_ioctl_set_memory_region: " | ||
5206 | "failed to munmap memory\n"); | ||
5207 | } | ||
5208 | } | 5749 | } |
5209 | } | 5750 | } |
5210 | 5751 | ||
5752 | |||
5753 | return 0; | ||
5754 | } | ||
5755 | |||
5756 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
5757 | struct kvm_userspace_memory_region *mem, | ||
5758 | struct kvm_memory_slot old, | ||
5759 | int user_alloc) | ||
5760 | { | ||
5761 | |||
5762 | int npages = mem->memory_size >> PAGE_SHIFT; | ||
5763 | |||
5764 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | ||
5765 | int ret; | ||
5766 | |||
5767 | down_write(¤t->mm->mmap_sem); | ||
5768 | ret = do_munmap(current->mm, old.userspace_addr, | ||
5769 | old.npages * PAGE_SIZE); | ||
5770 | up_write(¤t->mm->mmap_sem); | ||
5771 | if (ret < 0) | ||
5772 | printk(KERN_WARNING | ||
5773 | "kvm_vm_ioctl_set_memory_region: " | ||
5774 | "failed to munmap memory\n"); | ||
5775 | } | ||
5776 | |||
5211 | spin_lock(&kvm->mmu_lock); | 5777 | spin_lock(&kvm->mmu_lock); |
5212 | if (!kvm->arch.n_requested_mmu_pages) { | 5778 | if (!kvm->arch.n_requested_mmu_pages) { |
5213 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 5779 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); |
@@ -5216,8 +5782,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
5216 | 5782 | ||
5217 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 5783 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
5218 | spin_unlock(&kvm->mmu_lock); | 5784 | spin_unlock(&kvm->mmu_lock); |
5219 | |||
5220 | return 0; | ||
5221 | } | 5785 | } |
5222 | 5786 | ||
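
Splitting the old kvm_arch_set_memory_region() into prepare and commit halves lets generic code publish the new memslot array between the two steps under SRCU. A rough sketch of the expected calling sequence on the generic side (simplified and assumed here; error handling and slot bookkeeping omitted):

/* Sketch only: generic-side ordering assumed by the prepare/commit split. */
r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
if (r)
	goto out_free;

rcu_assign_pointer(kvm->memslots, slots);	/* publish the new slot array */
synchronize_srcu_expedited(&kvm->srcu);		/* wait for existing SRCU readers */

kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
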
5223 | void kvm_arch_flush_shadow(struct kvm *kvm) | 5787 | void kvm_arch_flush_shadow(struct kvm *kvm) |