Diffstat (limited to 'arch/x86/kvm/svm.c')
 arch/x86/kvm/svm.c | 359 ++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 223 insertions(+), 136 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6c79a14a3b6f..1d9b33843c80 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -46,6 +46,7 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_NPT  (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
 
 #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
 #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
@@ -53,15 +54,6 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
 	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -85,6 +77,9 @@ struct nested_state {
 	/* gpa pointers to the real vectors */
 	u64 vmcb_msrpm;
 
+	/* A VMEXIT is required but not yet emulated */
+	bool exit_required;
+
 	/* cache for intercepts of the guest */
 	u16 intercept_cr_read;
 	u16 intercept_cr_write;
@@ -112,6 +107,8 @@ struct vcpu_svm {
 	u32 *msrpm;
 
 	struct nested_state nested;
+
+	bool nmi_singlestep;
 };
 
 /* enable NPT for AMD64 and X86 with PAE */
@@ -286,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+		if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
 				EMULATE_DONE)
 			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
@@ -316,7 +313,7 @@ static void svm_hardware_disable(void *garbage)
 	cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
 	struct svm_cpu_data *sd;
@@ -325,16 +322,21 @@ static void svm_hardware_enable(void *garbage)
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME)
+		return -EBUSY;
+
 	if (!has_svm()) {
-		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-		return;
+		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+		       me);
+		return -EINVAL;
 	}
 	sd = per_cpu(svm_data, me);
 
 	if (!sd) {
-		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
 		       me);
-		return;
+		return -EINVAL;
 	}
 
 	sd->asid_generation = 1;
@@ -345,11 +347,11 @@ static void svm_hardware_enable(void *garbage)
 	gdt = (struct desc_struct *)gdt_descr.base;
 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-	rdmsrl(MSR_EFER, efer);
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
-	wrmsrl(MSR_VM_HSAVE_PA,
-	       page_to_pfn(sd->save_area) << PAGE_SHIFT);
+	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+
+	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
@@ -475,7 +477,7 @@ static __init int svm_hardware_setup(void)
 		kvm_enable_efer_bits(EFER_SVME);
 	}
 
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		r = svm_cpu_init(cpu);
 		if (r)
 			goto err;
@@ -509,7 +511,7 @@ static __exit void svm_hardware_unsetup(void)
 {
 	int cpu;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		svm_cpu_uninit(cpu);
 
 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@ -624,11 +626,12 @@ static void init_vmcb(struct vcpu_svm *svm)
 	save->rip = 0x0000fff0;
 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
 
-	/*
-	 * cr0 val on cpu init should be 0x60000010, we enable cpu
-	 * cache by default. the orderly way is to enable cache in bios.
+	/* This is the guest-visible cr0 value.
+	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
 	 */
-	save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
+	svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+	kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
 
@@ -643,8 +646,6 @@ static void init_vmcb(struct vcpu_svm *svm)
 		control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
 						 INTERCEPT_CR3_MASK);
 		save->g_pat = 0x0007040600070406ULL;
-		/* enable caching because the QEMU Bios doesn't enable it */
-		save->cr0 = X86_CR0_ET;
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -653,6 +654,11 @@ static void init_vmcb(struct vcpu_svm *svm)
 	svm->nested.vmcb = 0;
 	svm->vcpu.arch.hflags = 0;
 
+	if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+		control->pause_filter_count = 3000;
+		control->intercept |= (1ULL << INTERCEPT_PAUSE);
+	}
+
 	enable_gif(svm);
 }
 
@@ -757,15 +763,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	int i;
 
 	if (unlikely(cpu != vcpu->cpu)) {
-		u64 tsc_this, delta;
+		u64 delta;
 
 		/*
 		 * Make sure that the guest sees a monotonically
 		 * increasing TSC.
 		 */
-		rdtscll(tsc_this);
-		delta = vcpu->arch.host_tsc - tsc_this;
+		delta = vcpu->arch.host_tsc - native_read_tsc();
 		svm->vmcb->control.tsc_offset += delta;
+		if (is_nested(svm))
+			svm->nested.hsave->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
 		kvm_migrate_timers(vcpu);
 		svm->asid_generation = 0;
@@ -784,7 +791,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
-	rdtscll(vcpu->arch.host_tsc);
+	vcpu->arch.host_tsc = native_read_tsc();
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -1042,7 +1049,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
 
-	if (vcpu->arch.singlestep)
+	if (svm->nmi_singlestep)
 		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -1057,26 +1064,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 		vcpu->guest_debug = 0;
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
-	update_db_intercept(vcpu);
-
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
 	else
 		svm->vmcb->save.dr7 = vcpu->arch.dr7;
 
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
-		svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-
-	return 0;
+	update_db_intercept(vcpu);
 }
 
 static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1177,7 +1174,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 	}
 }
 
-static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int pf_interception(struct vcpu_svm *svm)
 {
 	u64 fault_address;
 	u32 error_code;
@@ -1191,17 +1188,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int db_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	if (!(svm->vcpu.guest_debug &
 	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
-		!svm->vcpu.arch.singlestep) {
+		!svm->nmi_singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
 
-	if (svm->vcpu.arch.singlestep) {
-		svm->vcpu.arch.singlestep = false;
+	if (svm->nmi_singlestep) {
+		svm->nmi_singlestep = false;
 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
 			svm->vmcb->save.rflags &=
 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
@@ -1220,25 +1219,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int bp_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	kvm_run->exit_reason = KVM_EXIT_DEBUG;
 	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
 	kvm_run->debug.arch.exception = BP_VECTOR;
 	return 0;
 }
 
-static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int ud_interception(struct vcpu_svm *svm)
 {
 	int er;
 
-	er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+	er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
 	if (er != EMULATE_DONE)
 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
-static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nm_interception(struct vcpu_svm *svm)
 {
 	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 	if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
@@ -1248,7 +1249,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int mc_interception(struct vcpu_svm *svm)
 {
 	/*
 	 * On an #MC intercept the MCE handler is not called automatically in
@@ -1261,8 +1262,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int shutdown_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	/*
 	 * VMCB is undefined after a SHUTDOWN intercept
 	 * so reinitialize it.
@@ -1274,7 +1277,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 0;
 }
 
-static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int io_interception(struct vcpu_svm *svm)
 {
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
 	int size, in, string;
@@ -1288,7 +1291,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 	if (string) {
 		if (emulate_instruction(&svm->vcpu,
-					kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+					0, 0, 0) == EMULATE_DO_MMIO)
 			return 0;
 		return 1;
 	}
@@ -1298,33 +1301,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 
 	skip_emulated_instruction(&svm->vcpu);
-	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
+	return kvm_emulate_pio(&svm->vcpu, in, size, port);
 }
 
-static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nmi_interception(struct vcpu_svm *svm)
 {
 	return 1;
 }
 
-static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int intr_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.irq_exits;
 	return 1;
 }
 
-static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nop_on_interception(struct vcpu_svm *svm)
 {
 	return 1;
 }
 
-static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int halt_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
 	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_halt(&svm->vcpu);
 }
 
-static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmmcall_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
@@ -1375,8 +1378,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
 
 	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
 
-	if (nested_svm_exit_handled(svm)) {
-		nsvm_printk("VMexit -> INTR\n");
+	if (svm->nested.intercept & 1ULL) {
+		/*
+		 * The #vmexit can't be emulated here directly because this
+		 * code path runs with irqs and preemtion disabled. A
+		 * #vmexit emulation might sleep. Only signal request for
+		 * the #vmexit here.
+		 */
+		svm->nested.exit_required = true;
+		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
 		return 1;
 	}
 
@@ -1387,10 +1397,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
 {
 	struct page *page;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
-
 	if (is_error_page(page))
 		goto error;
 
@@ -1529,14 +1536,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
 	}
 	default: {
 		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
-		nsvm_printk("exit code: 0x%x\n", exit_code);
 		if (svm->nested.intercept & exit_bits)
 			vmexit = NESTED_EXIT_DONE;
 	}
 	}
 
 	if (vmexit == NESTED_EXIT_DONE) {
-		nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
 		nested_svm_vmexit(svm);
 	}
 
@@ -1581,6 +1586,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	struct vmcb *hsave = svm->nested.hsave;
 	struct vmcb *vmcb = svm->vmcb;
 
+	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+				       vmcb->control.exit_info_1,
+				       vmcb->control.exit_info_2,
+				       vmcb->control.exit_int_info,
+				       vmcb->control.exit_int_info_err);
+
 	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
 	if (!nested_vmcb)
 		return 1;
@@ -1614,6 +1625,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
 	nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+	/*
+	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+	 * to make sure that we do not lose injected events. So check event_inj
+	 * here and copy it to exit_int_info if it is valid.
+	 * Exit_int_info and event_inj can't be both valid because the case
+	 * below only happens on a VMRUN instruction intercept which has
+	 * no valid exit_int_info set.
+	 */
+	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+		struct vmcb_control_area *nc = &nested_vmcb->control;
+
+		nc->exit_int_info = vmcb->control.event_inj;
+		nc->exit_int_info_err = vmcb->control.event_inj_err;
+	}
+
 	nested_vmcb->control.tlb_ctl = 0;
 	nested_vmcb->control.event_inj = 0;
 	nested_vmcb->control.event_inj_err = 0;
@@ -1625,10 +1652,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	/* Restore the original control entries */
 	copy_vmcb_control_area(vmcb, hsave);
 
-	/* Kill any pending exceptions */
-	if (svm->vcpu.arch.exception.pending == true)
-		nsvm_printk("WARNING: Pending Exception\n");
-
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
 
@@ -1699,6 +1722,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	/* nested_vmcb is our indicator if nested SVM is activated */
 	svm->nested.vmcb = svm->vmcb->save.rax;
 
+	trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+			       nested_vmcb->save.rip,
+			       nested_vmcb->control.int_ctl,
+			       nested_vmcb->control.event_inj,
+			       nested_vmcb->control.nested_ctl);
+
 	/* Clear internal status */
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1786,28 +1815,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	svm->nested.intercept = nested_vmcb->control.intercept;
 
 	force_new_asid(&svm->vcpu);
-	svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
-	svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
 	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
-	if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
-		nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
-				nested_vmcb->control.int_ctl);
-	}
 	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
 		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
 	else
 		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
 
-	nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
-			nested_vmcb->control.exit_int_info,
-			nested_vmcb->control.int_state);
-
 	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
 	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
 	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
-	if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
-		nsvm_printk("Injecting Event: 0x%x\n",
-				nested_vmcb->control.event_inj);
 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
@@ -1834,7 +1850,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
 }
 
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmload_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 
@@ -1854,7 +1870,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmsave_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 
@@ -1874,10 +1890,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmrun_interception(struct vcpu_svm *svm)
 {
-	nsvm_printk("VMrun\n");
-
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
@@ -1904,7 +1918,7 @@ failed:
 	return 1;
 }
 
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int stgi_interception(struct vcpu_svm *svm)
 {
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -1917,7 +1931,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int clgi_interception(struct vcpu_svm *svm)
 {
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -1934,10 +1948,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpga_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	nsvm_printk("INVLPGA\n");
+
+	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+			  vcpu->arch.regs[VCPU_REGS_RAX]);
 
 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
 	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -1947,15 +1963,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invalid_op_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int skinit_interception(struct vcpu_svm *svm)
 {
+	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
-static int task_switch_interception(struct vcpu_svm *svm,
-				    struct kvm_run *kvm_run)
+static int invalid_op_interception(struct vcpu_svm *svm)
+{
+	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+	return 1;
+}
+
+static int task_switch_interception(struct vcpu_svm *svm)
 {
 	u16 tss_selector;
 	int reason;
@@ -2005,14 +2027,14 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
 
-static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cpuid_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 	kvm_emulate_cpuid(&svm->vcpu);
 	return 1;
 }
 
-static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int iret_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.nmi_window_exits;
 	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
@@ -2020,26 +2042,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpg_interception(struct vcpu_svm *svm)
 {
-	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
-static int emulate_on_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int emulate_on_interception(struct vcpu_svm *svm)
 {
-	if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
-static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cr8_write_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
-	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+	emulate_instruction(&svm->vcpu, 0, 0, 0);
 	if (irqchip_in_kernel(svm->vcpu.kvm)) {
 		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
 		return 1;
@@ -2056,10 +2079,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 	switch (ecx) {
 	case MSR_IA32_TSC: {
-		u64 tsc;
+		u64 tsc_offset;
 
-		rdtscll(tsc);
-		*data = svm->vmcb->control.tsc_offset + tsc;
+		if (is_nested(svm))
+			tsc_offset = svm->nested.hsave->control.tsc_offset;
+		else
+			tsc_offset = svm->vmcb->control.tsc_offset;
+
+		*data = tsc_offset + native_read_tsc();
 		break;
 	}
 	case MSR_K6_STAR:
@@ -2121,7 +2148,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	return 0;
 }
 
-static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int rdmsr_interception(struct vcpu_svm *svm)
 {
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data;
@@ -2145,10 +2172,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 
 	switch (ecx) {
 	case MSR_IA32_TSC: {
-		u64 tsc;
+		u64 tsc_offset = data - native_read_tsc();
+		u64 g_tsc_offset = 0;
+
+		if (is_nested(svm)) {
+			g_tsc_offset = svm->vmcb->control.tsc_offset -
+				       svm->nested.hsave->control.tsc_offset;
+			svm->nested.hsave->control.tsc_offset = tsc_offset;
+		}
+
+		svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
 
-		rdtscll(tsc);
-		svm->vmcb->control.tsc_offset = data - tsc;
 		break;
 	}
 	case MSR_K6_STAR:
@@ -2207,7 +2241,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	return 0;
 }
 
-static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int wrmsr_interception(struct vcpu_svm *svm)
 {
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
@@ -2223,17 +2257,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int msr_interception(struct vcpu_svm *svm)
 {
 	if (svm->vmcb->control.exit_info_1)
-		return wrmsr_interception(svm, kvm_run);
+		return wrmsr_interception(svm);
 	else
-		return rdmsr_interception(svm, kvm_run);
+		return rdmsr_interception(svm);
 }
 
-static int interrupt_window_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int interrupt_window_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
@@ -2251,8 +2286,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 	return 1;
 }
 
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
-				  struct kvm_run *kvm_run) = {
+static int pause_interception(struct vcpu_svm *svm)
+{
+	kvm_vcpu_on_spin(&(svm->vcpu));
+	return 1;
+}
+
+static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0] = emulate_on_interception,
 	[SVM_EXIT_READ_CR3] = emulate_on_interception,
 	[SVM_EXIT_READ_CR4] = emulate_on_interception,
@@ -2287,6 +2327,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_CPUID] = cpuid_interception,
 	[SVM_EXIT_IRET] = iret_interception,
 	[SVM_EXIT_INVD] = emulate_on_interception,
+	[SVM_EXIT_PAUSE] = pause_interception,
 	[SVM_EXIT_HLT] = halt_interception,
 	[SVM_EXIT_INVLPG] = invlpg_interception,
 	[SVM_EXIT_INVLPGA] = invlpga_interception,
@@ -2300,26 +2341,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_VMSAVE] = vmsave_interception,
 	[SVM_EXIT_STGI] = stgi_interception,
 	[SVM_EXIT_CLGI] = clgi_interception,
-	[SVM_EXIT_SKINIT] = invalid_op_interception,
+	[SVM_EXIT_SKINIT] = skinit_interception,
 	[SVM_EXIT_WBINVD] = emulate_on_interception,
 	[SVM_EXIT_MONITOR] = invalid_op_interception,
 	[SVM_EXIT_MWAIT] = invalid_op_interception,
 	[SVM_EXIT_NPF] = pf_interception,
 };
 
-static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_run *kvm_run = vcpu->run;
 	u32 exit_code = svm->vmcb->control.exit_code;
 
 	trace_kvm_exit(exit_code, svm->vmcb->save.rip);
 
+	if (unlikely(svm->nested.exit_required)) {
+		nested_svm_vmexit(svm);
+		svm->nested.exit_required = false;
+
+		return 1;
+	}
+
 	if (is_nested(svm)) {
 		int vmexit;
 
-		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
-			    exit_code, svm->vmcb->control.exit_info_1,
-			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+		trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+					svm->vmcb->control.exit_info_1,
+					svm->vmcb->control.exit_info_2,
+					svm->vmcb->control.exit_int_info,
+					svm->vmcb->control.exit_int_info_err);
 
 		vmexit = nested_svm_exit_special(svm);
 
@@ -2369,7 +2420,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
-	return svm_exit_handlers[exit_code](svm, kvm_run);
+	return svm_exit_handlers[exit_code](svm);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -2446,20 +2497,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (masked) {
+		svm->vcpu.arch.hflags |= HF_NMI_MASK;
+		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	} else {
+		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+		svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	}
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
-	return (vmcb->save.rflags & X86_EFLAGS_IF) &&
-		!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		gif_set(svm) &&
-		!(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
+	int ret;
+
+	if (!gif_set(svm) ||
+	     (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
+		return 0;
+
+	ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
+
+	if (is_nested(svm))
+		return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
+
+	return ret;
 }
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	nsvm_printk("Trying to open IRQ window\n");
 
 	nested_svm_intr(svm);
 
@@ -2484,7 +2562,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	/* Something prevents NMI from been injected. Single step over
 	   possible problem (IRET or exception injection or interrupt
 	   shadow) */
-	vcpu->arch.singlestep = true;
+	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 	update_db_intercept(vcpu);
 }
@@ -2574,13 +2652,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 #define R "e"
 #endif
 
-static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u16 fs_selector;
 	u16 gs_selector;
 	u16 ldt_selector;
 
+	/*
+	 * A vmexit emulation is required before the vcpu can be executed
+	 * again.
+	 */
+	if (unlikely(svm->nested.exit_required))
+		return;
+
 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
 	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
 	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@ -2879,6 +2964,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.queue_exception = svm_queue_exception,
 	.interrupt_allowed = svm_interrupt_allowed,
 	.nmi_allowed = svm_nmi_allowed,
+	.get_nmi_mask = svm_get_nmi_mask,
+	.set_nmi_mask = svm_set_nmi_mask,
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,