Diffstat (limited to 'arch/x86/kvm/svm.c')
 arch/x86/kvm/svm.c | 359
 1 file changed, 223 insertions(+), 136 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6c79a14a3b6f..1d9b33843c80 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -46,6 +46,7 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_NPT (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
 
 #define NESTED_EXIT_HOST 0 /* Exit handled on host level */
 #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
@@ -53,15 +54,6 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
 	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -85,6 +77,9 @@ struct nested_state {
 	/* gpa pointers to the real vectors */
 	u64 vmcb_msrpm;
 
+	/* A VMEXIT is required but not yet emulated */
+	bool exit_required;
+
 	/* cache for intercepts of the guest */
 	u16 intercept_cr_read;
 	u16 intercept_cr_write;
@@ -112,6 +107,8 @@ struct vcpu_svm {
 	u32 *msrpm;
 
 	struct nested_state nested;
+
+	bool nmi_singlestep;
 };
 
 /* enable NPT for AMD64 and X86 with PAE */
@@ -286,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+		if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
 				EMULATE_DONE)
 			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
@@ -316,7 +313,7 @@ static void svm_hardware_disable(void *garbage)
 	cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
 	struct svm_cpu_data *sd;
@@ -325,16 +322,21 @@ static void svm_hardware_enable(void *garbage)
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME)
+		return -EBUSY;
+
 	if (!has_svm()) {
-		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-		return;
+		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+		       me);
+		return -EINVAL;
 	}
 	sd = per_cpu(svm_data, me);
 
 	if (!sd) {
-		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
 		       me);
-		return;
+		return -EINVAL;
 	}
 
 	sd->asid_generation = 1;
@@ -345,11 +347,11 @@ static void svm_hardware_enable(void *garbage)
 	gdt = (struct desc_struct *)gdt_descr.base;
 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-	rdmsrl(MSR_EFER, efer);
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
-	wrmsrl(MSR_VM_HSAVE_PA,
-	       page_to_pfn(sd->save_area) << PAGE_SHIFT);
+	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+
+	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
@@ -475,7 +477,7 @@ static __init int svm_hardware_setup(void)
 		kvm_enable_efer_bits(EFER_SVME);
 	}
 
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		r = svm_cpu_init(cpu);
 		if (r)
 			goto err;
@@ -509,7 +511,7 @@ static __exit void svm_hardware_unsetup(void)
 {
 	int cpu;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		svm_cpu_uninit(cpu);
 
 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@ -624,11 +626,12 @@ static void init_vmcb(struct vcpu_svm *svm)
 	save->rip = 0x0000fff0;
 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
 
-	/*
-	 * cr0 val on cpu init should be 0x60000010, we enable cpu
-	 * cache by default. the orderly way is to enable cache in bios.
+	/* This is the guest-visible cr0 value.
+	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
 	 */
-	save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
+	svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+	kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
 
@@ -643,8 +646,6 @@ static void init_vmcb(struct vcpu_svm *svm)
 		control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
 						 INTERCEPT_CR3_MASK);
 		save->g_pat = 0x0007040600070406ULL;
-		/* enable caching because the QEMU Bios doesn't enable it */
-		save->cr0 = X86_CR0_ET;
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -653,6 +654,11 @@ static void init_vmcb(struct vcpu_svm *svm)
 	svm->nested.vmcb = 0;
 	svm->vcpu.arch.hflags = 0;
 
+	if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+		control->pause_filter_count = 3000;
+		control->intercept |= (1ULL << INTERCEPT_PAUSE);
+	}
+
 	enable_gif(svm);
 }
 
@@ -757,15 +763,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	int i;
 
 	if (unlikely(cpu != vcpu->cpu)) {
-		u64 tsc_this, delta;
+		u64 delta;
 
 		/*
 		 * Make sure that the guest sees a monotonically
 		 * increasing TSC.
 		 */
-		rdtscll(tsc_this);
-		delta = vcpu->arch.host_tsc - tsc_this;
+		delta = vcpu->arch.host_tsc - native_read_tsc();
 		svm->vmcb->control.tsc_offset += delta;
+		if (is_nested(svm))
+			svm->nested.hsave->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
 		kvm_migrate_timers(vcpu);
 		svm->asid_generation = 0;
@@ -784,7 +791,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
-	rdtscll(vcpu->arch.host_tsc);
+	vcpu->arch.host_tsc = native_read_tsc();
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -1042,7 +1049,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
 
-	if (vcpu->arch.singlestep)
+	if (svm->nmi_singlestep)
 		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -1057,26 +1064,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 		vcpu->guest_debug = 0;
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
-	update_db_intercept(vcpu);
-
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
 	else
 		svm->vmcb->save.dr7 = vcpu->arch.dr7;
 
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
-		svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-
-	return 0;
+	update_db_intercept(vcpu);
 }
 
 static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1177,7 +1174,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 	}
 }
 
-static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int pf_interception(struct vcpu_svm *svm)
 {
 	u64 fault_address;
 	u32 error_code;
@@ -1191,17 +1188,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int db_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	if (!(svm->vcpu.guest_debug &
 	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
-		!svm->vcpu.arch.singlestep) {
+		!svm->nmi_singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
 
-	if (svm->vcpu.arch.singlestep) {
-		svm->vcpu.arch.singlestep = false;
+	if (svm->nmi_singlestep) {
+		svm->nmi_singlestep = false;
 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
 			svm->vmcb->save.rflags &=
 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
@@ -1220,25 +1219,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int bp_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	kvm_run->exit_reason = KVM_EXIT_DEBUG;
 	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
 	kvm_run->debug.arch.exception = BP_VECTOR;
 	return 0;
 }
 
-static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int ud_interception(struct vcpu_svm *svm)
 {
 	int er;
 
-	er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+	er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
 	if (er != EMULATE_DONE)
 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
-static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nm_interception(struct vcpu_svm *svm)
 {
 	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 	if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
@@ -1248,7 +1249,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int mc_interception(struct vcpu_svm *svm)
 {
 	/*
 	 * On an #MC intercept the MCE handler is not called automatically in
@@ -1261,8 +1262,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int shutdown_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	/*
 	 * VMCB is undefined after a SHUTDOWN intercept
 	 * so reinitialize it.
@@ -1274,7 +1277,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 0;
 }
 
-static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int io_interception(struct vcpu_svm *svm)
 {
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
 	int size, in, string;
@@ -1288,7 +1291,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 	if (string) {
 		if (emulate_instruction(&svm->vcpu,
-					kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+					0, 0, 0) == EMULATE_DO_MMIO)
 			return 0;
 		return 1;
 	}
@@ -1298,33 +1301,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 
 	skip_emulated_instruction(&svm->vcpu);
-	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
+	return kvm_emulate_pio(&svm->vcpu, in, size, port);
 }
 
-static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nmi_interception(struct vcpu_svm *svm)
 {
 	return 1;
 }
 
-static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int intr_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.irq_exits;
 	return 1;
 }
 
-static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nop_on_interception(struct vcpu_svm *svm)
 {
 	return 1;
 }
 
-static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int halt_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
 	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_halt(&svm->vcpu);
 }
 
-static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmmcall_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
@@ -1375,8 +1378,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
 
 	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
 
-	if (nested_svm_exit_handled(svm)) {
-		nsvm_printk("VMexit -> INTR\n");
+	if (svm->nested.intercept & 1ULL) {
+		/*
+		 * The #vmexit can't be emulated here directly because this
+		 * code path runs with irqs and preemtion disabled. A
+		 * #vmexit emulation might sleep. Only signal request for
+		 * the #vmexit here.
+		 */
+		svm->nested.exit_required = true;
+		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
 		return 1;
 	}
 
@@ -1387,10 +1397,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
 {
 	struct page *page;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
-
 	if (is_error_page(page))
 		goto error;
 
@@ -1529,14 +1536,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
 	}
 	default: {
 		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
-		nsvm_printk("exit code: 0x%x\n", exit_code);
 		if (svm->nested.intercept & exit_bits)
 			vmexit = NESTED_EXIT_DONE;
 	}
 	}
 
 	if (vmexit == NESTED_EXIT_DONE) {
-		nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
 		nested_svm_vmexit(svm);
 	}
 
@@ -1581,6 +1586,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	struct vmcb *hsave = svm->nested.hsave;
 	struct vmcb *vmcb = svm->vmcb;
 
+	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+				       vmcb->control.exit_info_1,
+				       vmcb->control.exit_info_2,
+				       vmcb->control.exit_int_info,
+				       vmcb->control.exit_int_info_err);
+
 	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
 	if (!nested_vmcb)
 		return 1;
@@ -1614,6 +1625,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
 	nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+	/*
+	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+	 * to make sure that we do not lose injected events. So check event_inj
+	 * here and copy it to exit_int_info if it is valid.
+	 * Exit_int_info and event_inj can't be both valid because the case
+	 * below only happens on a VMRUN instruction intercept which has
+	 * no valid exit_int_info set.
+	 */
+	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+		struct vmcb_control_area *nc = &nested_vmcb->control;
+
+		nc->exit_int_info = vmcb->control.event_inj;
+		nc->exit_int_info_err = vmcb->control.event_inj_err;
+	}
+
 	nested_vmcb->control.tlb_ctl = 0;
 	nested_vmcb->control.event_inj = 0;
 	nested_vmcb->control.event_inj_err = 0;
@@ -1625,10 +1652,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	/* Restore the original control entries */
 	copy_vmcb_control_area(vmcb, hsave);
 
-	/* Kill any pending exceptions */
-	if (svm->vcpu.arch.exception.pending == true)
-		nsvm_printk("WARNING: Pending Exception\n");
-
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
 
@@ -1699,6 +1722,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	/* nested_vmcb is our indicator if nested SVM is activated */
 	svm->nested.vmcb = svm->vmcb->save.rax;
 
+	trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+			       nested_vmcb->save.rip,
+			       nested_vmcb->control.int_ctl,
+			       nested_vmcb->control.event_inj,
+			       nested_vmcb->control.nested_ctl);
+
 	/* Clear internal status */
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1786,28 +1815,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	svm->nested.intercept = nested_vmcb->control.intercept;
 
 	force_new_asid(&svm->vcpu);
-	svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
-	svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
 	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
-	if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
-		nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
-				nested_vmcb->control.int_ctl);
-	}
 	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
 		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
 	else
 		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
 
-	nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
-			nested_vmcb->control.exit_int_info,
-			nested_vmcb->control.int_state);
-
 	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
 	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
 	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
-	if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
-		nsvm_printk("Injecting Event: 0x%x\n",
-				nested_vmcb->control.event_inj);
 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
@@ -1834,7 +1850,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
 }
 
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmload_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 
@@ -1854,7 +1870,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmsave_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 
@@ -1874,10 +1890,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmrun_interception(struct vcpu_svm *svm)
 {
-	nsvm_printk("VMrun\n");
-
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
@@ -1904,7 +1918,7 @@ failed:
 	return 1;
 }
 
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int stgi_interception(struct vcpu_svm *svm)
 {
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -1917,7 +1931,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int clgi_interception(struct vcpu_svm *svm)
 {
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -1934,10 +1948,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpga_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	nsvm_printk("INVLPGA\n");
+
+	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+			  vcpu->arch.regs[VCPU_REGS_RAX]);
 
 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
 	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -1947,15 +1963,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invalid_op_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int skinit_interception(struct vcpu_svm *svm)
 {
+	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
-static int task_switch_interception(struct vcpu_svm *svm,
-				    struct kvm_run *kvm_run)
+static int invalid_op_interception(struct vcpu_svm *svm)
+{
+	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+	return 1;
+}
+
+static int task_switch_interception(struct vcpu_svm *svm)
 {
 	u16 tss_selector;
 	int reason;
@@ -2005,14 +2027,14 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
 
-static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cpuid_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 	kvm_emulate_cpuid(&svm->vcpu);
 	return 1;
 }
 
-static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int iret_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.nmi_window_exits;
 	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
@@ -2020,26 +2042,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpg_interception(struct vcpu_svm *svm)
 {
-	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
-static int emulate_on_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int emulate_on_interception(struct vcpu_svm *svm)
 {
-	if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
-static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cr8_write_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
-	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+	emulate_instruction(&svm->vcpu, 0, 0, 0);
 	if (irqchip_in_kernel(svm->vcpu.kvm)) {
 		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
 		return 1;
@@ -2056,10 +2079,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 	switch (ecx) {
 	case MSR_IA32_TSC: {
-		u64 tsc;
+		u64 tsc_offset;
 
-		rdtscll(tsc);
-		*data = svm->vmcb->control.tsc_offset + tsc;
+		if (is_nested(svm))
+			tsc_offset = svm->nested.hsave->control.tsc_offset;
+		else
+			tsc_offset = svm->vmcb->control.tsc_offset;
+
+		*data = tsc_offset + native_read_tsc();
 		break;
 	}
 	case MSR_K6_STAR:
@@ -2121,7 +2148,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	return 0;
 }
 
-static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int rdmsr_interception(struct vcpu_svm *svm)
 {
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data;
@@ -2145,10 +2172,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 
 	switch (ecx) {
 	case MSR_IA32_TSC: {
-		u64 tsc;
+		u64 tsc_offset = data - native_read_tsc();
+		u64 g_tsc_offset = 0;
+
+		if (is_nested(svm)) {
+			g_tsc_offset = svm->vmcb->control.tsc_offset -
+				       svm->nested.hsave->control.tsc_offset;
+			svm->nested.hsave->control.tsc_offset = tsc_offset;
+		}
+
+		svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
 
-		rdtscll(tsc);
-		svm->vmcb->control.tsc_offset = data - tsc;
 		break;
 	}
 	case MSR_K6_STAR:
@@ -2207,7 +2241,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	return 0;
 }
 
-static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int wrmsr_interception(struct vcpu_svm *svm)
 {
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
@@ -2223,17 +2257,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int msr_interception(struct vcpu_svm *svm)
 {
 	if (svm->vmcb->control.exit_info_1)
-		return wrmsr_interception(svm, kvm_run);
+		return wrmsr_interception(svm);
 	else
-		return rdmsr_interception(svm, kvm_run);
+		return rdmsr_interception(svm);
 }
 
-static int interrupt_window_interception(struct vcpu_svm *svm,
-					 struct kvm_run *kvm_run)
+static int interrupt_window_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
@@ -2251,8 +2286,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 	return 1;
 }
 
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
-				  struct kvm_run *kvm_run) = {
+static int pause_interception(struct vcpu_svm *svm)
+{
+	kvm_vcpu_on_spin(&(svm->vcpu));
+	return 1;
+}
+
+static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0] = emulate_on_interception,
 	[SVM_EXIT_READ_CR3] = emulate_on_interception,
 	[SVM_EXIT_READ_CR4] = emulate_on_interception,
@@ -2287,6 +2327,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_CPUID] = cpuid_interception,
 	[SVM_EXIT_IRET] = iret_interception,
 	[SVM_EXIT_INVD] = emulate_on_interception,
+	[SVM_EXIT_PAUSE] = pause_interception,
 	[SVM_EXIT_HLT] = halt_interception,
 	[SVM_EXIT_INVLPG] = invlpg_interception,
 	[SVM_EXIT_INVLPGA] = invlpga_interception,
@@ -2300,26 +2341,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_VMSAVE] = vmsave_interception,
 	[SVM_EXIT_STGI] = stgi_interception,
 	[SVM_EXIT_CLGI] = clgi_interception,
-	[SVM_EXIT_SKINIT] = invalid_op_interception,
+	[SVM_EXIT_SKINIT] = skinit_interception,
 	[SVM_EXIT_WBINVD] = emulate_on_interception,
 	[SVM_EXIT_MONITOR] = invalid_op_interception,
 	[SVM_EXIT_MWAIT] = invalid_op_interception,
 	[SVM_EXIT_NPF] = pf_interception,
 };
 
-static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_run *kvm_run = vcpu->run;
 	u32 exit_code = svm->vmcb->control.exit_code;
 
 	trace_kvm_exit(exit_code, svm->vmcb->save.rip);
 
+	if (unlikely(svm->nested.exit_required)) {
+		nested_svm_vmexit(svm);
+		svm->nested.exit_required = false;
+
+		return 1;
+	}
+
 	if (is_nested(svm)) {
 		int vmexit;
 
-		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
-			    exit_code, svm->vmcb->control.exit_info_1,
-			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+		trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+					svm->vmcb->control.exit_info_1,
+					svm->vmcb->control.exit_info_2,
+					svm->vmcb->control.exit_int_info,
+					svm->vmcb->control.exit_int_info_err);
 
 		vmexit = nested_svm_exit_special(svm);
 
@@ -2369,7 +2420,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
-	return svm_exit_handlers[exit_code](svm, kvm_run);
+	return svm_exit_handlers[exit_code](svm);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -2446,20 +2497,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (masked) {
+		svm->vcpu.arch.hflags |= HF_NMI_MASK;
+		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	} else {
+		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+		svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	}
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
-	return (vmcb->save.rflags & X86_EFLAGS_IF) &&
-		!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		gif_set(svm) &&
-		!(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
+	int ret;
+
+	if (!gif_set(svm) ||
+	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
+		return 0;
+
+	ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
+
+	if (is_nested(svm))
+		return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
+
+	return ret;
 }
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	nsvm_printk("Trying to open IRQ window\n");
 
 	nested_svm_intr(svm);
 
@@ -2484,7 +2562,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	/* Something prevents NMI from been injected. Single step over
 	   possible problem (IRET or exception injection or interrupt
 	   shadow) */
-	vcpu->arch.singlestep = true;
+	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 	update_db_intercept(vcpu);
 }
@@ -2574,13 +2652,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 #define R "e"
 #endif
 
-static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u16 fs_selector;
 	u16 gs_selector;
 	u16 ldt_selector;
 
+	/*
+	 * A vmexit emulation is required before the vcpu can be executed
+	 * again.
+	 */
+	if (unlikely(svm->nested.exit_required))
+		return;
+
 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
 	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
 	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@ -2879,6 +2964,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.queue_exception = svm_queue_exception,
 	.interrupt_allowed = svm_interrupt_allowed,
 	.nmi_allowed = svm_nmi_allowed,
+	.get_nmi_mask = svm_get_nmi_mask,
+	.set_nmi_mask = svm_set_nmi_mask,
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,