Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r--	arch/x86/kvm/svm.c	660
1 file changed, 390 insertions(+), 270 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c17404add91f..737361fcd503 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -26,6 +26,7 @@
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/ftrace_event.h>
+#include <linux/slab.h>
 
 #include <asm/desc.h>
 
@@ -46,6 +47,7 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_NPT  (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
 
 #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
 #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
@@ -53,15 +55,6 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
 	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -85,6 +78,9 @@ struct nested_state {
 	/* gpa pointers to the real vectors */
 	u64 vmcb_msrpm;
 
+	/* A VMEXIT is required but not yet emulated */
+	bool exit_required;
+
 	/* cache for intercepts of the guest */
 	u16 intercept_cr_read;
 	u16 intercept_cr_write;
@@ -112,6 +108,8 @@ struct vcpu_svm {
 	u32 *msrpm;
 
 	struct nested_state nested;
+
+	bool nmi_singlestep;
 };
 
 /* enable NPT for AMD64 and X86 with PAE */
@@ -234,7 +232,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 		efer &= ~EFER_LME;
 
 	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
-	vcpu->arch.shadow_efer = efer;
+	vcpu->arch.efer = efer;
 }
 
 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -286,7 +284,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+		if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
 				EMULATE_DONE)
 			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
@@ -316,75 +314,79 @@ static void svm_hardware_disable(void *garbage)
 	cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
-	struct svm_cpu_data *svm_data;
+	struct svm_cpu_data *sd;
 	uint64_t efer;
 	struct descriptor_table gdt_descr;
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME)
+		return -EBUSY;
+
 	if (!has_svm()) {
-		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-		return;
+		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+		       me);
+		return -EINVAL;
 	}
-	svm_data = per_cpu(svm_data, me);
+	sd = per_cpu(svm_data, me);
 
-	if (!svm_data) {
-		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+	if (!sd) {
+		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
 		       me);
-		return;
+		return -EINVAL;
 	}
 
-	svm_data->asid_generation = 1;
-	svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
-	svm_data->next_asid = svm_data->max_asid + 1;
+	sd->asid_generation = 1;
+	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
+	sd->next_asid = sd->max_asid + 1;
 
 	kvm_get_gdt(&gdt_descr);
 	gdt = (struct desc_struct *)gdt_descr.base;
-	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
+	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-	rdmsrl(MSR_EFER, efer);
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
-	wrmsrl(MSR_VM_HSAVE_PA,
-	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+
+	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
 {
-	struct svm_cpu_data *svm_data
-		= per_cpu(svm_data, raw_smp_processor_id());
+	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
 
-	if (!svm_data)
+	if (!sd)
 		return;
 
 	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
-	__free_page(svm_data->save_area);
-	kfree(svm_data);
+	__free_page(sd->save_area);
+	kfree(sd);
 }
 
 static int svm_cpu_init(int cpu)
 {
-	struct svm_cpu_data *svm_data;
+	struct svm_cpu_data *sd;
 	int r;
 
-	svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
-	if (!svm_data)
+	sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
+	if (!sd)
 		return -ENOMEM;
-	svm_data->cpu = cpu;
-	svm_data->save_area = alloc_page(GFP_KERNEL);
+	sd->cpu = cpu;
+	sd->save_area = alloc_page(GFP_KERNEL);
 	r = -ENOMEM;
-	if (!svm_data->save_area)
+	if (!sd->save_area)
 		goto err_1;
 
-	per_cpu(svm_data, cpu) = svm_data;
+	per_cpu(svm_data, cpu) = sd;
 
 	return 0;
 
 err_1:
-	kfree(svm_data);
+	kfree(sd);
 	return r;
 
 }
@@ -476,7 +478,7 @@ static __init int svm_hardware_setup(void)
 		kvm_enable_efer_bits(EFER_SVME);
 	}
 
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		r = svm_cpu_init(cpu);
 		if (r)
 			goto err;
@@ -510,7 +512,7 @@ static __exit void svm_hardware_unsetup(void)
 {
 	int cpu;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		svm_cpu_uninit(cpu);
 
 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@ -539,6 +541,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct vmcb_save_area *save = &svm->vmcb->save;
 
+	svm->vcpu.fpu_active = 1;
+
 	control->intercept_cr_read =	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
 					INTERCEPT_CR4_MASK;
@@ -551,13 +555,19 @@ static void init_vmcb(struct vcpu_svm *svm)
 	control->intercept_dr_read =	INTERCEPT_DR0_MASK |
 					INTERCEPT_DR1_MASK |
 					INTERCEPT_DR2_MASK |
-					INTERCEPT_DR3_MASK;
+					INTERCEPT_DR3_MASK |
+					INTERCEPT_DR4_MASK |
+					INTERCEPT_DR5_MASK |
+					INTERCEPT_DR6_MASK |
+					INTERCEPT_DR7_MASK;
 
 	control->intercept_dr_write =	INTERCEPT_DR0_MASK |
 					INTERCEPT_DR1_MASK |
 					INTERCEPT_DR2_MASK |
 					INTERCEPT_DR3_MASK |
+					INTERCEPT_DR4_MASK |
 					INTERCEPT_DR5_MASK |
+					INTERCEPT_DR6_MASK |
 					INTERCEPT_DR7_MASK;
 
 	control->intercept_exceptions = (1 << PF_VECTOR) |
@@ -568,6 +578,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	control->intercept =	(1ULL << INTERCEPT_INTR) |
 				(1ULL << INTERCEPT_NMI) |
 				(1ULL << INTERCEPT_SMI) |
+				(1ULL << INTERCEPT_SELECTIVE_CR0) |
 				(1ULL << INTERCEPT_CPUID) |
 				(1ULL << INTERCEPT_INVD) |
 				(1ULL << INTERCEPT_HLT) |
@@ -625,11 +636,12 @@ static void init_vmcb(struct vcpu_svm *svm)
 	save->rip = 0x0000fff0;
 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
 
-	/*
-	 * cr0 val on cpu init should be 0x60000010, we enable cpu
-	 * cache by default. the orderly way is to enable cache in bios.
+	/* This is the guest-visible cr0 value.
+	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
 	 */
-	save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
+	svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+	kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
 
@@ -639,13 +651,9 @@ static void init_vmcb(struct vcpu_svm *svm)
 		control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
 					(1ULL << INTERCEPT_INVLPG));
 		control->intercept_exceptions &= ~(1 << PF_VECTOR);
-		control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
-						INTERCEPT_CR3_MASK);
-		control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
-						INTERCEPT_CR3_MASK);
+		control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
+		control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
 		save->g_pat = 0x0007040600070406ULL;
-		/* enable caching because the QEMU Bios doesn't enable it */
-		save->cr0 = X86_CR0_ET;
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -654,6 +662,11 @@ static void init_vmcb(struct vcpu_svm *svm)
 	svm->nested.vmcb = 0;
 	svm->vcpu.arch.hflags = 0;
 
+	if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+		control->pause_filter_count = 3000;
+		control->intercept |= (1ULL << INTERCEPT_PAUSE);
+	}
+
 	enable_gif(svm);
 }
 
@@ -693,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_svm;
 
+	err = -ENOMEM;
 	page = alloc_page(GFP_KERNEL);
-	if (!page) {
-		err = -ENOMEM;
+	if (!page)
 		goto uninit;
-	}
 
-	err = -ENOMEM;
 	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
 	if (!msrpm_pages)
-		goto uninit;
+		goto free_page1;
 
 	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
 	if (!nested_msrpm_pages)
-		goto uninit;
-
-	svm->msrpm = page_address(msrpm_pages);
-	svm_vcpu_init_msrpm(svm->msrpm);
+		goto free_page2;
 
 	hsave_page = alloc_page(GFP_KERNEL);
 	if (!hsave_page)
-		goto uninit;
+		goto free_page3;
+
 	svm->nested.hsave = page_address(hsave_page);
 
+	svm->msrpm = page_address(msrpm_pages);
+	svm_vcpu_init_msrpm(svm->msrpm);
+
 	svm->nested.msrpm = page_address(nested_msrpm_pages);
 
 	svm->vmcb = page_address(page);
@@ -725,13 +737,18 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	init_vmcb(svm);
 
 	fx_init(&svm->vcpu);
-	svm->vcpu.fpu_active = 1;
 	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 	if (kvm_vcpu_is_bsp(&svm->vcpu))
 		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 
 	return &svm->vcpu;
 
+free_page3:
+	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
+free_page2:
+	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
+free_page1:
+	__free_page(page);
 uninit:
 	kvm_vcpu_uninit(&svm->vcpu);
 free_svm:
@@ -758,17 +775,18 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	int i;
 
 	if (unlikely(cpu != vcpu->cpu)) {
-		u64 tsc_this, delta;
+		u64 delta;
 
-		/*
-		 * Make sure that the guest sees a monotonically
-		 * increasing TSC.
-		 */
-		rdtscll(tsc_this);
-		delta = vcpu->arch.host_tsc - tsc_this;
-		svm->vmcb->control.tsc_offset += delta;
-		if (is_nested(svm))
-			svm->nested.hsave->control.tsc_offset += delta;
+		if (check_tsc_unstable()) {
+			/*
+			 * Make sure that the guest sees a monotonically
+			 * increasing TSC.
+			 */
+			delta = vcpu->arch.host_tsc - native_read_tsc();
+			svm->vmcb->control.tsc_offset += delta;
+			if (is_nested(svm))
+				svm->nested.hsave->control.tsc_offset += delta;
+		}
 		vcpu->cpu = cpu;
 		kvm_migrate_timers(vcpu);
 		svm->asid_generation = 0;
@@ -787,7 +805,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
-	rdtscll(vcpu->arch.host_tsc);
+	vcpu->arch.host_tsc = native_read_tsc();
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -950,42 +968,59 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 	svm->vmcb->save.gdtr.base = dt->base ;
 }
 
+static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
+{
+}
+
 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 }
 
+static void update_cr0_intercept(struct vcpu_svm *svm)
+{
+	ulong gcr0 = svm->vcpu.arch.cr0;
+	u64 *hcr0 = &svm->vmcb->save.cr0;
+
+	if (!svm->vcpu.fpu_active)
+		*hcr0 |= SVM_CR0_SELECTIVE_MASK;
+	else
+		*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
+			| (gcr0 & SVM_CR0_SELECTIVE_MASK);
+
+
+	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
+		svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
+		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
+	} else {
+		svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
+		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
+	}
+}
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->arch.shadow_efer & EFER_LME) {
+	if (vcpu->arch.efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
-			vcpu->arch.shadow_efer |= EFER_LMA;
+			vcpu->arch.efer |= EFER_LMA;
 			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
 		}
 
 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
-			vcpu->arch.shadow_efer &= ~EFER_LMA;
+			vcpu->arch.efer &= ~EFER_LMA;
 			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
 		}
 	}
 #endif
-	if (npt_enabled)
-		goto set;
+	vcpu->arch.cr0 = cr0;
 
-	if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
-		svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
-		vcpu->fpu_active = 1;
-	}
+	if (!npt_enabled)
+		cr0 |= X86_CR0_PG | X86_CR0_WP;
 
-	vcpu->arch.cr0 = cr0;
-	cr0 |= X86_CR0_PG | X86_CR0_WP;
-	if (!vcpu->fpu_active) {
-		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
+	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-	}
-set:
 	/*
 	 * re-enable caching here because the QEMU bios
 	 * does not do it - this results in some delay at
@@ -993,6 +1028,7 @@ set:
 	 */
 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
+	update_cr0_intercept(svm);
 }
 
 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1045,7 +1081,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
 
-	if (vcpu->arch.singlestep)
+	if (svm->nmi_singlestep)
 		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -1060,26 +1096,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 		vcpu->guest_debug = 0;
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
-	update_db_intercept(vcpu);
-
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
 	else
 		svm->vmcb->save.dr7 = vcpu->arch.dr7;
 
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
-		svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-
-	return 0;
+	update_db_intercept(vcpu);
 }
 
 static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1096,91 +1122,85 @@ static void save_host_msrs(struct kvm_vcpu *vcpu)
 #endif
 }
 
-static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
+static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
 {
-	if (svm_data->next_asid > svm_data->max_asid) {
-		++svm_data->asid_generation;
-		svm_data->next_asid = 1;
+	if (sd->next_asid > sd->max_asid) {
+		++sd->asid_generation;
+		sd->next_asid = 1;
 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
 	}
 
-	svm->asid_generation = svm_data->asid_generation;
-	svm->vmcb->control.asid = svm_data->next_asid++;
+	svm->asid_generation = sd->asid_generation;
+	svm->vmcb->control.asid = sd->next_asid++;
 }
 
-static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
+static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	unsigned long val;
 
 	switch (dr) {
 	case 0 ... 3:
-		val = vcpu->arch.db[dr];
+		*dest = vcpu->arch.db[dr];
 		break;
+	case 4:
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+			return EMULATE_FAIL; /* will re-inject UD */
+		/* fall through */
 	case 6:
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-			val = vcpu->arch.dr6;
+			*dest = vcpu->arch.dr6;
 		else
-			val = svm->vmcb->save.dr6;
+			*dest = svm->vmcb->save.dr6;
 		break;
+	case 5:
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+			return EMULATE_FAIL; /* will re-inject UD */
+		/* fall through */
 	case 7:
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-			val = vcpu->arch.dr7;
+			*dest = vcpu->arch.dr7;
 		else
-			val = svm->vmcb->save.dr7;
+			*dest = svm->vmcb->save.dr7;
 		break;
-	default:
-		val = 0;
 	}
 
-	return val;
+	return EMULATE_DONE;
 }
 
-static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
-		       int *exception)
+static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	*exception = 0;
-
 	switch (dr) {
 	case 0 ... 3:
 		vcpu->arch.db[dr] = value;
 		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
 			vcpu->arch.eff_db[dr] = value;
-		return;
-	case 4 ... 5:
-		if (vcpu->arch.cr4 & X86_CR4_DE)
-			*exception = UD_VECTOR;
-		return;
+		break;
+	case 4:
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+			return EMULATE_FAIL; /* will re-inject UD */
+		/* fall through */
 	case 6:
-		if (value & 0xffffffff00000000ULL) {
-			*exception = GP_VECTOR;
-			return;
-		}
 		vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
-		return;
+		break;
+	case 5:
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+			return EMULATE_FAIL; /* will re-inject UD */
+		/* fall through */
 	case 7:
-		if (value & 0xffffffff00000000ULL) {
-			*exception = GP_VECTOR;
-			return;
-		}
 		vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
 		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
 			svm->vmcb->save.dr7 = vcpu->arch.dr7;
 			vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
 		}
-		return;
-	default:
-		/* FIXME: Possible case? */
-		printk(KERN_DEBUG "%s: unexpected dr %u\n",
-			__func__, dr);
-		*exception = UD_VECTOR;
-		return;
+		break;
 	}
+
+	return EMULATE_DONE;
 }
 
-static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int pf_interception(struct vcpu_svm *svm)
 {
 	u64 fault_address;
 	u32 error_code;
@@ -1194,17 +1214,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int db_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	if (!(svm->vcpu.guest_debug &
 	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
-		!svm->vcpu.arch.singlestep) {
+		!svm->nmi_singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
 
-	if (svm->vcpu.arch.singlestep) {
-		svm->vcpu.arch.singlestep = false;
+	if (svm->nmi_singlestep) {
+		svm->nmi_singlestep = false;
 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
 			svm->vmcb->save.rflags &=
 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
@@ -1223,35 +1245,41 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int bp_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	kvm_run->exit_reason = KVM_EXIT_DEBUG;
 	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
 	kvm_run->debug.arch.exception = BP_VECTOR;
 	return 0;
 }
 
-static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int ud_interception(struct vcpu_svm *svm)
 {
 	int er;
 
-	er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+	er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
 	if (er != EMULATE_DONE)
 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
-static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static void svm_fpu_activate(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
 	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
-	if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
-		svm->vmcb->save.cr0 &= ~X86_CR0_TS;
 	svm->vcpu.fpu_active = 1;
+	update_cr0_intercept(svm);
+}
 
+static int nm_interception(struct vcpu_svm *svm)
+{
+	svm_fpu_activate(&svm->vcpu);
 	return 1;
 }
 
-static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int mc_interception(struct vcpu_svm *svm)
 {
 	/*
 	 * On an #MC intercept the MCE handler is not called automatically in
@@ -1264,8 +1292,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int shutdown_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	/*
 	 * VMCB is undefined after a SHUTDOWN intercept
 	 * so reinitialize it.
@@ -1277,7 +1307,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 0;
 }
 
-static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int io_interception(struct vcpu_svm *svm)
 {
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
 	int size, in, string;
@@ -1291,7 +1321,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 	if (string) {
 		if (emulate_instruction(&svm->vcpu,
-				kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+					0, 0, 0) == EMULATE_DO_MMIO)
 			return 0;
 		return 1;
 	}
@@ -1301,33 +1331,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 
 	skip_emulated_instruction(&svm->vcpu);
-	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
+	return kvm_emulate_pio(&svm->vcpu, in, size, port);
 }
 
-static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nmi_interception(struct vcpu_svm *svm)
 {
 	return 1;
 }
 
-static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int intr_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.irq_exits;
 	return 1;
 }
 
-static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nop_on_interception(struct vcpu_svm *svm)
 {
 	return 1;
 }
 
-static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int halt_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
 	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_halt(&svm->vcpu);
 }
 
-static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmmcall_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
@@ -1337,7 +1367,7 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 static int nested_svm_check_permissions(struct vcpu_svm *svm)
 {
-	if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+	if (!(svm->vcpu.arch.efer & EFER_SVME)
 	    || !is_paging(&svm->vcpu)) {
 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 		return 1;
@@ -1378,8 +1408,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
 
 	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
 
-	if (nested_svm_exit_handled(svm)) {
-		nsvm_printk("VMexit -> INTR\n");
+	if (svm->nested.intercept & 1ULL) {
+		/*
+		 * The #vmexit can't be emulated here directly because this
+		 * code path runs with irqs and preemtion disabled. A
+		 * #vmexit emulation might sleep. Only signal request for
+		 * the #vmexit here.
+		 */
+		svm->nested.exit_required = true;
+		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
 		return 1;
 	}
 
@@ -1390,10 +1427,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
 {
 	struct page *page;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
-
 	if (is_error_page(page))
 		goto error;
 
@@ -1532,14 +1566,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
 	}
 	default: {
 		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
-		nsvm_printk("exit code: 0x%x\n", exit_code);
 		if (svm->nested.intercept & exit_bits)
 			vmexit = NESTED_EXIT_DONE;
 	}
 	}
 
 	if (vmexit == NESTED_EXIT_DONE) {
-		nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
 		nested_svm_vmexit(svm);
 	}
 
@@ -1584,6 +1616,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	struct vmcb *hsave = svm->nested.hsave;
 	struct vmcb *vmcb = svm->vmcb;
 
+	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+				       vmcb->control.exit_info_1,
+				       vmcb->control.exit_info_2,
+				       vmcb->control.exit_int_info,
+				       vmcb->control.exit_int_info_err);
+
 	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
 	if (!nested_vmcb)
 		return 1;
@@ -1617,6 +1655,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
 	nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+	/*
+	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+	 * to make sure that we do not lose injected events. So check event_inj
+	 * here and copy it to exit_int_info if it is valid.
+	 * Exit_int_info and event_inj can't be both valid because the case
+	 * below only happens on a VMRUN instruction intercept which has
+	 * no valid exit_int_info set.
+	 */
+	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+		struct vmcb_control_area *nc = &nested_vmcb->control;
+
+		nc->exit_int_info = vmcb->control.event_inj;
+		nc->exit_int_info_err = vmcb->control.event_inj_err;
+	}
+
 	nested_vmcb->control.tlb_ctl = 0;
 	nested_vmcb->control.event_inj = 0;
 	nested_vmcb->control.event_inj_err = 0;
@@ -1628,10 +1682,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	/* Restore the original control entries */
 	copy_vmcb_control_area(vmcb, hsave);
 
-	/* Kill any pending exceptions */
-	if (svm->vcpu.arch.exception.pending == true)
-		nsvm_printk("WARNING: Pending Exception\n");
-
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
 
@@ -1702,6 +1752,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	/* nested_vmcb is our indicator if nested SVM is activated */
 	svm->nested.vmcb = svm->vmcb->save.rax;
 
+	trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+			       nested_vmcb->save.rip,
+			       nested_vmcb->control.int_ctl,
+			       nested_vmcb->control.event_inj,
+			       nested_vmcb->control.nested_ctl);
+
 	/* Clear internal status */
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1714,8 +1770,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	hsave->save.ds = vmcb->save.ds;
 	hsave->save.gdtr = vmcb->save.gdtr;
 	hsave->save.idtr = vmcb->save.idtr;
-	hsave->save.efer = svm->vcpu.arch.shadow_efer;
-	hsave->save.cr0 = svm->vcpu.arch.cr0;
+	hsave->save.efer = svm->vcpu.arch.efer;
+	hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
 	hsave->save.cr4 = svm->vcpu.arch.cr4;
 	hsave->save.rflags = vmcb->save.rflags;
 	hsave->save.rip = svm->next_rip;
@@ -1789,28 +1845,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	svm->nested.intercept = nested_vmcb->control.intercept;
 
 	force_new_asid(&svm->vcpu);
-	svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
-	svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
 	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
-	if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
-		nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
-			nested_vmcb->control.int_ctl);
-	}
 	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
 		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
 	else
 		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
 
-	nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
-		nested_vmcb->control.exit_int_info,
-		nested_vmcb->control.int_state);
-
 	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
 	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
 	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
-	if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
-		nsvm_printk("Injecting Event: 0x%x\n",
-			nested_vmcb->control.event_inj);
 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
@@ -1837,7 +1880,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
 }
 
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmload_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 
@@ -1857,7 +1900,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmsave_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 
@@ -1877,10 +1920,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmrun_interception(struct vcpu_svm *svm)
 {
-	nsvm_printk("VMrun\n");
-
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
@@ -1907,7 +1948,7 @@ failed:
 	return 1;
 }
 
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int stgi_interception(struct vcpu_svm *svm)
 {
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -1920,7 +1961,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int clgi_interception(struct vcpu_svm *svm)
 {
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -1937,10 +1978,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpga_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	nsvm_printk("INVLPGA\n");
+
+	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+			  vcpu->arch.regs[VCPU_REGS_RAX]);
 
 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
 	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -1950,15 +1993,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
-static int invalid_op_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int skinit_interception(struct vcpu_svm *svm)
+{
+	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
+	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+	return 1;
+}
+
+static int invalid_op_interception(struct vcpu_svm *svm)
 {
 	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }
 
-static int task_switch_interception(struct vcpu_svm *svm,
-				    struct kvm_run *kvm_run)
+static int task_switch_interception(struct vcpu_svm *svm)
 {
 	u16 tss_selector;
 	int reason;
@@ -2008,41 +2057,42 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
 
-static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cpuid_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 	kvm_emulate_cpuid(&svm->vcpu);
 	return 1;
 }
 
-static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int iret_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.nmi_window_exits;
-	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
 	svm->vcpu.arch.hflags |= HF_IRET_MASK;
 	return 1;
 }
 
-static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpg_interception(struct vcpu_svm *svm)
 {
-	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
-static int emulate_on_interception(struct vcpu_svm *svm,
-				   struct kvm_run *kvm_run)
+static int emulate_on_interception(struct vcpu_svm *svm)
 {
-	if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
-static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cr8_write_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
-	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+	emulate_instruction(&svm->vcpu, 0, 0, 0);
 	if (irqchip_in_kernel(svm->vcpu.kvm)) {
 		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
 		return 1;
@@ -2128,14 +2178,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	return 0;
 }
 
-static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int rdmsr_interception(struct vcpu_svm *svm)
 {
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data;
 
-	if (svm_get_msr(&svm->vcpu, ecx, &data))
+	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
+		trace_kvm_msr_read_ex(ecx);
 		kvm_inject_gp(&svm->vcpu, 0);
-	else {
+	} else {
 		trace_kvm_msr_read(ecx, data);
 
 		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
@@ -2221,33 +2272,36 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	return 0;
 }
 
-static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int wrmsr_interception(struct vcpu_svm *svm)
 {
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-	trace_kvm_msr_write(ecx, data);
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, ecx, data))
+	if (svm_set_msr(&svm->vcpu, ecx, data)) {
+		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
-	else
+	} else {
+		trace_kvm_msr_write(ecx, data);
 		skip_emulated_instruction(&svm->vcpu);
+	}
 	return 1;
 }
 
-static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int msr_interception(struct vcpu_svm *svm)
 {
 	if (svm->vmcb->control.exit_info_1)
-		return wrmsr_interception(svm, kvm_run);
+		return wrmsr_interception(svm);
 	else
-		return rdmsr_interception(svm, kvm_run);
+		return rdmsr_interception(svm);
 }
 
-static int interrupt_window_interception(struct vcpu_svm *svm,
-					 struct kvm_run *kvm_run)
+static int interrupt_window_interception(struct vcpu_svm *svm)
 {
+	struct kvm_run *kvm_run = svm->vcpu.run;
+
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
@@ -2265,13 +2319,18 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 	return 1;
 }
 
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
-				  struct kvm_run *kvm_run) = {
+static int pause_interception(struct vcpu_svm *svm)
+{
+	kvm_vcpu_on_spin(&(svm->vcpu));
+	return 1;
+}
+
+static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]			= emulate_on_interception,
 	[SVM_EXIT_READ_CR3]			= emulate_on_interception,
 	[SVM_EXIT_READ_CR4]			= emulate_on_interception,
 	[SVM_EXIT_READ_CR8]			= emulate_on_interception,
-	/* for now: */
+	[SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR0]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR3]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR4]			= emulate_on_interception,
@@ -2280,11 +2339,17 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_READ_DR1]			= emulate_on_interception,
 	[SVM_EXIT_READ_DR2]			= emulate_on_interception,
 	[SVM_EXIT_READ_DR3]			= emulate_on_interception,
+	[SVM_EXIT_READ_DR4]			= emulate_on_interception,
+	[SVM_EXIT_READ_DR5]			= emulate_on_interception,
+	[SVM_EXIT_READ_DR6]			= emulate_on_interception,
+	[SVM_EXIT_READ_DR7]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR0]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR1]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR2]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR3]			= emulate_on_interception,
+	[SVM_EXIT_WRITE_DR4]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR5]			= emulate_on_interception,
+	[SVM_EXIT_WRITE_DR6]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR7]			= emulate_on_interception,
 	[SVM_EXIT_EXCP_BASE + DB_VECTOR]	= db_interception,
 	[SVM_EXIT_EXCP_BASE + BP_VECTOR]	= bp_interception,
@@ -2301,6 +2366,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_CPUID]			= cpuid_interception,
 	[SVM_EXIT_IRET]				= iret_interception,
 	[SVM_EXIT_INVD]				= emulate_on_interception,
+	[SVM_EXIT_PAUSE]			= pause_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
 	[SVM_EXIT_INVLPGA]			= invlpga_interception,
@@ -2314,26 +2380,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_VMSAVE]			= vmsave_interception,
 	[SVM_EXIT_STGI]				= stgi_interception,
 	[SVM_EXIT_CLGI]				= clgi_interception,
-	[SVM_EXIT_SKINIT]			= invalid_op_interception,
+	[SVM_EXIT_SKINIT]			= skinit_interception,
 	[SVM_EXIT_WBINVD]			= emulate_on_interception,
 	[SVM_EXIT_MONITOR]			= invalid_op_interception,
 	[SVM_EXIT_MWAIT]			= invalid_op_interception,
 	[SVM_EXIT_NPF]				= pf_interception,
 };
 
-static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_run *kvm_run = vcpu->run;
 	u32 exit_code = svm->vmcb->control.exit_code;
 
 	trace_kvm_exit(exit_code, svm->vmcb->save.rip);
 
+	if (unlikely(svm->nested.exit_required)) {
+		nested_svm_vmexit(svm);
+		svm->nested.exit_required = false;
+
+		return 1;
+	}
+
 	if (is_nested(svm)) {
 		int vmexit;
 
-		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
-			    exit_code, svm->vmcb->control.exit_info_1,
-			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+		trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+					svm->vmcb->control.exit_info_1,
+					svm->vmcb->control.exit_info_2,
+					svm->vmcb->control.exit_int_info,
+					svm->vmcb->control.exit_int_info_err);
 
 		vmexit = nested_svm_exit_special(svm);
 
@@ -2346,20 +2422,10 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	svm_complete_interrupts(svm);
 
-	if (npt_enabled) {
-		int mmu_reload = 0;
-		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
-			svm_set_cr0(vcpu, svm->vmcb->save.cr0);
-			mmu_reload = 1;
-		}
+	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
+	if (npt_enabled)
 		vcpu->arch.cr3 = svm->vmcb->save.cr3;
-		if (mmu_reload) {
-			kvm_mmu_reset_context(vcpu);
-			kvm_mmu_load(vcpu);
-		}
-	}
-
 
 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2383,15 +2449,15 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2383 return 0; 2449 return 0;
2384 } 2450 }
2385 2451
2386 return svm_exit_handlers[exit_code](svm, kvm_run); 2452 return svm_exit_handlers[exit_code](svm);
2387} 2453}
2388 2454
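The "return 0" fragment in the last hunk is the tail of the usual guard in front of the table dispatch; the full guard sits in elided lines, so treat this reconstruction as a sketch:

	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
	    || !svm_exit_handlers[exit_code]) {
		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
		kvm_run->hw.hardware_exit_reason = exit_code;
		return 0;	/* punt the unknown exit to userspace */
	}

	return svm_exit_handlers[exit_code](svm);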
2389static void reload_tss(struct kvm_vcpu *vcpu) 2455static void reload_tss(struct kvm_vcpu *vcpu)
2390{ 2456{
2391 int cpu = raw_smp_processor_id(); 2457 int cpu = raw_smp_processor_id();
2392 2458
2393 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 2459 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2394 svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */ 2460 sd->tss_desc->type = 9; /* available 32/64-bit TSS */
2395 load_TR_desc(); 2461 load_TR_desc();
2396} 2462}
2397 2463
@@ -2399,12 +2465,12 @@ static void pre_svm_run(struct vcpu_svm *svm)
2399{ 2465{
2400 int cpu = raw_smp_processor_id(); 2466 int cpu = raw_smp_processor_id();
2401 2467
2402 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 2468 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2403 2469
2404 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 2470 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
2405 /* FIXME: handle wraparound of asid_generation */ 2471 /* FIXME: handle wraparound of asid_generation */
2406 if (svm->asid_generation != svm_data->asid_generation) 2472 if (svm->asid_generation != sd->asid_generation)
2407 new_asid(svm, svm_data); 2473 new_asid(svm, sd);
2408} 2474}
2409 2475
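pre_svm_run() only compares generations; when they differ (typically after the vcpu migrated to another physical cpu, or after wraparound handling) it asks new_asid() for a fresh hardware ASID. A sketch of that helper as it plausibly reads after the svm_data -> sd rename:

static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
{
	if (sd->next_asid > sd->max_asid) {
		/* Out of ASIDs: open a new generation and flush, so no
		 * stale translations survive across generations. */
		++sd->asid_generation;
		sd->next_asid = 1;
		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
	}

	svm->asid_generation = sd->asid_generation;
	svm->vmcb->control.asid = sd->next_asid++;
}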
2410static void svm_inject_nmi(struct kvm_vcpu *vcpu) 2476static void svm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2413,7 +2479,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
2413 2479
2414 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; 2480 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
2415 vcpu->arch.hflags |= HF_NMI_MASK; 2481 vcpu->arch.hflags |= HF_NMI_MASK;
2416 svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); 2482 svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
2417 ++vcpu->stat.nmi_injections; 2483 ++vcpu->stat.nmi_injections;
2418} 2484}
2419 2485
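The 1UL -> 1ULL change in svm_inject_nmi() is about width, not value: the intercept field is 64 bits wide, and unsigned long is only 32 bits on 32-bit hosts, so shifting it by intercept numbers in the upper half of the field would be truncated (formally undefined behavior). INTERCEPT_IRET itself sits in the low word, so this reads as consistency hardening; in miniature:

	u64 intercept = 0;

	/* Risky pattern: on a 32-bit build, 1UL << 36 overflows the
	 * 32-bit shift and never reaches the u64 destination intact. */
	intercept |= (1UL << 36);

	/* Safe: widen to 64 bits before shifting. */
	intercept |= (1ULL << 36);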
@@ -2460,20 +2526,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
2460 !(svm->vcpu.arch.hflags & HF_NMI_MASK); 2526 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
2461} 2527}
2462 2528
2529static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
2530{
2531 struct vcpu_svm *svm = to_svm(vcpu);
2532
2533 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
2534}
2535
2536static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
2537{
2538 struct vcpu_svm *svm = to_svm(vcpu);
2539
2540 if (masked) {
2541 svm->vcpu.arch.hflags |= HF_NMI_MASK;
2542 svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
2543 } else {
2544 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
2545 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
2546 }
2547}
2548
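The mask accessors are deliberately trivial so generic x86 code can save and restore NMI blocking without knowing about hflags. A hedged usage sketch through the ops table (the field names on the generic side are an assumption here):

	/* Snapshot NMI blocking, e.g. for a vcpu-events style ioctl... */
	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);

	/* ...and reinstate it when userspace loads the state back. */
	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);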
2463static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) 2549static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
2464{ 2550{
2465 struct vcpu_svm *svm = to_svm(vcpu); 2551 struct vcpu_svm *svm = to_svm(vcpu);
2466 struct vmcb *vmcb = svm->vmcb; 2552 struct vmcb *vmcb = svm->vmcb;
2467 return (vmcb->save.rflags & X86_EFLAGS_IF) && 2553 int ret;
2468 !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && 2554
2469 gif_set(svm) && 2555 if (!gif_set(svm) ||
2470 !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)); 2556 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
2557 return 0;
2558
2559 ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
2560
2561 if (is_nested(svm))
2562 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
2563
2564 return ret;
2471} 2565}
2472 2566
2473static void enable_irq_window(struct kvm_vcpu *vcpu) 2567static void enable_irq_window(struct kvm_vcpu *vcpu)
2474{ 2568{
2475 struct vcpu_svm *svm = to_svm(vcpu); 2569 struct vcpu_svm *svm = to_svm(vcpu);
2476 nsvm_printk("Trying to open IRQ window\n");
2477 2570
2478 nested_svm_intr(svm); 2571 nested_svm_intr(svm);
2479 2572
@@ -2498,7 +2591,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
2498 /* Something prevents NMI from being injected. Single step over 2591 /* Something prevents NMI from being injected. Single step over
2499 the possible problem (IRET or exception injection or interrupt 2592 the possible problem (IRET or exception injection or interrupt
2500 shadow) */ 2593 shadow) */
2501 vcpu->arch.singlestep = true; 2594 svm->nmi_singlestep = true;
2502 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); 2595 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2503 update_db_intercept(vcpu); 2596 update_db_intercept(vcpu);
2504} 2597}
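Making nmi_singlestep its own flag (instead of the shared vcpu->arch.singlestep) lets the #DB intercept distinguish this internal single step from guest-requested single-stepping. A hedged sketch of the consuming side, as the #DB handler presumably completes the window:

	/* In db_interception(): the single step that enable_nmi_window()
	 * armed has completed, so undo it -- unless userspace also wanted
	 * single-stepping. */
	if (svm->nmi_singlestep) {
		svm->nmi_singlestep = false;
		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
			svm->vmcb->save.rflags &=
				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
		update_db_intercept(&svm->vcpu);
	}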
@@ -2588,13 +2681,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
2588#define R "e" 2681#define R "e"
2589#endif 2682#endif
2590 2683
2591static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2684static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2592{ 2685{
2593 struct vcpu_svm *svm = to_svm(vcpu); 2686 struct vcpu_svm *svm = to_svm(vcpu);
2594 u16 fs_selector; 2687 u16 fs_selector;
2595 u16 gs_selector; 2688 u16 gs_selector;
2596 u16 ldt_selector; 2689 u16 ldt_selector;
2597 2690
2691 /*
2692 * A vmexit emulation is required before the vcpu can be executed
2693 * again.
2694 */
2695 if (unlikely(svm->nested.exit_required))
2696 return;
2697
2598 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; 2698 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
2599 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; 2699 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
2600 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; 2700 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@ -2727,12 +2827,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
2727 2827
2728 svm->vmcb->save.cr3 = root; 2828 svm->vmcb->save.cr3 = root;
2729 force_new_asid(vcpu); 2829 force_new_asid(vcpu);
2730
2731 if (vcpu->fpu_active) {
2732 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
2733 svm->vmcb->save.cr0 |= X86_CR0_TS;
2734 vcpu->fpu_active = 0;
2735 }
2736} 2830}
2737 2831
2738static int is_disabled(void) 2832static int is_disabled(void)
@@ -2781,6 +2875,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
2781 return 0; 2875 return 0;
2782} 2876}
2783 2877
2878static void svm_cpuid_update(struct kvm_vcpu *vcpu)
2879{
2880}
2881
2784static const struct trace_print_flags svm_exit_reasons_str[] = { 2882static const struct trace_print_flags svm_exit_reasons_str[] = {
2785 { SVM_EXIT_READ_CR0, "read_cr0" }, 2883 { SVM_EXIT_READ_CR0, "read_cr0" },
2786 { SVM_EXIT_READ_CR3, "read_cr3" }, 2884 { SVM_EXIT_READ_CR3, "read_cr3" },
@@ -2834,9 +2932,22 @@ static const struct trace_print_flags svm_exit_reasons_str[] = {
2834 { -1, NULL } 2932 { -1, NULL }
2835}; 2933};
2836 2934
2837static bool svm_gb_page_enable(void) 2935static int svm_get_lpage_level(void)
2838{ 2936{
2839 return true; 2937 return PT_PDPE_LEVEL;
2938}
2939
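Replacing the gb_page_enable() bool with get_lpage_level() turns a yes/no answer on gigabyte pages into the largest paging level the backend supports (PT_PDPE_LEVEL entries map 1GB regions under NPT). A small sketch of the kind of clamp the common MMU can then apply (variable names assumed):

	if (level > kvm_x86_ops->get_lpage_level())
		level = kvm_x86_ops->get_lpage_level();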
2940static bool svm_rdtscp_supported(void)
2941{
2942 return false;
2943}
2944
2945static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
2946{
2947 struct vcpu_svm *svm = to_svm(vcpu);
2948
2949 update_cr0_intercept(svm);
2950 svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
2840} 2951}
2841 2952
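svm_fpu_deactivate() re-arms the #NM trap and lets update_cr0_intercept() restore the CR0 traps, replacing the ad-hoc block that used to live in svm_set_cr3(). Its counterpart, wired up as .fpu_activate just below, presumably reverses both; a hedged sketch:

static void svm_fpu_activate(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* Stop trapping #NM and give the guest an honest CR0.TS. */
	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
	svm->vcpu.fpu_active = 1;
	update_cr0_intercept(svm);
}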
2842static struct kvm_x86_ops svm_x86_ops = { 2953static struct kvm_x86_ops svm_x86_ops = {
@@ -2865,6 +2976,7 @@ static struct kvm_x86_ops svm_x86_ops = {
2865 .set_segment = svm_set_segment, 2976 .set_segment = svm_set_segment,
2866 .get_cpl = svm_get_cpl, 2977 .get_cpl = svm_get_cpl,
2867 .get_cs_db_l_bits = kvm_get_cs_db_l_bits, 2978 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
2979 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
2868 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, 2980 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
2869 .set_cr0 = svm_set_cr0, 2981 .set_cr0 = svm_set_cr0,
2870 .set_cr3 = svm_set_cr3, 2982 .set_cr3 = svm_set_cr3,
@@ -2879,6 +2991,8 @@ static struct kvm_x86_ops svm_x86_ops = {
2879 .cache_reg = svm_cache_reg, 2991 .cache_reg = svm_cache_reg,
2880 .get_rflags = svm_get_rflags, 2992 .get_rflags = svm_get_rflags,
2881 .set_rflags = svm_set_rflags, 2993 .set_rflags = svm_set_rflags,
2994 .fpu_activate = svm_fpu_activate,
2995 .fpu_deactivate = svm_fpu_deactivate,
2882 2996
2883 .tlb_flush = svm_flush_tlb, 2997 .tlb_flush = svm_flush_tlb,
2884 2998
@@ -2893,6 +3007,8 @@ static struct kvm_x86_ops svm_x86_ops = {
2893 .queue_exception = svm_queue_exception, 3007 .queue_exception = svm_queue_exception,
2894 .interrupt_allowed = svm_interrupt_allowed, 3008 .interrupt_allowed = svm_interrupt_allowed,
2895 .nmi_allowed = svm_nmi_allowed, 3009 .nmi_allowed = svm_nmi_allowed,
3010 .get_nmi_mask = svm_get_nmi_mask,
3011 .set_nmi_mask = svm_set_nmi_mask,
2896 .enable_nmi_window = enable_nmi_window, 3012 .enable_nmi_window = enable_nmi_window,
2897 .enable_irq_window = enable_irq_window, 3013 .enable_irq_window = enable_irq_window,
2898 .update_cr8_intercept = update_cr8_intercept, 3014 .update_cr8_intercept = update_cr8_intercept,
@@ -2902,7 +3018,11 @@ static struct kvm_x86_ops svm_x86_ops = {
2902 .get_mt_mask = svm_get_mt_mask, 3018 .get_mt_mask = svm_get_mt_mask,
2903 3019
2904 .exit_reasons_str = svm_exit_reasons_str, 3020 .exit_reasons_str = svm_exit_reasons_str,
2905 .gb_page_enable = svm_gb_page_enable, 3021 .get_lpage_level = svm_get_lpage_level,
3022
3023 .cpuid_update = svm_cpuid_update,
3024
3025 .rdtscp_supported = svm_rdtscp_supported,
2906}; 3026};
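None of the new hooks is called directly; everything funnels through this table, which svm_init() (its first line visible below) hands to the KVM core. At this point in history the body amounted to roughly the following, reconstructed as a sketch:

	/* The core stores the ops pointer and sizes per-vcpu allocations. */
	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), THIS_MODULE);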
2907 3027
2908static int __init svm_init(void) 3028static int __init svm_init(void)