path: root/arch/x86/kvm/svm.c
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r--  arch/x86/kvm/svm.c  352
1 file changed, 266 insertions(+), 86 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1a582f1090e8..89e0be2c10d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -47,6 +47,18 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_DEATURE_SVML (1 << 2)
 
+#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+
+/* enable NPT for AMD64 and X86 with PAE */
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static bool npt_enabled = true;
+#else
+static bool npt_enabled = false;
+#endif
+static int npt = 1;
+
+module_param(npt, int, S_IRUGO);
+
 static void kvm_reput_irq(struct vcpu_svm *svm);
 
 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
@@ -54,8 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
-unsigned long iopm_base;
-unsigned long msrpm_base;
+static unsigned long iopm_base;
 
 struct kvm_ldttss_desc {
 	u16 limit0;
@@ -182,7 +193,7 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
 
 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-	if (!(efer & EFER_LMA))
+	if (!npt_enabled && !(efer & EFER_LMA))
 		efer &= ~EFER_LME;
 
 	to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
@@ -219,12 +230,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
+		printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
 	}
 	if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
 		printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
-		       __FUNCTION__,
+		       __func__,
 		       svm->vmcb->save.rip,
 		       svm->next_rip);
 
@@ -279,11 +290,7 @@ static void svm_hardware_enable(void *garbage)
 
 	struct svm_cpu_data *svm_data;
 	uint64_t efer;
-#ifdef CONFIG_X86_64
-	struct desc_ptr gdt_descr;
-#else
 	struct desc_ptr gdt_descr;
-#endif
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -302,7 +309,6 @@ static void svm_hardware_enable(void *garbage)
 	svm_data->asid_generation = 1;
 	svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	svm_data->next_asid = svm_data->max_asid + 1;
-	svm_features = cpuid_edx(SVM_CPUID_FUNC);
 
 	asm volatile ("sgdt %0" : "=m"(gdt_descr));
 	gdt = (struct desc_struct *)gdt_descr.address;
@@ -361,12 +367,51 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
 		BUG();
 }
 
+static void svm_vcpu_init_msrpm(u32 *msrpm)
+{
+	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
+
+#ifdef CONFIG_X86_64
+	set_msr_interception(msrpm, MSR_GS_BASE, 1, 1);
+	set_msr_interception(msrpm, MSR_FS_BASE, 1, 1);
+	set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1);
+	set_msr_interception(msrpm, MSR_LSTAR, 1, 1);
+	set_msr_interception(msrpm, MSR_CSTAR, 1, 1);
+	set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1);
+#endif
+	set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
+}
+
+static void svm_enable_lbrv(struct vcpu_svm *svm)
+{
+	u32 *msrpm = svm->msrpm;
+
+	svm->vmcb->control.lbr_ctl = 1;
+	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+}
+
+static void svm_disable_lbrv(struct vcpu_svm *svm)
+{
+	u32 *msrpm = svm->msrpm;
+
+	svm->vmcb->control.lbr_ctl = 0;
+	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
+	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
+	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
+	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+}
+
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
 	struct page *iopm_pages;
-	struct page *msrpm_pages;
-	void *iopm_va, *msrpm_va;
+	void *iopm_va;
 	int r;
 
 	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
@@ -379,41 +424,33 @@ static __init int svm_hardware_setup(void)
 	clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
 	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
 
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
 
-	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+	for_each_online_cpu(cpu) {
+		r = svm_cpu_init(cpu);
+		if (r)
+			goto err;
+	}
 
-	r = -ENOMEM;
-	if (!msrpm_pages)
-		goto err_1;
+	svm_features = cpuid_edx(SVM_CPUID_FUNC);
 
-	msrpm_va = page_address(msrpm_pages);
-	memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
-	msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT;
+	if (!svm_has(SVM_FEATURE_NPT))
+		npt_enabled = false;
 
-#ifdef CONFIG_X86_64
-	set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1);
-	set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1);
-	set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1);
-	set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1);
-	set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1);
-	set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1);
-#endif
-	set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1);
-	set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1);
-	set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1);
-	set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1);
+	if (npt_enabled && !npt) {
+		printk(KERN_INFO "kvm: Nested Paging disabled\n");
+		npt_enabled = false;
+	}
 
-	for_each_online_cpu(cpu) {
-		r = svm_cpu_init(cpu);
-		if (r)
-			goto err_2;
+	if (npt_enabled) {
+		printk(KERN_INFO "kvm: Nested Paging enabled\n");
+		kvm_enable_tdp();
 	}
+
 	return 0;
 
-err_2:
-	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
-	msrpm_base = 0;
-err_1:
+err:
 	__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
 	iopm_base = 0;
 	return r;
@@ -421,9 +458,8 @@ err_1:
 
 static __exit void svm_hardware_unsetup(void)
 {
-	__free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER);
 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
-	iopm_base = msrpm_base = 0;
+	iopm_base = 0;
 }
 
 static void init_seg(struct vmcb_seg *seg)
@@ -443,15 +479,14 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
 	seg->base = 0;
 }
 
-static void init_vmcb(struct vmcb *vmcb)
+static void init_vmcb(struct vcpu_svm *svm)
 {
-	struct vmcb_control_area *control = &vmcb->control;
-	struct vmcb_save_area *save = &vmcb->save;
+	struct vmcb_control_area *control = &svm->vmcb->control;
+	struct vmcb_save_area *save = &svm->vmcb->save;
 
 	control->intercept_cr_read =	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
-					INTERCEPT_CR4_MASK |
-					INTERCEPT_CR8_MASK;
+					INTERCEPT_CR4_MASK;
 
 	control->intercept_cr_write =	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
@@ -471,23 +506,13 @@ static void init_vmcb(struct vmcb *vmcb)
 					INTERCEPT_DR7_MASK;
 
 	control->intercept_exceptions = (1 << PF_VECTOR) |
-					(1 << UD_VECTOR);
+					(1 << UD_VECTOR) |
+					(1 << MC_VECTOR);
 
 
 	control->intercept =	(1ULL << INTERCEPT_INTR) |
 				(1ULL << INTERCEPT_NMI) |
 				(1ULL << INTERCEPT_SMI) |
-				/*
-				 * selective cr0 intercept bug?
-				 *	0:   0f 22 d8	mov    %eax,%cr3
-				 *	3:   0f 20 c0	mov    %cr0,%eax
-				 *	6:   0d 00 00 00 80	or     $0x80000000,%eax
-				 *	b:   0f 22 c0	mov    %eax,%cr0
-				 * set cr3 ->interception
-				 * get cr0 ->interception
-				 * set cr0 -> no interception
-				 */
-		/*		(1ULL << INTERCEPT_SELECTIVE_CR0) | */
 				(1ULL << INTERCEPT_CPUID) |
 				(1ULL << INTERCEPT_INVD) |
 				(1ULL << INTERCEPT_HLT) |
@@ -508,7 +533,7 @@ static void init_vmcb(struct vmcb *vmcb)
 				(1ULL << INTERCEPT_MWAIT);
 
 	control->iopm_base_pa = iopm_base;
-	control->msrpm_base_pa = msrpm_base;
+	control->msrpm_base_pa = __pa(svm->msrpm);
 	control->tsc_offset = 0;
 	control->int_ctl = V_INTR_MASKING_MASK;
 
@@ -550,13 +575,30 @@ static void init_vmcb(struct vmcb *vmcb)
 	save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
+
+	if (npt_enabled) {
+		/* Setup VMCB for Nested Paging */
+		control->nested_ctl = 1;
+		control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH);
+		control->intercept_exceptions &= ~(1 << PF_VECTOR);
+		control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
+						INTERCEPT_CR3_MASK);
+		control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
+						 INTERCEPT_CR3_MASK);
+		save->g_pat = 0x0007040600070406ULL;
+		/* enable caching because the QEMU Bios doesn't enable it */
+		save->cr0 = X86_CR0_ET;
+		save->cr3 = 0;
+		save->cr4 = 0;
+	}
+	force_new_asid(&svm->vcpu);
 }
 
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	init_vmcb(svm->vmcb);
+	init_vmcb(svm);
 
 	if (vcpu->vcpu_id != 0) {
 		svm->vmcb->save.rip = 0;
@@ -571,6 +613,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 {
 	struct vcpu_svm *svm;
 	struct page *page;
+	struct page *msrpm_pages;
 	int err;
 
 	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -589,12 +632,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto uninit;
 	}
 
+	err = -ENOMEM;
+	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+	if (!msrpm_pages)
+		goto uninit;
+	svm->msrpm = page_address(msrpm_pages);
+	svm_vcpu_init_msrpm(svm->msrpm);
+
 	svm->vmcb = page_address(page);
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->asid_generation = 0;
 	memset(svm->db_regs, 0, sizeof(svm->db_regs));
-	init_vmcb(svm->vmcb);
+	init_vmcb(svm);
 
 	fx_init(&svm->vcpu);
 	svm->vcpu.fpu_active = 1;
@@ -617,6 +667,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
+	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -731,6 +782,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	var->unusable = !var->present;
 }
 
+static int svm_get_cpl(struct kvm_vcpu *vcpu)
+{
+	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
+
+	return save->cpl;
+}
+
 static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -784,6 +842,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 		}
 	}
 #endif
+	if (npt_enabled)
+		goto set;
+
 	if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
 		svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 		vcpu->fpu_active = 1;
@@ -791,18 +852,29 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	vcpu->arch.cr0 = cr0;
 	cr0 |= X86_CR0_PG | X86_CR0_WP;
-	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	if (!vcpu->fpu_active) {
 		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
 		cr0 |= X86_CR0_TS;
 	}
+set:
+	/*
+	 * re-enable caching here because the QEMU bios
+	 * does not do it - this results in some delay at
+	 * reboot
+	 */
+	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
 }
 
 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	vcpu->arch.cr4 = cr4;
-	to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
+	unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
+
+	vcpu->arch.cr4 = cr4;
+	if (!npt_enabled)
+		cr4 |= X86_CR4_PAE;
+	cr4 |= host_cr4_mce;
+	to_svm(vcpu)->vmcb->save.cr4 = cr4;
 }
 
 static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -833,13 +905,6 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-/* FIXME:
-
-  svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK;
-  svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
-
-*/
-
 static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
 {
 	return -EOPNOTSUPP;
@@ -920,7 +985,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 	}
 	default:
 		printk(KERN_DEBUG "%s: unexpected dr %u\n",
-		       __FUNCTION__, dr);
+		       __func__, dr);
 		*exception = UD_VECTOR;
 		return;
 	}
@@ -962,6 +1027,19 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	/*
+	 * On an #MC intercept the MCE handler is not called automatically in
+	 * the host. So do it by hand here.
+	 */
+	asm volatile (
+		"int $0x12\n");
+	/* not sure if we ever come back to this point */
+
+	return 1;
+}
+
 static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	/*
@@ -969,7 +1047,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	 * so reinitialize it.
 	 */
 	clear_page(svm->vmcb);
-	init_vmcb(svm->vmcb);
+	init_vmcb(svm);
 
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 	return 0;
@@ -1033,9 +1111,18 @@ static int invalid_op_interception(struct vcpu_svm *svm,
 static int task_switch_interception(struct vcpu_svm *svm,
 				    struct kvm_run *kvm_run)
 {
-	pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__);
-	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-	return 0;
+	u16 tss_selector;
+
+	tss_selector = (u16)svm->vmcb->control.exit_info_1;
+	if (svm->vmcb->control.exit_info_2 &
+	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
+		return kvm_task_switch(&svm->vcpu, tss_selector,
+				       TASK_SWITCH_IRET);
+	if (svm->vmcb->control.exit_info_2 &
+	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
+		return kvm_task_switch(&svm->vcpu, tss_selector,
+				       TASK_SWITCH_JMP);
+	return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
 }
 
 static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1049,7 +1136,7 @@ static int emulate_on_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
 	if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
-		pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
+		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
@@ -1179,8 +1266,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 		svm->vmcb->save.sysenter_esp = data;
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
-		pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
-				__FUNCTION__, data);
+		if (!svm_has(SVM_FEATURE_LBRV)) {
+			pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
+					__func__, data);
+			break;
+		}
+		if (data & DEBUGCTL_RESERVED_BITS)
+			return 1;
+
+		svm->vmcb->save.dbgctl = data;
+		if (data & (1ULL<<0))
+			svm_enable_lbrv(svm);
+		else
+			svm_disable_lbrv(svm);
 		break;
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL1:
@@ -1265,6 +1363,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR]	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR]	= nm_interception,
+	[SVM_EXIT_EXCP_BASE + MC_VECTOR]	= mc_interception,
 	[SVM_EXIT_INTR]				= nop_on_interception,
 	[SVM_EXIT_NMI]				= nop_on_interception,
 	[SVM_EXIT_SMI]				= nop_on_interception,
@@ -1290,14 +1389,34 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_WBINVD]			= emulate_on_interception,
 	[SVM_EXIT_MONITOR]			= invalid_op_interception,
 	[SVM_EXIT_MWAIT]			= invalid_op_interception,
+	[SVM_EXIT_NPF]				= pf_interception,
 };
 
-
 static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 exit_code = svm->vmcb->control.exit_code;
 
+	if (npt_enabled) {
+		int mmu_reload = 0;
+		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
+			svm_set_cr0(vcpu, svm->vmcb->save.cr0);
+			mmu_reload = 1;
+		}
+		vcpu->arch.cr0 = svm->vmcb->save.cr0;
+		vcpu->arch.cr3 = svm->vmcb->save.cr3;
+		if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+			if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
+				kvm_inject_gp(vcpu, 0);
+				return 1;
+			}
+		}
+		if (mmu_reload) {
+			kvm_mmu_reset_context(vcpu);
+			kvm_mmu_load(vcpu);
+		}
+	}
+
 	kvm_reput_irq(svm);
 
 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
@@ -1308,10 +1427,11 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	}
 
 	if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
-	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR)
+	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
+	    exit_code != SVM_EXIT_NPF)
 		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
 		       "exit_code 0x%x\n",
-		       __FUNCTION__, svm->vmcb->control.exit_int_info,
+		       __func__, svm->vmcb->control.exit_int_info,
 		       exit_code);
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
@@ -1364,6 +1484,27 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 	svm_inject_irq(svm, irq);
 }
 
+static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+	int max_irr, tpr;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
+		return;
+
+	vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+
+	max_irr = kvm_lapic_find_highest_irr(vcpu);
+	if (max_irr == -1)
+		return;
+
+	tpr = kvm_lapic_get_cr8(vcpu) << 4;
+
+	if (tpr >= (max_irr & 0xf0))
+		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+}
+
 static void svm_intr_assist(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1376,14 +1517,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 			SVM_EVTINJ_VEC_MASK;
 		vmcb->control.exit_int_info = 0;
 		svm_inject_irq(svm, intr_vector);
-		return;
+		goto out;
 	}
 
 	if (vmcb->control.int_ctl & V_IRQ_MASK)
-		return;
+		goto out;
 
 	if (!kvm_cpu_has_interrupt(vcpu))
-		return;
+		goto out;
 
 	if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
 	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
@@ -1391,12 +1532,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 		/* unable to deliver irq, set pending irq */
 		vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
 		svm_inject_irq(svm, 0x0);
-		return;
+		goto out;
 	}
 	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
 	intr_vector = kvm_cpu_get_interrupt(vcpu);
 	svm_inject_irq(svm, intr_vector);
 	kvm_timer_intr_post(vcpu, intr_vector);
+out:
+	update_cr8_intercept(vcpu);
 }
 
 static void kvm_reput_irq(struct vcpu_svm *svm)
@@ -1482,6 +1625,29 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
+		int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
+		kvm_lapic_set_tpr(vcpu, cr8);
+	}
+}
+
+static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u64 cr8;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return;
+
+	cr8 = kvm_get_cr8(vcpu);
+	svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
+	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
+}
+
 static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1491,6 +1657,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	pre_svm_run(svm);
 
+	sync_lapic_to_cr8(vcpu);
+
 	save_host_msrs(vcpu);
 	fs_selector = read_fs();
 	gs_selector = read_gs();
@@ -1499,6 +1667,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	svm->host_dr6 = read_dr6();
 	svm->host_dr7 = read_dr7();
 	svm->vmcb->save.cr2 = vcpu->arch.cr2;
+	/* required for live migration with NPT */
+	if (npt_enabled)
+		svm->vmcb->save.cr3 = vcpu->arch.cr3;
 
 	if (svm->vmcb->save.dr7 & 0xff) {
 		write_dr7(0);
@@ -1635,6 +1806,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	stgi();
 
+	sync_cr8_to_lapic(vcpu);
+
 	svm->next_rip = 0;
 }
 
@@ -1642,6 +1815,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	if (npt_enabled) {
+		svm->vmcb->control.nested_cr3 = root;
+		force_new_asid(vcpu);
+		return;
+	}
+
 	svm->vmcb->save.cr3 = root;
 	force_new_asid(vcpu);
 
@@ -1709,6 +1888,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.get_segment_base = svm_get_segment_base,
 	.get_segment = svm_get_segment,
 	.set_segment = svm_set_segment,
+	.get_cpl = svm_get_cpl,
 	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
 	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
 	.set_cr0 = svm_set_cr0,