Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r--  arch/x86/kvm/svm.c  944
1 file changed, 619 insertions(+), 325 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 737361fcd503..96dc232bfc56 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -44,10 +44,11 @@ MODULE_LICENSE("GPL");
 #define SEG_TYPE_LDT 2
 #define SEG_TYPE_BUSY_TSS16 3
 
 #define SVM_FEATURE_NPT  (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_NRIP (1 << 3)
 #define SVM_FEATURE_PAUSE_FILTER (1 << 10)
 
 #define NESTED_EXIT_HOST        0       /* Exit handled on host level */
 #define NESTED_EXIT_DONE        1       /* Exit caused nested vmexit  */
@@ -70,6 +71,7 @@ struct kvm_vcpu;
 struct nested_state {
        struct vmcb *hsave;
        u64 hsave_msr;
+       u64 vm_cr_msr;
        u64 vmcb;
 
        /* These are the merged vectors */
@@ -77,6 +79,7 @@ struct nested_state {
 
        /* gpa pointers to the real vectors */
        u64 vmcb_msrpm;
+       u64 vmcb_iopm;
 
        /* A VMEXIT is required but not yet emulated */
        bool exit_required;
@@ -91,6 +94,9 @@ struct nested_state {
 
 };
 
+#define MSRPM_OFFSETS 16
+static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
+
 struct vcpu_svm {
        struct kvm_vcpu vcpu;
        struct vmcb *vmcb;
@@ -110,13 +116,39 @@ struct vcpu_svm {
        struct nested_state nested;
 
        bool nmi_singlestep;
+
+       unsigned int3_injected;
+       unsigned long int3_rip;
+};
+
+#define MSR_INVALID                    0xffffffffU
+
+static struct svm_direct_access_msrs {
+       u32 index;   /* Index of the MSR */
+       bool always; /* True if intercept is always on */
+} direct_access_msrs[] = {
+       { .index = MSR_K6_STAR,                 .always = true },
+       { .index = MSR_IA32_SYSENTER_CS,        .always = true },
+#ifdef CONFIG_X86_64
+       { .index = MSR_GS_BASE,                 .always = true },
+       { .index = MSR_FS_BASE,                 .always = true },
+       { .index = MSR_KERNEL_GS_BASE,          .always = true },
+       { .index = MSR_LSTAR,                   .always = true },
+       { .index = MSR_CSTAR,                   .always = true },
+       { .index = MSR_SYSCALL_MASK,            .always = true },
+#endif
+       { .index = MSR_IA32_LASTBRANCHFROMIP,   .always = false },
+       { .index = MSR_IA32_LASTBRANCHTOIP,     .always = false },
+       { .index = MSR_IA32_LASTINTFROMIP,      .always = false },
+       { .index = MSR_IA32_LASTINTTOIP,        .always = false },
+       { .index = MSR_INVALID,                 .always = false },
 };
 
 /* enable NPT for AMD64 and X86 with PAE */
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 static bool npt_enabled = true;
 #else
-static bool npt_enabled = false;
+static bool npt_enabled;
 #endif
 static int npt = 1;
 
@@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
 static int nested_svm_exit_handled(struct vcpu_svm *svm);
+static int nested_svm_intercept(struct vcpu_svm *svm);
 static int nested_svm_vmexit(struct vcpu_svm *svm);
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                                       bool has_error_code, u32 error_code);
@@ -163,8 +196,8 @@ static unsigned long iopm_base;
 struct kvm_ldttss_desc {
        u16 limit0;
        u16 base0;
-       unsigned base1 : 8, type : 5, dpl : 2, p : 1;
-       unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
+       unsigned base1:8, type:5, dpl:2, p:1;
+       unsigned limit1:4, zero0:3, g:1, base2:8;
        u32 base3;
        u32 zero1;
 } __attribute__((packed));
@@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 #define MSRS_RANGE_SIZE 2048
 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
 
+static u32 svm_msrpm_offset(u32 msr)
+{
+       u32 offset;
+       int i;
+
+       for (i = 0; i < NUM_MSR_MAPS; i++) {
+               if (msr < msrpm_ranges[i] ||
+                   msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
+                       continue;
+
+               offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8   */
+               offset += (i * MSRS_RANGE_SIZE);       /* add range offset */
+
+               /* Now we have the u8 offset - but need the u32 offset */
+               return offset / 4;
+       }
+
+       /* MSR not in any range */
+       return MSR_INVALID;
+}
+
 #define MAX_INST_SIZE 15
 
 static inline u32 svm_has(u32 feat)
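A quick standalone check of the offset arithmetic above (a sketch for illustration only, not part of the patch; the constants are copied from the surrounding context):

    #include <stdio.h>
    #include <stdint.h>

    #define MSR_INVALID     0xffffffffU
    #define NUM_MSR_MAPS    3
    #define MSRS_RANGE_SIZE 2048
    #define MSRS_IN_RANGE   (MSRS_RANGE_SIZE * 8 / 2)

    static const uint32_t msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};

    static uint32_t msrpm_offset(uint32_t msr)
    {
            int i;
            for (i = 0; i < NUM_MSR_MAPS; i++) {
                    if (msr < msrpm_ranges[i] ||
                        msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
                            continue;
                    /* 4 MSRs per byte, then byte offset -> u32 offset */
                    return ((msr - msrpm_ranges[i]) / 4 + i * MSRS_RANGE_SIZE) / 4;
            }
            return MSR_INVALID;
    }

    int main(void)
    {
            /* MSR_LSTAR (0xc0000082): 0x82/4 = 32, +2048 = 2080 bytes -> u32 520 */
            printf("%u\n", msrpm_offset(0xc0000082)); /* prints 520 */
            return 0;
    }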
@@ -213,7 +267,7 @@ static inline void stgi(void)
 
 static inline void invlpga(unsigned long addr, u32 asid)
 {
-       asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid));
+       asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
 }
 
 static inline void force_new_asid(struct kvm_vcpu *vcpu)
@@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
        vcpu->arch.efer = efer;
 }
 
-static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code)
-{
-       struct vcpu_svm *svm = to_svm(vcpu);
-
-       /* If we are within a nested VM we'd better #VMEXIT and let the
-          guest handle the exception */
-       if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
-               return;
-
-       svm->vmcb->control.event_inj = nr
-               | SVM_EVTINJ_VALID
-               | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
-               | SVM_EVTINJ_TYPE_EXEPT;
-       svm->vmcb->control.event_inj_err = error_code;
-}
-
 static int is_external_interrupt(u32 info)
 {
        info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
@@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
        u32 ret = 0;
 
        if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
-               ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
+               ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
        return ret & mask;
 }
 
@@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (svm->vmcb->control.next_rip != 0)
+               svm->next_rip = svm->vmcb->control.next_rip;
+
        if (!svm->next_rip) {
                if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
                                EMULATE_DONE)
@@ -297,6 +337,43 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        svm_set_interrupt_shadow(vcpu, 0);
 }
 
+static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
+                               bool has_error_code, u32 error_code,
+                               bool reinject)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       /*
+        * If we are within a nested VM we'd better #VMEXIT and let the guest
+        * handle the exception
+        */
+       if (!reinject &&
+           nested_svm_check_exception(svm, nr, has_error_code, error_code))
+               return;
+
+       if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) {
+               unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
+
+               /*
+                * For guest debugging where we have to reinject #BP if some
+                * INT3 is guest-owned:
+                * Emulate nRIP by moving RIP forward. Will fail if injection
+                * raises a fault that is not intercepted. Still better than
+                * failing in all cases.
+                */
+               skip_emulated_instruction(&svm->vcpu);
+               rip = kvm_rip_read(&svm->vcpu);
+               svm->int3_rip = rip + svm->vmcb->save.cs.base;
+               svm->int3_injected = rip - old_rip;
+       }
+
+       svm->vmcb->control.event_inj = nr
+               | SVM_EVTINJ_VALID
+               | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
+               | SVM_EVTINJ_TYPE_EXEPT;
+       svm->vmcb->control.event_inj_err = error_code;
+}
+
 static int has_svm(void)
 {
        const char *msg;
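A rough standalone model of the #BP bookkeeping above (values invented for illustration): skipping the guest's 1-byte INT3 moves RIP forward, and the distance is remembered so the injection can be rolled back later if it never completed.

    #include <stdio.h>

    int main(void)
    {
            unsigned long cs_base = 0, old_rip = 0x400100;
            unsigned long rip = old_rip + 1;        /* after skipping INT3 (0xcc) */
            unsigned int3_injected = rip - old_rip; /* bytes to roll back: 1 */
            unsigned long int3_rip = rip + cs_base; /* linear address past the INT3 */

            printf("len=%u resume=%#lx\n", int3_injected, int3_rip);
            return 0;
    }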
@@ -319,7 +396,7 @@ static int svm_hardware_enable(void *garbage)
 
        struct svm_cpu_data *sd;
        uint64_t efer;
-       struct descriptor_table gdt_descr;
+       struct desc_ptr gdt_descr;
        struct desc_struct *gdt;
        int me = raw_smp_processor_id();
 
@@ -344,8 +421,8 @@ static int svm_hardware_enable(void *garbage)
        sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
        sd->next_asid = sd->max_asid + 1;
 
-       kvm_get_gdt(&gdt_descr);
-       gdt = (struct desc_struct *)gdt_descr.base;
+       native_store_gdt(&gdt_descr);
+       gdt = (struct desc_struct *)gdt_descr.address;
        sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
        wrmsrl(MSR_EFER, efer | EFER_SVME);
@@ -391,42 +468,98 @@ err_1:
 
 }
 
+static bool valid_msr_intercept(u32 index)
+{
+       int i;
+
+       for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
+               if (direct_access_msrs[i].index == index)
+                       return true;
+
+       return false;
+}
+
 static void set_msr_interception(u32 *msrpm, unsigned msr,
                                 int read, int write)
 {
+       u8 bit_read, bit_write;
+       unsigned long tmp;
+       u32 offset;
+
+       /*
+        * If this warning triggers extend the direct_access_msrs list at the
+        * beginning of the file
+        */
+       WARN_ON(!valid_msr_intercept(msr));
+
+       offset    = svm_msrpm_offset(msr);
+       bit_read  = 2 * (msr & 0x0f);
+       bit_write = 2 * (msr & 0x0f) + 1;
+       tmp       = msrpm[offset];
+
+       BUG_ON(offset == MSR_INVALID);
+
+       read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
+       write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
+
+       msrpm[offset] = tmp;
+}
+
+static void svm_vcpu_init_msrpm(u32 *msrpm)
+{
        int i;
 
-       for (i = 0; i < NUM_MSR_MAPS; i++) {
-               if (msr >= msrpm_ranges[i] &&
-                   msr < msrpm_ranges[i] + MSRS_IN_RANGE) {
-                       u32 msr_offset = (i * MSRS_IN_RANGE + msr -
-                                         msrpm_ranges[i]) * 2;
-
-                       u32 *base = msrpm + (msr_offset / 32);
-                       u32 msr_shift = msr_offset % 32;
-                       u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1);
-                       *base = (*base & ~(0x3 << msr_shift)) |
-                               (mask << msr_shift);
+       memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
+
+       for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
+               if (!direct_access_msrs[i].always)
+                       continue;
+
+               set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
+       }
+}
+
+static void add_msr_offset(u32 offset)
+{
+       int i;
+
+       for (i = 0; i < MSRPM_OFFSETS; ++i) {
+
+               /* Offset already in list? */
+               if (msrpm_offsets[i] == offset)
                        return;
-               }
+
+               /* Slot used by another offset? */
+               if (msrpm_offsets[i] != MSR_INVALID)
+                       continue;
+
+               /* Add offset to list */
+               msrpm_offsets[i] = offset;
+
+               return;
        }
+
+       /*
+        * If this BUG triggers the msrpm_offsets table has an overflow. Just
+        * increase MSRPM_OFFSETS in this case.
+        */
        BUG();
 }
 
-static void svm_vcpu_init_msrpm(u32 *msrpm)
+static void init_msrpm_offsets(void)
 {
-       memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
+       int i;
 
-#ifdef CONFIG_X86_64
-       set_msr_interception(msrpm, MSR_GS_BASE, 1, 1);
-       set_msr_interception(msrpm, MSR_FS_BASE, 1, 1);
-       set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1);
-       set_msr_interception(msrpm, MSR_LSTAR, 1, 1);
-       set_msr_interception(msrpm, MSR_CSTAR, 1, 1);
-       set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1);
-#endif
-       set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
-       set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
+       memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
+
+       for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
+               u32 offset;
+
+               offset = svm_msrpm_offset(direct_access_msrs[i].index);
+               BUG_ON(offset == MSR_INVALID);
+
+               add_msr_offset(offset);
+       }
 }
 
 static void svm_enable_lbrv(struct vcpu_svm *svm)
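For orientation, the two permission bits of one MSR inside the u32 selected by svm_msrpm_offset() look like this (standalone sketch; the sample MSR is chosen arbitrarily):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t msr = 0xc0000081;                 /* MSR_K6_STAR */
            unsigned bit_read  = 2 * (msr & 0x0f);     /* bit 2 */
            unsigned bit_write = 2 * (msr & 0x0f) + 1; /* bit 3 */
            uint32_t word = 0xffffffff;                /* everything intercepted */

            word &= ~(1u << bit_read);   /* read == 1: clear bit -> pass through */
            word &= ~(1u << bit_write);  /* write == 1: clear bit -> pass through */

            printf("bits %u/%u -> word %#x\n", bit_read, bit_write, word);
            return 0;
    }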
@@ -467,6 +600,8 @@ static __init int svm_hardware_setup(void)
        memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
        iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
 
+       init_msrpm_offsets();
+
        if (boot_cpu_has(X86_FEATURE_NX))
                kvm_enable_efer_bits(EFER_NX);
 
@@ -523,7 +658,7 @@ static void init_seg(struct vmcb_seg *seg)
 {
        seg->selector = 0;
        seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
-               SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
+                     SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
        seg->limit = 0xffff;
        seg->base = 0;
 }
@@ -543,16 +678,16 @@ static void init_vmcb(struct vcpu_svm *svm)
 
        svm->vcpu.fpu_active = 1;
 
-       control->intercept_cr_read =    INTERCEPT_CR0_MASK |
+       control->intercept_cr_read =   INTERCEPT_CR0_MASK |
                                        INTERCEPT_CR3_MASK |
                                        INTERCEPT_CR4_MASK;
 
-       control->intercept_cr_write =   INTERCEPT_CR0_MASK |
+       control->intercept_cr_write =  INTERCEPT_CR0_MASK |
                                        INTERCEPT_CR3_MASK |
                                        INTERCEPT_CR4_MASK |
                                        INTERCEPT_CR8_MASK;
 
-       control->intercept_dr_read =    INTERCEPT_DR0_MASK |
+       control->intercept_dr_read =   INTERCEPT_DR0_MASK |
                                        INTERCEPT_DR1_MASK |
                                        INTERCEPT_DR2_MASK |
                                        INTERCEPT_DR3_MASK |
@@ -561,7 +696,7 @@ static void init_vmcb(struct vcpu_svm *svm)
                                        INTERCEPT_DR6_MASK |
                                        INTERCEPT_DR7_MASK;
 
-       control->intercept_dr_write =   INTERCEPT_DR0_MASK |
+       control->intercept_dr_write =  INTERCEPT_DR0_MASK |
                                        INTERCEPT_DR1_MASK |
                                        INTERCEPT_DR2_MASK |
                                        INTERCEPT_DR3_MASK |
@@ -575,7 +710,7 @@ static void init_vmcb(struct vcpu_svm *svm)
                                        (1 << MC_VECTOR);
 
 
-       control->intercept =    (1ULL << INTERCEPT_INTR) |
+       control->intercept =   (1ULL << INTERCEPT_INTR) |
                                (1ULL << INTERCEPT_NMI) |
                                (1ULL << INTERCEPT_SMI) |
                                (1ULL << INTERCEPT_SELECTIVE_CR0) |
@@ -636,7 +771,8 @@ static void init_vmcb(struct vcpu_svm *svm)
        save->rip = 0x0000fff0;
        svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
 
-       /* This is the guest-visible cr0 value.
+       /*
+        * This is the guest-visible cr0 value.
         * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
         */
        svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
@@ -729,6 +865,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        svm_vcpu_init_msrpm(svm->msrpm);
 
        svm->nested.msrpm = page_address(nested_msrpm_pages);
+       svm_vcpu_init_msrpm(svm->nested.msrpm);
 
        svm->vmcb = page_address(page);
        clear_page(svm->vmcb);
@@ -882,7 +1019,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
        var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
        var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
 
-       /* AMD's VMCB does not have an explicit unusable field, so emulate it
+       /*
+        * AMD's VMCB does not have an explicit unusable field, so emulate it
         * for cross vendor migration purposes by "not present"
         */
        var->unusable = !var->present || (var->type == 0);
@@ -918,7 +1056,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
                var->type |= 0x1;
                break;
        case VCPU_SREG_SS:
-               /* On AMD CPUs sometimes the DB bit in the segment
+               /*
+                * On AMD CPUs sometimes the DB bit in the segment
                 * descriptor is left as 1, although the whole segment has
                 * been made unusable. Clear it here to pass an Intel VMX
                 * entry check when cross vendor migrating.
@@ -936,36 +1075,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu)
        return save->cpl;
 }
 
-static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
+static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       dt->limit = svm->vmcb->save.idtr.limit;
-       dt->base = svm->vmcb->save.idtr.base;
+       dt->size = svm->vmcb->save.idtr.limit;
+       dt->address = svm->vmcb->save.idtr.base;
 }
 
-static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
+static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       svm->vmcb->save.idtr.limit = dt->limit;
-       svm->vmcb->save.idtr.base = dt->base ;
+       svm->vmcb->save.idtr.limit = dt->size;
+       svm->vmcb->save.idtr.base = dt->address ;
 }
 
-static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
+static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       dt->limit = svm->vmcb->save.gdtr.limit;
-       dt->base = svm->vmcb->save.gdtr.base;
+       dt->size = svm->vmcb->save.gdtr.limit;
+       dt->address = svm->vmcb->save.gdtr.base;
 }
 
-static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
+static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       svm->vmcb->save.gdtr.limit = dt->limit;
-       svm->vmcb->save.gdtr.base = dt->base ;
+       svm->vmcb->save.gdtr.limit = dt->size;
+       svm->vmcb->save.gdtr.base = dt->address ;
 }
 
 static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -978,6 +1117,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
+       struct vmcb *vmcb = svm->vmcb;
        ulong gcr0 = svm->vcpu.arch.cr0;
        u64 *hcr0 = &svm->vmcb->save.cr0;
 
@@ -989,11 +1129,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
 
 
        if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
-               svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
-               svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
+               vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
+               vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
+               if (is_nested(svm)) {
+                       struct vmcb *hsave = svm->nested.hsave;
+
+                       hsave->control.intercept_cr_read  &= ~INTERCEPT_CR0_MASK;
+                       hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
+                       vmcb->control.intercept_cr_read  |= svm->nested.intercept_cr_read;
+                       vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write;
+               }
        } else {
                svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
                svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
+               if (is_nested(svm)) {
+                       struct vmcb *hsave = svm->nested.hsave;
+
+                       hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
+                       hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
+               }
        }
 }
 
@@ -1001,6 +1155,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (is_nested(svm)) {
+               /*
+                * We are here because we run in nested mode, the host kvm
+                * intercepts cr0 writes but the l1 hypervisor does not.
+                * But the L1 hypervisor may intercept selective cr0 writes.
+                * This needs to be checked here.
+                */
+               unsigned long old, new;
+
+               /* Remove bits that would trigger a real cr0 write intercept */
+               old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
+               new = cr0 & SVM_CR0_SELECTIVE_MASK;
+
+               if (old == new) {
+                       /* cr0 write with ts and mp unchanged */
+                       svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+                       if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
+                               return;
+               }
+       }
+
 #ifdef CONFIG_X86_64
        if (vcpu->arch.efer & EFER_LME) {
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
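The selective check above compares only the bits in SVM_CR0_SELECTIVE_MASK; going by the "ts and mp" comment these are CR0.TS and CR0.MP, which is stated as an assumption in this standalone sketch:

    #include <stdio.h>

    #define X86_CR0_MP (1UL << 1)
    #define X86_CR0_TS (1UL << 3)
    #define X86_CR0_WP (1UL << 16)
    /* assumed to mirror the kernel's SVM_CR0_SELECTIVE_MASK */
    #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)

    int main(void)
    {
            unsigned long cur = 0x80050033;       /* current guest cr0 */
            unsigned long wr  = cur ^ X86_CR0_WP; /* toggles WP, leaves TS/MP alone */

            if ((cur & SVM_CR0_SELECTIVE_MASK) == (wr & SVM_CR0_SELECTIVE_MASK))
                    printf("TS/MP unchanged -> reflect SVM_EXIT_CR0_SEL_WRITE to L1\n");
            return 0;
    }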
@@ -1134,70 +1309,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
        svm->vmcb->control.asid = sd->next_asid++;
 }
 
-static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest)
+static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       switch (dr) {
-       case 0 ... 3:
-               *dest = vcpu->arch.db[dr];
-               break;
-       case 4:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return EMULATE_FAIL; /* will re-inject UD */
-               /* fall through */
-       case 6:
-               if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-                       *dest = vcpu->arch.dr6;
-               else
-                       *dest = svm->vmcb->save.dr6;
-               break;
-       case 5:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return EMULATE_FAIL; /* will re-inject UD */
-               /* fall through */
-       case 7:
-               if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-                       *dest = vcpu->arch.dr7;
-               else
-                       *dest = svm->vmcb->save.dr7;
-               break;
-       }
-
-       return EMULATE_DONE;
-}
-
-static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
-{
-       struct vcpu_svm *svm = to_svm(vcpu);
-
-       switch (dr) {
-       case 0 ... 3:
-               vcpu->arch.db[dr] = value;
-               if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
-                       vcpu->arch.eff_db[dr] = value;
-               break;
-       case 4:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return EMULATE_FAIL; /* will re-inject UD */
-               /* fall through */
-       case 6:
-               vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
-               break;
-       case 5:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return EMULATE_FAIL; /* will re-inject UD */
-               /* fall through */
-       case 7:
-               vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
-               if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
-                       svm->vmcb->save.dr7 = vcpu->arch.dr7;
-                       vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
-               }
-               break;
-       }
-
-       return EMULATE_DONE;
+       svm->vmcb->save.dr7 = value;
 }
 
 static int pf_interception(struct vcpu_svm *svm)
@@ -1234,7 +1350,7 @@ static int db_interception(struct vcpu_svm *svm)
        }
 
        if (svm->vcpu.guest_debug &
-           (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
+           (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
                kvm_run->debug.arch.pc =
                        svm->vmcb->save.cs.base + svm->vmcb->save.rip;
@@ -1268,7 +1384,22 @@ static int ud_interception(struct vcpu_svm *svm)
 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
+       u32 excp;
+
+       if (is_nested(svm)) {
+               u32 h_excp, n_excp;
+
+               h_excp  = svm->nested.hsave->control.intercept_exceptions;
+               n_excp  = svm->nested.intercept_exceptions;
+               h_excp &= ~(1 << NM_VECTOR);
+               excp    = h_excp | n_excp;
+       } else {
+               excp  = svm->vmcb->control.intercept_exceptions;
+               excp &= ~(1 << NM_VECTOR);
+       }
+
+       svm->vmcb->control.intercept_exceptions = excp;
+
        svm->vcpu.fpu_active = 1;
        update_cr0_intercept(svm);
 }
@@ -1309,29 +1440,23 @@ static int shutdown_interception(struct vcpu_svm *svm)
 
 static int io_interception(struct vcpu_svm *svm)
 {
+       struct kvm_vcpu *vcpu = &svm->vcpu;
        u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
        int size, in, string;
        unsigned port;
 
        ++svm->vcpu.stat.io_exits;
-
-       svm->next_rip = svm->vmcb->control.exit_info_2;
-
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
-
-       if (string) {
-               if (emulate_instruction(&svm->vcpu,
-                                       0, 0, 0) == EMULATE_DO_MMIO)
-                       return 0;
-               return 1;
-       }
-
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
+       if (string || in)
+               return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO);
+
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
-
-       skip_emulated_instruction(&svm->vcpu);
-       return kvm_emulate_pio(&svm->vcpu, in, size, port);
+       svm->next_rip = svm->vmcb->control.exit_info_2;
+       skip_emulated_instruction(&svm->vcpu);
+
+       return kvm_fast_pio_out(vcpu, size, port);
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
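Decoding of exit_info_1 as consumed above; the field masks follow the SVM spec, and the sample value is invented:

    #include <stdio.h>
    #include <stdint.h>

    #define SVM_IOIO_TYPE_MASK  1        /* bit 0: 1 = IN, 0 = OUT */
    #define SVM_IOIO_STR_MASK   (1 << 2) /* string instruction (INS/OUTS) */
    #define SVM_IOIO_SIZE_MASK  (7 << 4) /* one-hot byte count: 1, 2 or 4 */
    #define SVM_IOIO_SIZE_SHIFT 4

    int main(void)
    {
            uint32_t io_info = (0x3f8u << 16) | (1u << 4); /* OUT, 1 byte, port 0x3f8 */

            printf("port %#x in=%d string=%d size=%d\n",
                   io_info >> 16,
                   (io_info & SVM_IOIO_TYPE_MASK) != 0,
                   (io_info & SVM_IOIO_STR_MASK) != 0,
                   (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT);
            return 0;
    }

Note that only the plain OUT case reaches kvm_fast_pio_out(); string accesses and IN take the emulator branch above it.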
@@ -1384,6 +1509,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                                      bool has_error_code, u32 error_code)
 {
+       int vmexit;
+
        if (!is_nested(svm))
                return 0;
 
@@ -1392,21 +1519,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
        svm->vmcb->control.exit_info_1 = error_code;
        svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
-       return nested_svm_exit_handled(svm);
+       vmexit = nested_svm_intercept(svm);
+       if (vmexit == NESTED_EXIT_DONE)
+               svm->nested.exit_required = true;
+
+       return vmexit;
 }
 
-static inline int nested_svm_intr(struct vcpu_svm *svm)
+/* This function returns true if it is safe to enable the irq window */
+static inline bool nested_svm_intr(struct vcpu_svm *svm)
 {
        if (!is_nested(svm))
-               return 0;
+               return true;
 
        if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
-               return 0;
+               return true;
 
        if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
-               return 0;
+               return false;
 
        svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+       svm->vmcb->control.exit_info_1 = 0;
+       svm->vmcb->control.exit_info_2 = 0;
 
        if (svm->nested.intercept & 1ULL) {
                /*
@@ -1417,21 +1551,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
                 */
                svm->nested.exit_required = true;
                trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
-               return 1;
+               return false;
        }
 
-       return 0;
+       return true;
+}
+
+/* This function returns true if it is safe to enable the nmi window */
+static inline bool nested_svm_nmi(struct vcpu_svm *svm)
+{
+       if (!is_nested(svm))
+               return true;
+
+       if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
+               return true;
+
+       svm->vmcb->control.exit_code = SVM_EXIT_NMI;
+       svm->nested.exit_required = true;
+
+       return false;
 }
 
-static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
+static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
 {
        struct page *page;
 
+       might_sleep();
+
        page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
        if (is_error_page(page))
                goto error;
 
-       return kmap_atomic(page, idx);
+       *_page = page;
+
+       return kmap(page);
 
 error:
        kvm_release_page_clean(page);
@@ -1440,61 +1593,55 @@ error:
        return NULL;
 }
 
-static void nested_svm_unmap(void *addr, enum km_type idx)
+static void nested_svm_unmap(struct page *page)
 {
-       struct page *page;
+       kunmap(page);
+       kvm_release_page_dirty(page);
+}
 
-       if (!addr)
-               return;
+static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
+{
+       unsigned port;
+       u8 val, bit;
+       u64 gpa;
 
-       page = kmap_atomic_to_page(addr);
+       if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
+               return NESTED_EXIT_HOST;
 
-       kunmap_atomic(addr, idx);
-       kvm_release_page_dirty(page);
+       port = svm->vmcb->control.exit_info_1 >> 16;
+       gpa  = svm->nested.vmcb_iopm + (port / 8);
+       bit  = port % 8;
+       val  = 0;
+
+       if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
+               val &= (1 << bit);
+
+       return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
 }
 
-static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm)
+static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
 {
-       u32 param = svm->vmcb->control.exit_info_1 & 1;
-       u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
-       bool ret = false;
-       u32 t0, t1;
-       u8 *msrpm;
+       u32 offset, msr, value;
+       int write, mask;
 
        if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
-               return false;
+               return NESTED_EXIT_HOST;
 
-       msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
+       msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+       offset = svm_msrpm_offset(msr);
+       write  = svm->vmcb->control.exit_info_1 & 1;
+       mask   = 1 << ((2 * (msr & 0xf)) + write);
 
-       if (!msrpm)
-               goto out;
+       if (offset == MSR_INVALID)
+               return NESTED_EXIT_DONE;
 
-       switch (msr) {
-       case 0 ... 0x1fff:
-               t0 = (msr * 2) % 8;
-               t1 = msr / 8;
-               break;
-       case 0xc0000000 ... 0xc0001fff:
-               t0 = (8192 + msr - 0xc0000000) * 2;
-               t1 = (t0 / 8);
-               t0 %= 8;
-               break;
-       case 0xc0010000 ... 0xc0011fff:
-               t0 = (16384 + msr - 0xc0010000) * 2;
-               t1 = (t0 / 8);
-               t0 %= 8;
-               break;
-       default:
-               ret = true;
-               goto out;
-       }
+       /* Offset is in 32 bit units but need in 8 bit units */
+       offset *= 4;
 
-       ret = msrpm[t1] & ((1 << param) << t0);
-
-out:
-       nested_svm_unmap(msrpm, KM_USER0);
+       if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
+               return NESTED_EXIT_DONE;
 
-       return ret;
+       return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
 }
 
 static int nested_svm_exit_special(struct vcpu_svm *svm)
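Worked example for the lookup above (standalone; values picked for illustration): a write to MSR 0xc0000081 lands at u32 offset 520, i.e. byte offset 2080 into the nested bitmap, and tests bit 3 of that word.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t msr = 0xc0000081, offset = 520; /* = svm_msrpm_offset(msr) */
            int write = 1;
            uint32_t mask = 1u << ((2 * (msr & 0xf)) + write); /* 1 << 3 */

            printf("read u32 at vmcb_msrpm + %u, test %#x\n", offset * 4, mask);
            return 0;
    }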
@@ -1504,17 +1651,21 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
        switch (exit_code) {
        case SVM_EXIT_INTR:
        case SVM_EXIT_NMI:
+       case SVM_EXIT_EXCP_BASE + MC_VECTOR:
                return NESTED_EXIT_HOST;
-       /* For now we are always handling NPFs when using them */
        case SVM_EXIT_NPF:
+               /* For now we are always handling NPFs when using them */
                if (npt_enabled)
                        return NESTED_EXIT_HOST;
                break;
-       /* When we're shadowing, trap PFs */
        case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+               /* When we're shadowing, trap PFs */
                if (!npt_enabled)
                        return NESTED_EXIT_HOST;
                break;
+       case SVM_EXIT_EXCP_BASE + NM_VECTOR:
+               nm_interception(svm);
+               break;
        default:
                break;
        }
@@ -1525,7 +1676,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
 /*
  * If this function returns true, this #vmexit was already handled
  */
-static int nested_svm_exit_handled(struct vcpu_svm *svm)
+static int nested_svm_intercept(struct vcpu_svm *svm)
 {
        u32 exit_code = svm->vmcb->control.exit_code;
        int vmexit = NESTED_EXIT_HOST;
@@ -1534,6 +1685,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
        case SVM_EXIT_MSR:
                vmexit = nested_svm_exit_handled_msr(svm);
                break;
+       case SVM_EXIT_IOIO:
+               vmexit = nested_svm_intercept_ioio(svm);
+               break;
        case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
                u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
                if (svm->nested.intercept_cr_read & cr_bits)
@@ -1564,6 +1718,10 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
                vmexit = NESTED_EXIT_DONE;
                break;
        }
+       case SVM_EXIT_ERR: {
+               vmexit = NESTED_EXIT_DONE;
+               break;
+       }
        default: {
                u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
                if (svm->nested.intercept & exit_bits)
@@ -1571,9 +1729,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
        }
        }
 
-       if (vmexit == NESTED_EXIT_DONE) {
+       return vmexit;
+}
+
+static int nested_svm_exit_handled(struct vcpu_svm *svm)
+{
+       int vmexit;
+
+       vmexit = nested_svm_intercept(svm);
+
+       if (vmexit == NESTED_EXIT_DONE)
                nested_svm_vmexit(svm);
-       }
 
        return vmexit;
 }
@@ -1615,6 +1781,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        struct vmcb *nested_vmcb;
        struct vmcb *hsave = svm->nested.hsave;
        struct vmcb *vmcb = svm->vmcb;
+       struct page *page;
 
        trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
                                       vmcb->control.exit_info_1,
@@ -1622,10 +1789,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
                                       vmcb->control.exit_int_info,
                                       vmcb->control.exit_int_info_err);
 
-       nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
+       nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
        if (!nested_vmcb)
                return 1;
 
+       /* Exit nested SVM mode */
+       svm->nested.vmcb = 0;
+
        /* Give the current vmcb to the guest */
        disable_gif(svm);
 
@@ -1635,9 +1805,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        nested_vmcb->save.ds     = vmcb->save.ds;
        nested_vmcb->save.gdtr   = vmcb->save.gdtr;
        nested_vmcb->save.idtr   = vmcb->save.idtr;
-       if (npt_enabled)
-               nested_vmcb->save.cr3    = vmcb->save.cr3;
+       nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
+       nested_vmcb->save.cr3    = svm->vcpu.arch.cr3;
        nested_vmcb->save.cr2    = vmcb->save.cr2;
+       nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
        nested_vmcb->save.rflags = vmcb->save.rflags;
        nested_vmcb->save.rip    = vmcb->save.rip;
        nested_vmcb->save.rsp    = vmcb->save.rsp;
@@ -1709,10 +1880,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        svm->vmcb->save.cpl = 0;
        svm->vmcb->control.exit_int_info = 0;
 
-       /* Exit nested SVM mode */
-       svm->nested.vmcb = 0;
-
-       nested_svm_unmap(nested_vmcb, KM_USER0);
+       nested_svm_unmap(page);
 
        kvm_mmu_reset_context(&svm->vcpu);
        kvm_mmu_load(&svm->vcpu);
@@ -1722,19 +1890,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 
 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
-       u32 *nested_msrpm;
+       /*
+        * This function merges the msr permission bitmaps of kvm and the
+        * nested vmcb. It is optimized in that it only merges the parts where
+        * the kvm msr permission bitmap may contain zero bits
+        */
        int i;
 
-       nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
-       if (!nested_msrpm)
-               return false;
+       if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+               return true;
 
-       for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
-               svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+       for (i = 0; i < MSRPM_OFFSETS; i++) {
+               u32 value, p;
+               u64 offset;
 
-       svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+               if (msrpm_offsets[i] == 0xffffffff)
+                       break;
+
+               p      = msrpm_offsets[i];
+               offset = svm->nested.vmcb_msrpm + (p * 4);
+
+               if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
+                       return false;
+
+               svm->nested.msrpm[p] = svm->msrpm[p] | value;
+       }
 
-       nested_svm_unmap(nested_msrpm, KM_USER0);
+       svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
 
        return true;
 }
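The loop above only merges the u32 words listed in msrpm_offsets[], i.e. the words where KVM's own bitmap can contain zero (pass-through) bits; every other word keeps its 0xff initialization. Because the bitmaps are OR-merged, L2 only gets direct access to an MSR if both KVM and L1 allow it. A minimal model with invented values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t kvm_word = 0xfffffff3; /* KVM passes one MSR through (bits 2,3 clear) */
            uint32_t l1_word  = 0x00000008; /* L1 intercepts writes to that MSR */

            printf("merged %#x\n", kvm_word | l1_word); /* write intercept restored */
            return 0;
    }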
@@ -1744,26 +1926,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
        struct vmcb *nested_vmcb;
        struct vmcb *hsave = svm->nested.hsave;
        struct vmcb *vmcb = svm->vmcb;
+       struct page *page;
+       u64 vmcb_gpa;
 
-       nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+       vmcb_gpa = svm->vmcb->save.rax;
+
+       nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
        if (!nested_vmcb)
                return false;
 
-       /* nested_vmcb is our indicator if nested SVM is activated */
-       svm->nested.vmcb = svm->vmcb->save.rax;
-
-       trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+       trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
                               nested_vmcb->save.rip,
                               nested_vmcb->control.int_ctl,
                               nested_vmcb->control.event_inj,
                               nested_vmcb->control.nested_ctl);
 
+       trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
+                                   nested_vmcb->control.intercept_cr_write,
+                                   nested_vmcb->control.intercept_exceptions,
+                                   nested_vmcb->control.intercept);
+
        /* Clear internal status */
        kvm_clear_exception_queue(&svm->vcpu);
        kvm_clear_interrupt_queue(&svm->vcpu);
 
-       /* Save the old vmcb, so we don't need to pick what we save, but
-          can restore everything when a VMEXIT occurs */
+       /*
+        * Save the old vmcb, so we don't need to pick what we save, but can
+        * restore everything when a VMEXIT occurs
+        */
        hsave->save.es     = vmcb->save.es;
        hsave->save.cs     = vmcb->save.cs;
        hsave->save.ss     = vmcb->save.ss;
@@ -1803,14 +1993,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
        if (npt_enabled) {
                svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
                svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
-       } else {
+       } else
                kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
-               kvm_mmu_reset_context(&svm->vcpu);
-       }
+
+       /* Guest paging mode is active - reset mmu */
+       kvm_mmu_reset_context(&svm->vcpu);
+
        svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
        kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
        kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
        kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+
        /* In case we don't even reach vcpu_run, the fields are not updated */
        svm->vmcb->save.rax = nested_vmcb->save.rax;
        svm->vmcb->save.rsp = nested_vmcb->save.rsp;
@@ -1819,22 +2012,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
        svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
        svm->vmcb->save.cpl = nested_vmcb->save.cpl;
 
-       /* We don't want a nested guest to be more powerful than the guest,
-          so all intercepts are ORed */
-       svm->vmcb->control.intercept_cr_read |=
-               nested_vmcb->control.intercept_cr_read;
-       svm->vmcb->control.intercept_cr_write |=
-               nested_vmcb->control.intercept_cr_write;
-       svm->vmcb->control.intercept_dr_read |=
-               nested_vmcb->control.intercept_dr_read;
-       svm->vmcb->control.intercept_dr_write |=
-               nested_vmcb->control.intercept_dr_write;
-       svm->vmcb->control.intercept_exceptions |=
-               nested_vmcb->control.intercept_exceptions;
-
-       svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
-
-       svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+       svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
+       svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;
 
        /* cache intercepts */
        svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read;
@@ -1851,13 +2030,43 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
        else
                svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
 
+       if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
+               /* We only want the cr8 intercept bits of the guest */
+               svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK;
+               svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+       }
+
+       /* We don't want to see VMMCALLs from a nested guest */
+       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL);
+
+       /*
+        * We don't want a nested guest to be more powerful than the guest, so
+        * all intercepts are ORed
+        */
+       svm->vmcb->control.intercept_cr_read |=
+               nested_vmcb->control.intercept_cr_read;
+       svm->vmcb->control.intercept_cr_write |=
+               nested_vmcb->control.intercept_cr_write;
+       svm->vmcb->control.intercept_dr_read |=
+               nested_vmcb->control.intercept_dr_read;
+       svm->vmcb->control.intercept_dr_write |=
+               nested_vmcb->control.intercept_dr_write;
+       svm->vmcb->control.intercept_exceptions |=
+               nested_vmcb->control.intercept_exceptions;
+
+       svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+
+       svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
        svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
        svm->vmcb->control.int_state = nested_vmcb->control.int_state;
        svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
        svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
        svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
-       nested_svm_unmap(nested_vmcb, KM_USER0);
+       nested_svm_unmap(page);
+
+       /* nested_vmcb is our indicator if nested SVM is activated */
+       svm->nested.vmcb = vmcb_gpa;
 
        enable_gif(svm);
 
@@ -1883,6 +2092,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 static int vmload_interception(struct vcpu_svm *svm)
 {
        struct vmcb *nested_vmcb;
+       struct page *page;
 
        if (nested_svm_check_permissions(svm))
                return 1;
@@ -1890,12 +2100,12 @@ static int vmload_interception(struct vcpu_svm *svm)
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        skip_emulated_instruction(&svm->vcpu);
 
-       nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+       nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
        if (!nested_vmcb)
                return 1;
 
        nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
-       nested_svm_unmap(nested_vmcb, KM_USER0);
+       nested_svm_unmap(page);
 
        return 1;
 }
@@ -1903,6 +2113,7 @@ static int vmload_interception(struct vcpu_svm *svm)
 static int vmsave_interception(struct vcpu_svm *svm)
 {
        struct vmcb *nested_vmcb;
+       struct page *page;
 
        if (nested_svm_check_permissions(svm))
                return 1;
@@ -1910,12 +2121,12 @@ static int vmsave_interception(struct vcpu_svm *svm)
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        skip_emulated_instruction(&svm->vcpu);
 
-       nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+       nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
        if (!nested_vmcb)
                return 1;
 
        nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
-       nested_svm_unmap(nested_vmcb, KM_USER0);
+       nested_svm_unmap(page);
 
        return 1;
 }
@@ -2018,6 +2229,8 @@ static int task_switch_interception(struct vcpu_svm *svm)
                svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
        uint32_t idt_v =
                svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
+       bool has_error_code = false;
+       u32 error_code = 0;
 
        tss_selector = (u16)svm->vmcb->control.exit_info_1;
 
@@ -2038,6 +2251,12 @@ static int task_switch_interception(struct vcpu_svm *svm)
                        svm->vcpu.arch.nmi_injected = false;
                        break;
                case SVM_EXITINTINFO_TYPE_EXEPT:
+                       if (svm->vmcb->control.exit_info_2 &
+                           (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
+                               has_error_code = true;
+                               error_code =
+                                       (u32)svm->vmcb->control.exit_info_2;
+                       }
                        kvm_clear_exception_queue(&svm->vcpu);
                        break;
                case SVM_EXITINTINFO_TYPE_INTR:
@@ -2054,7 +2273,14 @@ static int task_switch_interception(struct vcpu_svm *svm)
             (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
                skip_emulated_instruction(&svm->vcpu);
 
-       return kvm_task_switch(&svm->vcpu, tss_selector, reason);
+       if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
+                           has_error_code, error_code) == EMULATE_FAIL) {
+               svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               svm->vcpu.run->internal.ndata = 0;
+               return 0;
+       }
+       return 1;
 }
 
 static int cpuid_interception(struct vcpu_svm *svm)
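The error-code path above keys off one flag bit in exit_info_2, with the error code itself in the low 32 bits; the bit position comes from SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE (44 in the corresponding svm.h, stated here as an assumption):

    #include <stdio.h>
    #include <stdint.h>

    #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 /* assumed value */

    int main(void)
    {
            uint64_t exit_info_2 = (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE) | 0xb;

            if (exit_info_2 & (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE))
                    printf("task switch pushes error code %u\n",
                           (uint32_t)exit_info_2); /* prints 11 */
            return 0;
    }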
@@ -2145,9 +2371,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
        case MSR_IA32_SYSENTER_ESP:
                *data = svm->sysenter_esp;
                break;
-       /* Nobody will change the following 5 values in the VMCB so
-          we can safely return them on rdmsr. They will always be 0
-          until LBRV is implemented. */
+       /*
+        * Nobody will change the following 5 values in the VMCB so we can
+        * safely return them on rdmsr. They will always be 0 until LBRV is
+        * implemented.
+        */
        case MSR_IA32_DEBUGCTLMSR:
                *data = svm->vmcb->save.dbgctl;
                break;
@@ -2167,7 +2395,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
                *data = svm->nested.hsave_msr;
                break;
        case MSR_VM_CR:
-               *data = 0;
+               *data = svm->nested.vm_cr_msr;
                break;
        case MSR_IA32_UCODE_REV:
                *data = 0x01000065;
@@ -2197,6 +2425,31 @@ static int rdmsr_interception(struct vcpu_svm *svm)
        return 1;
 }
 
+static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       int svm_dis, chg_mask;
+
+       if (data & ~SVM_VM_CR_VALID_MASK)
+               return 1;
+
+       chg_mask = SVM_VM_CR_VALID_MASK;
+
+       if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
+               chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
+
+       svm->nested.vm_cr_msr &= ~chg_mask;
+       svm->nested.vm_cr_msr |= (data & chg_mask);
+
+       svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
+
+       /* check for svm_disable while efer.svme is set */
+       if (svm_dis && (vcpu->arch.efer & EFER_SVME))
+               return 1;
+
+       return 0;
+}
+
 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
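A standalone model of the stickiness in svm_set_vm_cr() (mask values are assumed to match arch/x86/include/asm/svm.h): once the guest sets SVM_DIS, later writes can no longer clear it.

    #include <stdio.h>
    #include <stdint.h>

    #define SVM_VM_CR_VALID_MASK    0x001fULL /* assumed */
    #define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL /* assumed */
    #define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL /* assumed */

    static uint64_t vm_cr;

    static int set_vm_cr(uint64_t data)
    {
            uint64_t chg_mask = SVM_VM_CR_VALID_MASK;

            if (data & ~SVM_VM_CR_VALID_MASK)
                    return 1;
            if (vm_cr & SVM_VM_CR_SVM_DIS_MASK) /* already disabled: freeze bits */
                    chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);

            vm_cr = (vm_cr & ~chg_mask) | (data & chg_mask);
            return 0;
    }

    int main(void)
    {
            set_vm_cr(SVM_VM_CR_SVM_DIS_MASK); /* guest disables SVM */
            set_vm_cr(0);                      /* attempt to re-enable is ignored */
            printf("vm_cr = %#llx\n", (unsigned long long)vm_cr); /* 0x10 */
            return 0;
    }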
@@ -2263,6 +2516,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2263 svm->nested.hsave_msr = data; 2516 svm->nested.hsave_msr = data;
2264 break; 2517 break;
2265 case MSR_VM_CR: 2518 case MSR_VM_CR:
2519 return svm_set_vm_cr(vcpu, data);
2266 case MSR_VM_IGNNE: 2520 case MSR_VM_IGNNE:
2267 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 2521 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
2268 break; 2522 break;
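
svm_set_vm_cr() above gives the VM_CR MSR real read/write semantics for the nested hypervisor: writes touching bits outside SVM_VM_CR_VALID_MASK fault (the return of 1 becomes a #GP), and once SVM_DIS has been latched the LOCK and DIS bits drop out of the changeable mask, so a guest cannot re-enable SVM through this MSR; setting SVM_DIS while EFER.SVME is still on also faults. A standalone sketch of the mask arithmetic -- the bit values are assumptions mirroring the SVM_VM_CR_* definitions in asm/svm.h (LOCK at bit 3, DIS at bit 4):

#include <stdint.h>
#include <stdio.h>

#define VM_CR_SVM_LOCK	(1ULL << 3)	/* assumed SVM_VM_CR_SVM_LOCK_MASK */
#define VM_CR_SVM_DIS	(1ULL << 4)	/* assumed SVM_VM_CR_SVM_DIS_MASK */
#define VM_CR_VALID	0x1fULL		/* assumed SVM_VM_CR_VALID_MASK */

static uint64_t vm_cr;

/* Same convention as svm_set_vm_cr(): 0 on success, 1 to raise #GP */
static int write_vm_cr(uint64_t data)
{
	uint64_t chg_mask = VM_CR_VALID;

	if (data & ~VM_CR_VALID)
		return 1;

	/* Once SVM_DIS is latched, LOCK and DIS become read-only */
	if (vm_cr & VM_CR_SVM_DIS)
		chg_mask &= ~(VM_CR_SVM_LOCK | VM_CR_SVM_DIS);

	vm_cr = (vm_cr & ~chg_mask) | (data & chg_mask);
	return 0;
}

int main(void)
{
	write_vm_cr(VM_CR_SVM_LOCK | VM_CR_SVM_DIS);	/* disable SVM and lock it */
	write_vm_cr(0);					/* try to re-enable: no effect */
	printf("VM_CR = %#llx\n", (unsigned long long)vm_cr);
	return 0;
}
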
@@ -2326,16 +2580,16 @@ static int pause_interception(struct vcpu_svm *svm)
2326} 2580}
2327 2581
2328static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { 2582static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2329 [SVM_EXIT_READ_CR0] = emulate_on_interception, 2583 [SVM_EXIT_READ_CR0] = emulate_on_interception,
2330 [SVM_EXIT_READ_CR3] = emulate_on_interception, 2584 [SVM_EXIT_READ_CR3] = emulate_on_interception,
2331 [SVM_EXIT_READ_CR4] = emulate_on_interception, 2585 [SVM_EXIT_READ_CR4] = emulate_on_interception,
2332 [SVM_EXIT_READ_CR8] = emulate_on_interception, 2586 [SVM_EXIT_READ_CR8] = emulate_on_interception,
2333 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, 2587 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
2334 [SVM_EXIT_WRITE_CR0] = emulate_on_interception, 2588 [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
2335 [SVM_EXIT_WRITE_CR3] = emulate_on_interception, 2589 [SVM_EXIT_WRITE_CR3] = emulate_on_interception,
2336 [SVM_EXIT_WRITE_CR4] = emulate_on_interception, 2590 [SVM_EXIT_WRITE_CR4] = emulate_on_interception,
2337 [SVM_EXIT_WRITE_CR8] = cr8_write_interception, 2591 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
2338 [SVM_EXIT_READ_DR0] = emulate_on_interception, 2592 [SVM_EXIT_READ_DR0] = emulate_on_interception,
2339 [SVM_EXIT_READ_DR1] = emulate_on_interception, 2593 [SVM_EXIT_READ_DR1] = emulate_on_interception,
2340 [SVM_EXIT_READ_DR2] = emulate_on_interception, 2594 [SVM_EXIT_READ_DR2] = emulate_on_interception,
2341 [SVM_EXIT_READ_DR3] = emulate_on_interception, 2595 [SVM_EXIT_READ_DR3] = emulate_on_interception,
@@ -2354,15 +2608,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2354 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, 2608 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
2355 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, 2609 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
2356 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 2610 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
2357 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 2611 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
2358 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 2612 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
2359 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, 2613 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
2360 [SVM_EXIT_INTR] = intr_interception, 2614 [SVM_EXIT_INTR] = intr_interception,
2361 [SVM_EXIT_NMI] = nmi_interception, 2615 [SVM_EXIT_NMI] = nmi_interception,
2362 [SVM_EXIT_SMI] = nop_on_interception, 2616 [SVM_EXIT_SMI] = nop_on_interception,
2363 [SVM_EXIT_INIT] = nop_on_interception, 2617 [SVM_EXIT_INIT] = nop_on_interception,
2364 [SVM_EXIT_VINTR] = interrupt_window_interception, 2618 [SVM_EXIT_VINTR] = interrupt_window_interception,
2365 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */
2366 [SVM_EXIT_CPUID] = cpuid_interception, 2619 [SVM_EXIT_CPUID] = cpuid_interception,
2367 [SVM_EXIT_IRET] = iret_interception, 2620 [SVM_EXIT_IRET] = iret_interception,
2368 [SVM_EXIT_INVD] = emulate_on_interception, 2621 [SVM_EXIT_INVD] = emulate_on_interception,
@@ -2370,7 +2623,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2370 [SVM_EXIT_HLT] = halt_interception, 2623 [SVM_EXIT_HLT] = halt_interception,
2371 [SVM_EXIT_INVLPG] = invlpg_interception, 2624 [SVM_EXIT_INVLPG] = invlpg_interception,
2372 [SVM_EXIT_INVLPGA] = invlpga_interception, 2625 [SVM_EXIT_INVLPGA] = invlpga_interception,
2373 [SVM_EXIT_IOIO] = io_interception, 2626 [SVM_EXIT_IOIO] = io_interception,
2374 [SVM_EXIT_MSR] = msr_interception, 2627 [SVM_EXIT_MSR] = msr_interception,
2375 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 2628 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
2376 [SVM_EXIT_SHUTDOWN] = shutdown_interception, 2629 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
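
svm_exit_handlers[] is a sparse function-pointer table indexed directly by the hardware exit code; handle_exit() below bounds-checks the code and rejects empty slots before dispatching. The same pattern in miniature, with illustrative handler names and exit codes (not KVM's):

#include <stdio.h>

struct vcpu { int id; };

static int handle_hlt(struct vcpu *v)	{ printf("vcpu %d: hlt\n", v->id); return 1; }
static int handle_io(struct vcpu *v)	{ printf("vcpu %d: ioio\n", v->id); return 1; }

enum { EXIT_HLT = 0x78, EXIT_IOIO = 0x7b, EXIT_MAX = 0x80 };

/* Sparse table: designated initializers leave unhandled codes NULL */
static int (*exit_handlers[EXIT_MAX])(struct vcpu *v) = {
	[EXIT_HLT]	= handle_hlt,
	[EXIT_IOIO]	= handle_io,
};

static int dispatch(struct vcpu *v, unsigned int exit_code)
{
	if (exit_code >= EXIT_MAX || !exit_handlers[exit_code]) {
		fprintf(stderr, "unhandled exit %#x\n", exit_code);
		return 0;	/* bail to userspace, as handle_exit() does */
	}
	return exit_handlers[exit_code](v);
}

int main(void)
{
	struct vcpu v = { .id = 0 };

	dispatch(&v, EXIT_HLT);
	dispatch(&v, 0x7f);	/* no handler registered */
	return 0;
}
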
@@ -2393,7 +2646,12 @@ static int handle_exit(struct kvm_vcpu *vcpu)
2393 struct kvm_run *kvm_run = vcpu->run; 2646 struct kvm_run *kvm_run = vcpu->run;
2394 u32 exit_code = svm->vmcb->control.exit_code; 2647 u32 exit_code = svm->vmcb->control.exit_code;
2395 2648
2396 trace_kvm_exit(exit_code, svm->vmcb->save.rip); 2649 trace_kvm_exit(exit_code, vcpu);
2650
2651 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
2652 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2653 if (npt_enabled)
2654 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2397 2655
2398 if (unlikely(svm->nested.exit_required)) { 2656 if (unlikely(svm->nested.exit_required)) {
2399 nested_svm_vmexit(svm); 2657 nested_svm_vmexit(svm);
@@ -2422,11 +2680,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
2422 2680
2423 svm_complete_interrupts(svm); 2681 svm_complete_interrupts(svm);
2424 2682
2425 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
2426 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2427 if (npt_enabled)
2428 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2429
2430 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 2683 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
2431 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2684 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2432 kvm_run->fail_entry.hardware_entry_failure_reason 2685 kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2511,6 +2764,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
2511{ 2764{
2512 struct vcpu_svm *svm = to_svm(vcpu); 2765 struct vcpu_svm *svm = to_svm(vcpu);
2513 2766
2767 if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2768 return;
2769
2514 if (irr == -1) 2770 if (irr == -1)
2515 return; 2771 return;
2516 2772
@@ -2522,8 +2778,12 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
2522{ 2778{
2523 struct vcpu_svm *svm = to_svm(vcpu); 2779 struct vcpu_svm *svm = to_svm(vcpu);
2524 struct vmcb *vmcb = svm->vmcb; 2780 struct vmcb *vmcb = svm->vmcb;
2525 return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && 2781 int ret;
2526 !(svm->vcpu.arch.hflags & HF_NMI_MASK); 2782 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2783 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
2784 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
2785
2786 return ret;
2527} 2787}
2528 2788
2529static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) 2789static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -2568,13 +2828,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
2568{ 2828{
2569 struct vcpu_svm *svm = to_svm(vcpu); 2829 struct vcpu_svm *svm = to_svm(vcpu);
2570 2830
2571 nested_svm_intr(svm); 2831 /*
2572 2832 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
2573 /* In case GIF=0 we can't rely on the CPU to tell us when 2833 * 1, because that's a separate STGI/VMRUN intercept. The next time we
2574 * GIF becomes 1, because that's a separate STGI/VMRUN intercept. 2834 * get that intercept, this function will be called again though and
2575 * The next time we get that intercept, this function will be 2835 * we'll get the vintr intercept.
2576 * called again though and we'll get the vintr intercept. */ 2836 */
2577 if (gif_set(svm)) { 2837 if (gif_set(svm) && nested_svm_intr(svm)) {
2578 svm_set_vintr(svm); 2838 svm_set_vintr(svm);
2579 svm_inject_irq(svm, 0x0); 2839 svm_inject_irq(svm, 0x0);
2580 } 2840 }
@@ -2588,9 +2848,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
2588 == HF_NMI_MASK) 2848 == HF_NMI_MASK)
2589 return; /* IRET will cause a vm exit */ 2849 return; /* IRET will cause a vm exit */
2590 2850
2591 /* Something prevents NMI from been injected. Single step over 2851 /*
2592 possible problem (IRET or exception injection or interrupt 2852 * Something prevents NMI from being injected. Single-step over the possible
2593 shadow) */ 2853 * problem (IRET or exception injection or interrupt shadow)
2854 */
2594 svm->nmi_singlestep = true; 2855 svm->nmi_singlestep = true;
2595 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); 2856 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2596 update_db_intercept(vcpu); 2857 update_db_intercept(vcpu);
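
When something blocks the NMI window, the code above arms single-stepping by setting TF (bit 8) together with RF (bit 16) in guest RFLAGS: TF raises #DB after the next instruction, and RF keeps an instruction breakpoint from refiring on the restarted instruction. A tiny sketch of the flag arithmetic, using the architectural EFLAGS bit positions:

#include <stdint.h>
#include <stdio.h>

#define EFLAGS_TF	(1UL << 8)	/* trap flag: #DB after one instruction */
#define EFLAGS_RF	(1UL << 16)	/* resume flag: suppress inst. breakpoint once */

int main(void)
{
	uint64_t rflags = 0x2;			/* bit 1 is architecturally always set */

	rflags |= EFLAGS_TF | EFLAGS_RF;	/* arm single-step, as enable_nmi_window() does */
	printf("rflags = %#llx\n", (unsigned long long)rflags);
	return 0;
}
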
@@ -2614,6 +2875,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
2614{ 2875{
2615 struct vcpu_svm *svm = to_svm(vcpu); 2876 struct vcpu_svm *svm = to_svm(vcpu);
2616 2877
2878 if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2879 return;
2880
2617 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { 2881 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
2618 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; 2882 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
2619 kvm_set_cr8(vcpu, cr8); 2883 kvm_set_cr8(vcpu, cr8);
@@ -2625,6 +2889,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
2625 struct vcpu_svm *svm = to_svm(vcpu); 2889 struct vcpu_svm *svm = to_svm(vcpu);
2626 u64 cr8; 2890 u64 cr8;
2627 2891
2892 if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2893 return;
2894
2628 cr8 = kvm_get_cr8(vcpu); 2895 cr8 = kvm_get_cr8(vcpu);
2629 svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 2896 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
2630 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; 2897 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
@@ -2635,6 +2902,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
2635 u8 vector; 2902 u8 vector;
2636 int type; 2903 int type;
2637 u32 exitintinfo = svm->vmcb->control.exit_int_info; 2904 u32 exitintinfo = svm->vmcb->control.exit_int_info;
2905 unsigned int3_injected = svm->int3_injected;
2906
2907 svm->int3_injected = 0;
2638 2908
2639 if (svm->vcpu.arch.hflags & HF_IRET_MASK) 2909 if (svm->vcpu.arch.hflags & HF_IRET_MASK)
2640 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); 2910 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
@@ -2654,18 +2924,25 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
2654 svm->vcpu.arch.nmi_injected = true; 2924 svm->vcpu.arch.nmi_injected = true;
2655 break; 2925 break;
2656 case SVM_EXITINTINFO_TYPE_EXEPT: 2926 case SVM_EXITINTINFO_TYPE_EXEPT:
2657 /* In case of software exception do not reinject an exception 2927 /*
2658 vector, but re-execute and instruction instead */ 2928 * In case of software exceptions, do not reinject the vector,
2659 if (is_nested(svm)) 2929 * but re-execute the instruction instead. Rewind RIP first
2660 break; 2930 * if we emulated INT3 before.
2661 if (kvm_exception_is_soft(vector)) 2931 */
2932 if (kvm_exception_is_soft(vector)) {
2933 if (vector == BP_VECTOR && int3_injected &&
2934 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
2935 kvm_rip_write(&svm->vcpu,
2936 kvm_rip_read(&svm->vcpu) -
2937 int3_injected);
2662 break; 2938 break;
2939 }
2663 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { 2940 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
2664 u32 err = svm->vmcb->control.exit_int_info_err; 2941 u32 err = svm->vmcb->control.exit_int_info_err;
2665 kvm_queue_exception_e(&svm->vcpu, vector, err); 2942 kvm_requeue_exception_e(&svm->vcpu, vector, err);
2666 2943
2667 } else 2944 } else
2668 kvm_queue_exception(&svm->vcpu, vector); 2945 kvm_requeue_exception(&svm->vcpu, vector);
2669 break; 2946 break;
2670 case SVM_EXITINTINFO_TYPE_INTR: 2947 case SVM_EXITINTINFO_TYPE_INTR:
2671 kvm_queue_interrupt(&svm->vcpu, vector, false); 2948 kvm_queue_interrupt(&svm->vcpu, vector, false);
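
svm_complete_interrupts() re-queues whatever event was in flight when the VMEXIT cut delivery short; EXITINTINFO packs the vector in bits 7:0, the event type in bits 10:8, an error-code-valid flag in bit 11, and a global valid flag in bit 31 (layout assumed from the SVM_EXITINTINFO_* definitions). The INT3 special case exists because KVM injects INT3 as a software exception with RIP already advanced past the breakpoint, so an aborted delivery must rewind RIP by the injected length before the #BP re-executes. A standalone decode sketch under those layout assumptions:

#include <stdint.h>
#include <stdio.h>

/* Assumed EXITINTINFO layout, mirroring SVM_EXITINTINFO_* */
#define EI_VEC(x)	((x) & 0xff)
#define EI_TYPE(x)	(((x) >> 8) & 7)
#define EI_VALID_ERR	(1U << 11)
#define EI_VALID	(1U << 31)

enum { TYPE_INTR = 0, TYPE_NMI = 2, TYPE_EXEPT = 3, TYPE_SOFT = 4 };

int main(void)
{
	/* A pending #PF (vector 14) with a valid error code */
	uint32_t exitintinfo = EI_VALID | EI_VALID_ERR | (TYPE_EXEPT << 8) | 14;

	if (exitintinfo & EI_VALID)
		printf("vector %u, type %u, error code %s\n",
		       EI_VEC(exitintinfo), EI_TYPE(exitintinfo),
		       (exitintinfo & EI_VALID_ERR) ? "valid" : "none");
	return 0;
}
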
@@ -2688,6 +2965,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2688 u16 gs_selector; 2965 u16 gs_selector;
2689 u16 ldt_selector; 2966 u16 ldt_selector;
2690 2967
2968 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
2969 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
2970 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
2971
2691 /* 2972 /*
2692 * A vmexit emulation is required before the vcpu can be executed 2973 * A vmexit emulation is required before the vcpu can be executed
2693 * again. 2974 * again.
@@ -2695,10 +2976,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2695 if (unlikely(svm->nested.exit_required)) 2976 if (unlikely(svm->nested.exit_required))
2696 return; 2977 return;
2697 2978
2698 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
2699 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
2700 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
2701
2702 pre_svm_run(svm); 2979 pre_svm_run(svm);
2703 2980
2704 sync_lapic_to_cr8(vcpu); 2981 sync_lapic_to_cr8(vcpu);
@@ -2879,25 +3156,39 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
2879{ 3156{
2880} 3157}
2881 3158
3159static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3160{
3161 switch (func) {
3162 case 0x8000000A:
3163 entry->eax = 1; /* SVM revision 1 */
3164 entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
3165 ASID emulation to nested SVM */
3166 entry->ecx = 0; /* Reserved */
3167 entry->edx = 0; /* Do not support any additional features */
3168
3169 break;
3170 }
3171}
3172
2882static const struct trace_print_flags svm_exit_reasons_str[] = { 3173static const struct trace_print_flags svm_exit_reasons_str[] = {
2883 { SVM_EXIT_READ_CR0, "read_cr0" }, 3174 { SVM_EXIT_READ_CR0, "read_cr0" },
2884 { SVM_EXIT_READ_CR3, "read_cr3" }, 3175 { SVM_EXIT_READ_CR3, "read_cr3" },
2885 { SVM_EXIT_READ_CR4, "read_cr4" }, 3176 { SVM_EXIT_READ_CR4, "read_cr4" },
2886 { SVM_EXIT_READ_CR8, "read_cr8" }, 3177 { SVM_EXIT_READ_CR8, "read_cr8" },
2887 { SVM_EXIT_WRITE_CR0, "write_cr0" }, 3178 { SVM_EXIT_WRITE_CR0, "write_cr0" },
2888 { SVM_EXIT_WRITE_CR3, "write_cr3" }, 3179 { SVM_EXIT_WRITE_CR3, "write_cr3" },
2889 { SVM_EXIT_WRITE_CR4, "write_cr4" }, 3180 { SVM_EXIT_WRITE_CR4, "write_cr4" },
2890 { SVM_EXIT_WRITE_CR8, "write_cr8" }, 3181 { SVM_EXIT_WRITE_CR8, "write_cr8" },
2891 { SVM_EXIT_READ_DR0, "read_dr0" }, 3182 { SVM_EXIT_READ_DR0, "read_dr0" },
2892 { SVM_EXIT_READ_DR1, "read_dr1" }, 3183 { SVM_EXIT_READ_DR1, "read_dr1" },
2893 { SVM_EXIT_READ_DR2, "read_dr2" }, 3184 { SVM_EXIT_READ_DR2, "read_dr2" },
2894 { SVM_EXIT_READ_DR3, "read_dr3" }, 3185 { SVM_EXIT_READ_DR3, "read_dr3" },
2895 { SVM_EXIT_WRITE_DR0, "write_dr0" }, 3186 { SVM_EXIT_WRITE_DR0, "write_dr0" },
2896 { SVM_EXIT_WRITE_DR1, "write_dr1" }, 3187 { SVM_EXIT_WRITE_DR1, "write_dr1" },
2897 { SVM_EXIT_WRITE_DR2, "write_dr2" }, 3188 { SVM_EXIT_WRITE_DR2, "write_dr2" },
2898 { SVM_EXIT_WRITE_DR3, "write_dr3" }, 3189 { SVM_EXIT_WRITE_DR3, "write_dr3" },
2899 { SVM_EXIT_WRITE_DR5, "write_dr5" }, 3190 { SVM_EXIT_WRITE_DR5, "write_dr5" },
2900 { SVM_EXIT_WRITE_DR7, "write_dr7" }, 3191 { SVM_EXIT_WRITE_DR7, "write_dr7" },
2901 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, 3192 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" },
2902 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, 3193 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" },
2903 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, 3194 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" },
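
svm_set_supported_cpuid() above fills CPUID leaf 0x8000000A, the leaf that advertises SVM itself to a (nested) guest: EAX holds the SVM revision, EBX the number of ASIDs, and EDX the optional-feature bits (NPT, LBR virtualization, NRIP save, and so on). For comparison, a userspace probe of the host's copy of the leaf -- GCC/Clang's cpuid.h assumed, and the leaf only exists on SVM-capable AMD parts:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x8000000A not available");
		return 1;
	}
	printf("SVM revision: %u\n", eax & 0xff);
	printf("ASIDs:        %u\n", ebx);
	printf("features:     %#x\n", edx);	/* e.g. bit 0 NPT, bit 3 NRIP save */
	return 0;
}
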
@@ -2946,8 +3237,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
2946{ 3237{
2947 struct vcpu_svm *svm = to_svm(vcpu); 3238 struct vcpu_svm *svm = to_svm(vcpu);
2948 3239
2949 update_cr0_intercept(svm);
2950 svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; 3240 svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
3241 if (is_nested(svm))
3242 svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR;
3243 update_cr0_intercept(svm);
2951} 3244}
2952 3245
2953static struct kvm_x86_ops svm_x86_ops = { 3246static struct kvm_x86_ops svm_x86_ops = {
@@ -2986,8 +3279,7 @@ static struct kvm_x86_ops svm_x86_ops = {
2986 .set_idt = svm_set_idt, 3279 .set_idt = svm_set_idt,
2987 .get_gdt = svm_get_gdt, 3280 .get_gdt = svm_get_gdt,
2988 .set_gdt = svm_set_gdt, 3281 .set_gdt = svm_set_gdt,
2989 .get_dr = svm_get_dr, 3282 .set_dr7 = svm_set_dr7,
2990 .set_dr = svm_set_dr,
2991 .cache_reg = svm_cache_reg, 3283 .cache_reg = svm_cache_reg,
2992 .get_rflags = svm_get_rflags, 3284 .get_rflags = svm_get_rflags,
2993 .set_rflags = svm_set_rflags, 3285 .set_rflags = svm_set_rflags,
@@ -3023,12 +3315,14 @@ static struct kvm_x86_ops svm_x86_ops = {
3023 .cpuid_update = svm_cpuid_update, 3315 .cpuid_update = svm_cpuid_update,
3024 3316
3025 .rdtscp_supported = svm_rdtscp_supported, 3317 .rdtscp_supported = svm_rdtscp_supported,
3318
3319 .set_supported_cpuid = svm_set_supported_cpuid,
3026}; 3320};
3027 3321
3028static int __init svm_init(void) 3322static int __init svm_init(void)
3029{ 3323{
3030 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), 3324 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
3031 THIS_MODULE); 3325 __alignof__(struct vcpu_svm), THIS_MODULE);
3032} 3326}
3033 3327
3034static void __exit svm_exit(void) 3328static void __exit svm_exit(void)
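
The svm_init() hunk above passes the vcpu structure's alignment into kvm_init() instead of leaving it to common code, since arch-specific vcpu contents (for example the FPU save area) can need stricter alignment than a default allocation provides. A small illustration of the idiom with a stand-in type -- not KVM's actual layout:

#include <stdalign.h>
#include <stdio.h>

/* Stand-in vcpu with an alignment-sensitive member */
struct demo_vcpu {
	alignas(16) unsigned char fxsave_area[512];	/* FXSAVE wants 16-byte alignment */
	long regs[16];
};

int main(void)
{
	/* kvm_init(ops, sizeof(...), __alignof__(...), THIS_MODULE) forwards both
	 * to the vcpu allocation cache; here we just print what it would pass. */
	printf("size  = %zu\n", sizeof(struct demo_vcpu));
	printf("align = %zu\n", alignof(struct demo_vcpu));
	return 0;
}
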