author	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-04 12:30:33 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-04 12:30:33 -0400
commit	ecefbd94b834fa32559d854646d777c56749ef1c (patch)
tree	ca8958900ad9e208a8e5fb7704f1b66dc76131b4 /arch/x86/kvm/vmx.c
parent	ce57e981f2b996aaca2031003b3f866368307766 (diff)
parent	3d11df7abbff013b811d5615320580cd5d9d7d31 (diff)
Merge tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Avi Kivity:
 "Highlights of the changes for this release include support for vfio
  level triggered interrupts, improved big real mode support on older
  Intels, a streamlined guest page table walker, guest APIC speedups,
  PIO optimizations, better overcommit handling, and read-only memory."

* tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
  KVM: s390: Fix vcpu_load handling in interrupt code
  KVM: x86: Fix guest debug across vcpu INIT reset
  KVM: Add resampling irqfds for level triggered interrupts
  KVM: optimize apic interrupt delivery
  KVM: MMU: Eliminate pointless temporary 'ac'
  KVM: MMU: Avoid access/dirty update loop if all is well
  KVM: MMU: Eliminate eperm temporary
  KVM: MMU: Optimize is_last_gpte()
  KVM: MMU: Simplify walk_addr_generic() loop
  KVM: MMU: Optimize pte permission checks
  KVM: MMU: Update accessed and dirty bits after guest pagetable walk
  KVM: MMU: Move gpte_access() out of paging_tmpl.h
  KVM: MMU: Optimize gpte_access() slightly
  KVM: MMU: Push clean gpte write protection out of gpte_access()
  KVM: clarify kvmclock documentation
  KVM: make processes waiting on vcpu mutex killable
  KVM: SVM: Make use of asm.h
  KVM: VMX: Make use of asm.h
  KVM: VMX: Make lto-friendly
  KVM: x86: lapic: Clean up find_highest_vector() and count_vectors()
  ...

Conflicts:
	arch/s390/include/asm/processor.h
	arch/x86/kvm/i8259.c
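The "improved big real mode support" called out above shows up in this file mainly as the rework visible in the hunks below: the per-register rmode save slots (tr, es, ds, fs, gs) become an array of full struct kvm_segment entries indexed by VCPU_SREG_*. A minimal standalone sketch of that caching idea (illustrative types and helpers, not the kernel code):

/* Sketch only: cache protected-mode segment state while the guest runs in
 * vm86-style real mode, one slot per segment register, as the diff below
 * does with vmx->rmode.segs[].  struct kvm_segment is trimmed to the
 * fields the sketch needs. */
#include <stdint.h>

enum { VCPU_SREG_ES, VCPU_SREG_CS, VCPU_SREG_SS, VCPU_SREG_DS,
       VCPU_SREG_FS, VCPU_SREG_GS, VCPU_SREG_TR, VCPU_SREG_LDTR, NR_SREGS };

struct kvm_segment {
	uint64_t base;
	uint32_t limit;
	uint16_t selector;
	uint8_t  type, s, dpl, present;
};

struct rmode_state {
	int vm86_active;
	struct kvm_segment segs[NR_SREGS];	/* one slot per segment register */
};

/* Save the current segment before it is rewritten for real mode... */
static void rmode_save_seg(struct rmode_state *rm, int seg,
			   const struct kvm_segment *cur)
{
	rm->segs[seg] = *cur;
}

/* ...and hand the saved value back when the guest reads it in vm86 mode. */
static void rmode_get_seg(const struct rmode_state *rm, int seg,
			  struct kvm_segment *out)
{
	*out = rm->segs[seg];
}

Indexing by VCPU_SREG_* is what lets the lookup-by-switch in vmx_get_segment() collapse to a single array read further down in the diff.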
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--	arch/x86/kvm/vmx.c	233
1 file changed, 97 insertions(+), 136 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 851aa7c3b890..ad6b1dd06f8b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
+extern const ulong vmx_return;
+
 #define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
 
@@ -405,16 +407,16 @@ struct vcpu_vmx {
 	struct {
 		int vm86_active;
 		ulong save_rflags;
+		struct kvm_segment segs[8];
+	} rmode;
+	struct {
+		u32 bitmask; /* 4 bits per segment (1 bit per field) */
 		struct kvm_save_segment {
 			u16 selector;
 			unsigned long base;
 			u32 limit;
 			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
-	struct {
-		u32 bitmask; /* 4 bits per segment (1 bit per field) */
-		struct kvm_save_segment seg[8];
+		} seg[8];
 	} segment_cache;
 	int vpid;
 	bool emulation_required;
@@ -450,7 +452,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
 				[number##_HIGH] = VMCS12_OFFSET(name)+4
 
-static unsigned short vmcs_field_to_offset_table[] = {
+static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
 	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
 	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
@@ -596,10 +598,9 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
 {
 	struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_page(page))
 		return NULL;
-	}
+
 	return page;
 }
 
@@ -667,7 +668,7 @@ static struct vmx_capability {
 		.ar_bytes = GUEST_##seg##_AR_BYTES,	\
 	}
 
-static struct kvm_vmx_segment_field {
+static const struct kvm_vmx_segment_field {
 	unsigned selector;
 	unsigned base;
 	unsigned limit;
@@ -1343,7 +1344,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	guest_efer = vmx->vcpu.arch.efer;
 
 	/*
-	 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
+	 * NX is emulated; LMA and LME handled by hardware; SCE meaningless
 	 * outside long mode
 	 */
 	ignore_bits = EFER_NX | EFER_SCE;
@@ -1995,7 +1996,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #endif
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
 		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
-		CPU_BASED_RDPMC_EXITING |
+		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
@@ -2291,16 +2292,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 	}
 }
 
-static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
-{
-	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-		vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
-	else
-		vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
-
-	update_exception_bitmap(vcpu);
-}
-
 static __init int cpu_has_kvm_support(void)
 {
 	return cpu_has_vmx();
@@ -2698,20 +2689,17 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
-static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
+static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	struct kvm_segment tmp = *save;
 
-	if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) {
-		vmcs_write16(sf->selector, save->selector);
-		vmcs_writel(sf->base, save->base);
-		vmcs_write32(sf->limit, save->limit);
-		vmcs_write32(sf->ar_bytes, save->ar);
-	} else {
-		u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK)
-			<< AR_DPL_SHIFT;
-		vmcs_write32(sf->ar_bytes, 0x93 | dpl);
+	if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
+		tmp.base = vmcs_readl(sf->base);
+		tmp.selector = vmcs_read16(sf->selector);
+		tmp.s = 1;
 	}
+	vmx_set_segment(vcpu, &tmp, seg);
 }
 
 static void enter_pmode(struct kvm_vcpu *vcpu)
@@ -2724,10 +2712,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 	vmx_segment_cache_clear(vmx);
 
-	vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
-	vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
+	vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
 	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
@@ -2742,10 +2727,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	if (emulate_invalid_guest_state)
 		return;
 
-	fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
 
 	vmx_segment_cache_clear(vmx);
 
@@ -2773,14 +2758,10 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 	return kvm->arch.tss_addr;
 }
 
-static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
-	save->selector = vmcs_read16(sf->selector);
-	save->base = vmcs_readl(sf->base);
-	save->limit = vmcs_read32(sf->limit);
-	save->ar = vmcs_read32(sf->ar_bytes);
 	vmcs_write16(sf->selector, save->base >> 4);
 	vmcs_write32(sf->base, save->base & 0xffff0);
 	vmcs_write32(sf->limit, 0xffff);
@@ -2800,9 +2781,16 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	if (enable_unrestricted_guest)
 		return;
 
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 1;
 
+
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
 	 * vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2817,14 +2805,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 	vmx_segment_cache_clear(vmx);
 
-	vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
-	vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
-
-	vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
-
-	vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
@@ -3117,35 +3099,24 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 		struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_save_segment *save;
 	u32 ar;
 
 	if (vmx->rmode.vm86_active
 	    && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
 		|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
-		|| seg == VCPU_SREG_GS)
-	    && !emulate_invalid_guest_state) {
-		switch (seg) {
-		case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
-		case VCPU_SREG_ES: save = &vmx->rmode.es; break;
-		case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
-		case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
-		case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
-		default: BUG();
-		}
-		var->selector = save->selector;
-		var->base = save->base;
-		var->limit = save->limit;
-		ar = save->ar;
+		|| seg == VCPU_SREG_GS)) {
+		*var = vmx->rmode.segs[seg];
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
-			goto use_saved_rmode_seg;
+			return;
+		var->base = vmx_read_guest_seg_base(vmx, seg);
+		var->selector = vmx_read_guest_seg_selector(vmx, seg);
+		return;
 	}
 	var->base = vmx_read_guest_seg_base(vmx, seg);
 	var->limit = vmx_read_guest_seg_limit(vmx, seg);
 	var->selector = vmx_read_guest_seg_selector(vmx, seg);
 	ar = vmx_read_guest_seg_ar(vmx, seg);
-use_saved_rmode_seg:
 	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
 	var->type = ar & 15;
@@ -3227,23 +3198,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 		struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
 	vmx_segment_cache_clear(vmx);
 
 	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
 		vmcs_write16(sf->selector, var->selector);
-		vmx->rmode.tr.selector = var->selector;
-		vmx->rmode.tr.base = var->base;
-		vmx->rmode.tr.limit = var->limit;
-		vmx->rmode.tr.ar = vmx_segment_access_rights(var);
+		vmx->rmode.segs[VCPU_SREG_TR] = *var;
 		return;
 	}
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
 	if (vmx->rmode.vm86_active && var->s) {
+		vmx->rmode.segs[seg] = *var;
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
@@ -3258,7 +3227,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	 * qemu binaries.
 	 * IA32 arch specifies that at the time of processor reset the
 	 * "Accessed" bit in the AR field of segment registers is 1. And qemu
-	 * is setting it to 0 in the usedland code. This causes invalid guest
+	 * is setting it to 0 in the userland code. This causes invalid guest
 	 * state vmexit when "unrestricted guest" mode is turned on.
 	 * Fix for this setup issue in cpu_reset is being pushed in the qemu
 	 * tree. Newer qemu binaries with that qemu fix would not need this
@@ -3288,16 +3257,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			     vmcs_readl(GUEST_CS_BASE) >> 4);
 		break;
 	case VCPU_SREG_ES:
-		fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
-		break;
 	case VCPU_SREG_DS:
-		fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
-		break;
 	case VCPU_SREG_GS:
-		fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
-		break;
 	case VCPU_SREG_FS:
-		fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+		fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
 		break;
 	case VCPU_SREG_SS:
 		vmcs_write16(GUEST_SS_SELECTOR,
@@ -3351,9 +3314,9 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 
 	if (var.base != (var.selector << 4))
 		return false;
-	if (var.limit != 0xffff)
+	if (var.limit < 0xffff)
 		return false;
-	if (ar != 0xf3)
+	if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3)
 		return false;
 
 	return true;
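The relaxed access-rights test above packs several conditions into one expression. A standalone restatement, assuming the VMX access-rights layout this file relies on (type in bits 0-3, S in bit 4, DPL in bits 5-6, P in bit 7, D/B in bit 14, G in bit 15); the mask values mirror vmx.c's, the helper name is illustrative:

#include <stdbool.h>
#include <stdint.h>

#define AR_DPL_SHIFT	5
#define AR_DB_MASK	(1u << 14)
#define AR_G_MASK	(1u << 15)

/* Accept any DPL and any G/DB setting, but still require a present,
 * read/write, accessed data segment (the 0xf3 template). */
static bool rmode_ar_ok(uint32_t ar)
{
	return ((ar | (3u << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) == 0xf3;
}

/* Examples: rmode_ar_ok(0xf3) and rmode_ar_ok(0xc0f3) both pass, while
 * 0x9b (an accessed code segment) does not. */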
@@ -3605,7 +3568,7 @@ out:
 
 static void seg_setup(int seg)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	unsigned int ar;
 
 	vmcs_write16(sf->selector, 0);
@@ -3770,8 +3733,7 @@ static void vmx_set_constant_host_state(void)
 	native_store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
 
-	asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl));
-	vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */
+	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
 	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
@@ -4005,8 +3967,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_rip_write(vcpu, 0);
 	kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
-	vmcs_writel(GUEST_DR7, 0x400);
-
 	vmcs_writel(GUEST_GDTR_BASE, 0);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
 
@@ -4456,7 +4416,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 	hypercall[2] = 0xc1;
 }
 
-/* called to set cr0 as approriate for a mov-to-cr0 exit. */
+/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	if (to_vmx(vcpu)->nested.vmxon &&
@@ -5701,7 +5661,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
  * to be done to userspace and return 0.
  */
-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
 	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
@@ -6229,17 +6189,10 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 					msrs[i].host);
 }
 
-#ifdef CONFIG_X86_64
-#define R "r"
-#define Q "q"
-#else
-#define R "e"
-#define Q "l"
-#endif
-
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long debugctlmsr;
 
 	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
 		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
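The R/Q string helpers removed above are superseded by the _ASM_* macros from <asm/asm.h> used in the rewritten asm that follows. A simplified sketch of what those helpers boil down to (the kernel's real definitions are more general and add surrounding spaces):

#ifdef CONFIG_X86_64
# define _ASM_AX		"rax"
# define _ASM_DX		"rdx"
# define _ASM_SP		"rsp"
# define _ASM_BP		"rbp"
# define _ASM_PTR		".quad"
# define __ASM_SIZE(inst)	#inst "q"
#else
# define _ASM_AX		"eax"
# define _ASM_DX		"edx"
# define _ASM_SP		"esp"
# define _ASM_BP		"ebp"
# define _ASM_PTR		".long"
# define __ASM_SIZE(inst)	#inst "l"
#endif
/* _ASM_BX, _ASM_CX, _ASM_SI and _ASM_DI follow the same pattern. */

So "mov %%" _ASM_AX ", %%cr2" assembles as "mov %%rax, %%cr2" on a 64-bit build and "mov %%eax, %%cr2" on a 32-bit one, and __ASM_SIZE(pop) becomes "popq" or "popl", which is why the local R/Q defines (and their #undef pair at the end of the file) can go away.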
@@ -6279,34 +6232,35 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_set_interrupt_shadow(vcpu, 0);
 
 	atomic_switch_perf_msrs(vmx);
+	debugctlmsr = get_debugctlmsr();
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
-		"push %%"R"dx; push %%"R"bp;"
-		"push %%"R"cx \n\t" /* placeholder for guest rcx */
-		"push %%"R"cx \n\t"
-		"cmp %%"R"sp, %c[host_rsp](%0) \n\t"
+		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
+		"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
+		"push %%" _ASM_CX " \n\t"
+		"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		"je 1f \n\t"
-		"mov %%"R"sp, %c[host_rsp](%0) \n\t"
+		"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		"1: \n\t"
 		/* Reload cr2 if changed */
-		"mov %c[cr2](%0), %%"R"ax \n\t"
-		"mov %%cr2, %%"R"dx \n\t"
-		"cmp %%"R"ax, %%"R"dx \n\t"
+		"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
+		"mov %%cr2, %%" _ASM_DX " \n\t"
+		"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
 		"je 2f \n\t"
-		"mov %%"R"ax, %%cr2 \n\t"
+		"mov %%" _ASM_AX", %%cr2 \n\t"
 		"2: \n\t"
 		/* Check if vmlaunch of vmresume is needed */
 		"cmpl $0, %c[launched](%0) \n\t"
 		/* Load guest registers. Don't clobber flags. */
-		"mov %c[rax](%0), %%"R"ax \n\t"
-		"mov %c[rbx](%0), %%"R"bx \n\t"
-		"mov %c[rdx](%0), %%"R"dx \n\t"
-		"mov %c[rsi](%0), %%"R"si \n\t"
-		"mov %c[rdi](%0), %%"R"di \n\t"
-		"mov %c[rbp](%0), %%"R"bp \n\t"
+		"mov %c[rax](%0), %%" _ASM_AX " \n\t"
+		"mov %c[rbx](%0), %%" _ASM_BX " \n\t"
+		"mov %c[rdx](%0), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%0), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%0), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%0), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%0),  %%r8  \n\t"
 		"mov %c[r9](%0),  %%r9  \n\t"
@@ -6317,24 +6271,24 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6317 "mov %c[r14](%0), %%r14 \n\t" 6271 "mov %c[r14](%0), %%r14 \n\t"
6318 "mov %c[r15](%0), %%r15 \n\t" 6272 "mov %c[r15](%0), %%r15 \n\t"
6319#endif 6273#endif
6320 "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ 6274 "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
6321 6275
6322 /* Enter guest mode */ 6276 /* Enter guest mode */
6323 "jne .Llaunched \n\t" 6277 "jne 1f \n\t"
6324 __ex(ASM_VMX_VMLAUNCH) "\n\t" 6278 __ex(ASM_VMX_VMLAUNCH) "\n\t"
6325 "jmp .Lkvm_vmx_return \n\t" 6279 "jmp 2f \n\t"
6326 ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" 6280 "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
6327 ".Lkvm_vmx_return: " 6281 "2: "
6328 /* Save guest registers, load host registers, keep flags */ 6282 /* Save guest registers, load host registers, keep flags */
6329 "mov %0, %c[wordsize](%%"R"sp) \n\t" 6283 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
6330 "pop %0 \n\t" 6284 "pop %0 \n\t"
6331 "mov %%"R"ax, %c[rax](%0) \n\t" 6285 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
6332 "mov %%"R"bx, %c[rbx](%0) \n\t" 6286 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
6333 "pop"Q" %c[rcx](%0) \n\t" 6287 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
6334 "mov %%"R"dx, %c[rdx](%0) \n\t" 6288 "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
6335 "mov %%"R"si, %c[rsi](%0) \n\t" 6289 "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
6336 "mov %%"R"di, %c[rdi](%0) \n\t" 6290 "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
6337 "mov %%"R"bp, %c[rbp](%0) \n\t" 6291 "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
6338#ifdef CONFIG_X86_64 6292#ifdef CONFIG_X86_64
6339 "mov %%r8, %c[r8](%0) \n\t" 6293 "mov %%r8, %c[r8](%0) \n\t"
6340 "mov %%r9, %c[r9](%0) \n\t" 6294 "mov %%r9, %c[r9](%0) \n\t"
@@ -6345,11 +6299,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6345 "mov %%r14, %c[r14](%0) \n\t" 6299 "mov %%r14, %c[r14](%0) \n\t"
6346 "mov %%r15, %c[r15](%0) \n\t" 6300 "mov %%r15, %c[r15](%0) \n\t"
6347#endif 6301#endif
6348 "mov %%cr2, %%"R"ax \n\t" 6302 "mov %%cr2, %%" _ASM_AX " \n\t"
6349 "mov %%"R"ax, %c[cr2](%0) \n\t" 6303 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
6350 6304
6351 "pop %%"R"bp; pop %%"R"dx \n\t" 6305 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
6352 "setbe %c[fail](%0) \n\t" 6306 "setbe %c[fail](%0) \n\t"
6307 ".pushsection .rodata \n\t"
6308 ".global vmx_return \n\t"
6309 "vmx_return: " _ASM_PTR " 2b \n\t"
6310 ".popsection"
6353 : : "c"(vmx), "d"((unsigned long)HOST_RSP), 6311 : : "c"(vmx), "d"((unsigned long)HOST_RSP),
6354 [launched]"i"(offsetof(struct vcpu_vmx, __launched)), 6312 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
6355 [fail]"i"(offsetof(struct vcpu_vmx, fail)), 6313 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
@@ -6374,12 +6332,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6374 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), 6332 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
6375 [wordsize]"i"(sizeof(ulong)) 6333 [wordsize]"i"(sizeof(ulong))
6376 : "cc", "memory" 6334 : "cc", "memory"
6377 , R"ax", R"bx", R"di", R"si"
6378#ifdef CONFIG_X86_64 6335#ifdef CONFIG_X86_64
6336 , "rax", "rbx", "rdi", "rsi"
6379 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" 6337 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
6338#else
6339 , "eax", "ebx", "edi", "esi"
6380#endif 6340#endif
6381 ); 6341 );
6382 6342
6343 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
6344 if (debugctlmsr)
6345 update_debugctlmsr(debugctlmsr);
6346
6383#ifndef CONFIG_X86_64 6347#ifndef CONFIG_X86_64
6384 /* 6348 /*
6385 * The sysexit path does not restore ds/es, so we must set them to 6349 * The sysexit path does not restore ds/es, so we must set them to
@@ -6424,9 +6388,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_complete_interrupts(vmx);
 }
 
-#undef R
-#undef Q
-
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7281,7 +7242,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
 
-	.set_guest_debug = set_guest_debug,
+	.update_db_bp_intercept = update_exception_bitmap,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,