-rw-r--r--  arch/x86/kernel/kvm.c                            |   7
-rw-r--r--  arch/x86/kvm/Makefile                            |   4
-rw-r--r--  arch/x86/kvm/hyperv.c                            |   7
-rw-r--r--  arch/x86/kvm/lapic.c                             |   2
-rw-r--r--  arch/x86/kvm/mmu.c                               |   1
-rw-r--r--  arch/x86/kvm/svm.c                               |  31
-rw-r--r--  arch/x86/kvm/trace.h                             |   2
-rw-r--r--  arch/x86/kvm/vmx/evmcs.c                         |   7
-rw-r--r--  arch/x86/kvm/vmx/nested.c                        |  14
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                           | 147
-rw-r--r--  arch/x86/kvm/x86.c                               |   8
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c       |   9
-rw-r--r--  tools/testing/selftests/kvm/x86_64/evmcs_test.c  |   6
13 files changed, 130 insertions, 115 deletions
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index ba4bfb7f6a36..5c93a65ee1e5 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -457,6 +457,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
 #else
 	u64 ipi_bitmap = 0;
 #endif
+	long ret;
 
 	if (cpumask_empty(mask))
 		return;
@@ -482,8 +483,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
 		} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
 			max = apic_id < max ? max : apic_id;
 		} else {
-			kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+			ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
 				(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+			WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
 			min = max = apic_id;
 			ipi_bitmap = 0;
 		}
@@ -491,8 +493,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
 	}
 
 	if (ipi_bitmap) {
-		kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+		ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
 			(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+		WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
 	}
 
 	local_irq_restore(flags);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 69b3a7c30013..31ecf7a76d5a 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -2,10 +2,6 @@
 
 ccflags-y += -Iarch/x86/kvm
 
-CFLAGS_x86.o := -I.
-CFLAGS_svm.o := -I.
-CFLAGS_vmx.o := -I.
-
 KVM := ../../../virt/kvm
 
 kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index c90a5352d158..89d20ed1d2e8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1636,7 +1636,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
 		if (ret != HV_STATUS_INVALID_PORT_ID)
 			break;
-		/* maybe userspace knows this conn_id: fall through */
+		/* fall through - maybe userspace knows this conn_id. */
 	case HVCALL_POST_MESSAGE:
 		/* don't bother userspace if it has no way to handle it */
 		if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
@@ -1832,7 +1832,6 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 		ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE;
 		ent->eax |= HV_X64_MSR_RESET_AVAILABLE;
 		ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
-		ent->eax |= HV_X64_MSR_GUEST_IDLE_AVAILABLE;
 		ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS;
 		ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT;
 
@@ -1848,11 +1847,11 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 	case HYPERV_CPUID_ENLIGHTMENT_INFO:
 		ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
 		ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
-		ent->eax |= HV_X64_SYSTEM_RESET_RECOMMENDED;
 		ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
 		ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
 		ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
-		ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
+		if (evmcs_ver)
+			ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
 
 		/*
 		 * Default number of spinlock retry attempts, matches
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9f089e2e09d0..4b6c2da7265c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1035,6 +1035,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	switch (delivery_mode) {
 	case APIC_DM_LOWEST:
 		vcpu->arch.apic_arb_prio++;
+		/* fall through */
 	case APIC_DM_FIXED:
 		if (unlikely(trig_mode && !level))
 			break;
@@ -1874,6 +1875,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_LVT0:
 		apic_manage_nmi_watchdog(apic, val);
+		/* fall through */
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
 	case APIC_LVT1:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ce770b446238..da9c42349b1f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4371,6 +4371,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 			rsvd_bits(maxphyaddr, 51);
 		rsvd_check->rsvd_bits_mask[1][4] =
 			rsvd_check->rsvd_bits_mask[0][4];
+		/* fall through */
 	case PT64_ROOT_4LEVEL:
 		rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
 			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a157ca5b6869..f13a3a24d360 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3414,6 +3414,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	kvm_mmu_reset_context(&svm->vcpu);
 	kvm_mmu_load(&svm->vcpu);
 
+	/*
+	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
+	 * doesn't end up in L1.
+	 */
+	svm->vcpu.arch.nmi_injected = false;
+	kvm_clear_exception_queue(&svm->vcpu);
+	kvm_clear_interrupt_queue(&svm->vcpu);
+
 	return 0;
 }
 
@@ -4395,7 +4403,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_IA32_APICBASE:
 		if (kvm_vcpu_apicv_active(vcpu))
 			avic_update_vapic_bar(to_svm(vcpu), data);
-		/* Follow through */
+		/* Fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -4504,28 +4512,19 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
 		kvm_lapic_reg_write(apic, APIC_ICR, icrl);
 		break;
 	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
-		int i;
-		struct kvm_vcpu *vcpu;
-		struct kvm *kvm = svm->vcpu.kvm;
 		struct kvm_lapic *apic = svm->vcpu.arch.apic;
 
 		/*
-		 * At this point, we expect that the AVIC HW has already
-		 * set the appropriate IRR bits on the valid target
-		 * vcpus. So, we just need to kick the appropriate vcpu.
+		 * Update ICR high and low, then emulate sending IPI,
+		 * which is handled when writing APIC_ICR.
 		 */
-		kvm_for_each_vcpu(i, vcpu, kvm) {
-			bool m = kvm_apic_match_dest(vcpu, apic,
-						     icrl & KVM_APIC_SHORT_MASK,
-						     GET_APIC_DEST_FIELD(icrh),
-						     icrl & KVM_APIC_DEST_MASK);
-
-			if (m && !avic_vcpu_is_running(vcpu))
-				kvm_vcpu_wake_up(vcpu);
-		}
+		kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
+		kvm_lapic_reg_write(apic, APIC_ICR, icrl);
 		break;
 	}
 	case AVIC_IPI_FAILURE_INVALID_TARGET:
+		WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
+			  index, svm->vcpu.vcpu_id, icrh, icrl);
 		break;
 	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
 		WARN_ONCE(1, "Invalid backing page\n");
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 705f40ae2532..6432d08c7de7 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1465,7 +1465,7 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH arch/x86/kvm
+#define TRACE_INCLUDE_PATH ../../arch/x86/kvm
 #undef TRACE_INCLUDE_FILE
 #define TRACE_INCLUDE_FILE trace
 
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 95bc2247478d..5466c6d85cf3 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -332,16 +332,17 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 			uint16_t *vmcs_version)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled;
+
+	vmx->nested.enlightened_vmcs_enabled = true;
 
 	if (vmcs_version)
 		*vmcs_version = nested_get_evmcs_version(vcpu);
 
 	/* We don't support disabling the feature for simplicity. */
-	if (vmx->nested.enlightened_vmcs_enabled)
+	if (evmcs_already_enabled)
 		return 0;
 
-	vmx->nested.enlightened_vmcs_enabled = true;
-
 	vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
 	vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
 	vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2616bd2c7f2c..8ff20523661b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -55,7 +55,7 @@ static u16 shadow_read_write_fields[] = {
 static int max_shadow_read_write_fields =
 	ARRAY_SIZE(shadow_read_write_fields);
 
-void init_vmcs_shadow_fields(void)
+static void init_vmcs_shadow_fields(void)
 {
 	int i, j;
 
@@ -4140,11 +4140,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	if (r < 0)
 		goto out_vmcs02;
 
-	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+	vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL);
 	if (!vmx->nested.cached_vmcs12)
 		goto out_cached_vmcs12;
 
-	vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+	vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL);
 	if (!vmx->nested.cached_shadow_vmcs12)
 		goto out_cached_shadow_vmcs12;
 
@@ -5263,13 +5263,17 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 		copy_shadow_to_vmcs12(vmx);
 	}
 
-	if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
+	/*
+	 * Copy over the full allocated size of vmcs12 rather than just the size
+	 * of the struct.
+	 */
+	if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE))
 		return -EFAULT;
 
 	if (nested_cpu_has_shadow_vmcs(vmcs12) &&
 	    vmcs12->vmcs_link_pointer != -1ull) {
 		if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
-				 get_shadow_vmcs12(vcpu), sizeof(*vmcs12)))
+				 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
 			return -EFAULT;
 	}
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f6915f10e584..4341175339f3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -423,7 +423,7 @@ static void check_ept_pointer_match(struct kvm *kvm)
 		to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
 }
 
-int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
+static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
 		void *data)
 {
 	struct kvm_tlb_range *range = data;
@@ -1773,7 +1773,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
 			return 1;
-		/* Otherwise falls through */
+		/* Else, falls through */
 	default:
 		msr = find_msr_entry(vmx, msr_info->index);
 		if (msr) {
@@ -2014,7 +2014,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
 			return 1;
-		/* Otherwise falls through */
+		/* Else, falls through */
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
@@ -2344,7 +2344,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 		case 37: /* AAT100 */
 		case 44: /* BC86,AAY89,BD102 */
 		case 46: /* BA97 */
-			_vmexit_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+			_vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
 			_vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
 			pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
 					"does not work properly. Using workaround\n");
@@ -6362,72 +6362,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
 		vmx->loaded_vmcs->hv_timer_armed = false;
 }
 
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4, evmcs_rsp;
-
-	/* Record the guest's net vcpu time for enforced NMI injections. */
-	if (unlikely(!enable_vnmi &&
-		     vmx->loaded_vmcs->soft_vnmi_blocked))
-		vmx->loaded_vmcs->entry_time = ktime_get();
-
-	/* Don't enter VMX if guest state is invalid, let the exit handler
-	   start emulation until we arrive back to a valid state */
-	if (vmx->emulation_required)
-		return;
-
-	if (vmx->ple_window_dirty) {
-		vmx->ple_window_dirty = false;
-		vmcs_write32(PLE_WINDOW, vmx->ple_window);
-	}
-
-	if (vmx->nested.need_vmcs12_sync)
-		nested_sync_from_vmcs12(vcpu);
-
-	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
-		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
-	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
-		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
-
-	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->host_state.cr3 = cr3;
-	}
-
-	cr4 = cr4_read_shadow();
-	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
-		vmcs_writel(HOST_CR4, cr4);
-		vmx->loaded_vmcs->host_state.cr4 = cr4;
-	}
-
-	/* When single-stepping over STI and MOV SS, we must clear the
-	 * corresponding interruptibility bits in the guest state. Otherwise
-	 * vmentry fails as it then expects bit 14 (BS) in pending debug
-	 * exceptions being set, but that's not correct for the guest debugging
-	 * case. */
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmx_set_interrupt_shadow(vcpu, 0);
-
-	if (static_cpu_has(X86_FEATURE_PKU) &&
-	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
-	    vcpu->arch.pkru != vmx->host_pkru)
-		__write_pkru(vcpu->arch.pkru);
-
-	pt_guest_enter(vmx);
-
-	atomic_switch_perf_msrs(vmx);
-
-	vmx_update_hv_timer(vcpu);
-
-	/*
-	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
-	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
-	 * is no need to worry about the conditional branch over the wrmsr
-	 * being speculatively taken.
-	 */
-	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+	unsigned long evmcs_rsp;
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 
@@ -6567,6 +6504,77 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	      , "eax", "ebx", "edi"
 #endif
 	      );
+}
+STACK_FRAME_NON_STANDARD(__vmx_vcpu_run);
+
+static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long cr3, cr4;
+
+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!enable_vnmi &&
+		     vmx->loaded_vmcs->soft_vnmi_blocked))
+		vmx->loaded_vmcs->entry_time = ktime_get();
+
+	/* Don't enter VMX if guest state is invalid, let the exit handler
+	   start emulation until we arrive back to a valid state */
+	if (vmx->emulation_required)
+		return;
+
+	if (vmx->ple_window_dirty) {
+		vmx->ple_window_dirty = false;
+		vmcs_write32(PLE_WINDOW, vmx->ple_window);
+	}
+
+	if (vmx->nested.need_vmcs12_sync)
+		nested_sync_from_vmcs12(vcpu);
+
+	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
+		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
+	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+
+	cr3 = __get_current_cr3_fast();
+	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+		vmcs_writel(HOST_CR3, cr3);
+		vmx->loaded_vmcs->host_state.cr3 = cr3;
+	}
+
+	cr4 = cr4_read_shadow();
+	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
+		vmcs_writel(HOST_CR4, cr4);
+		vmx->loaded_vmcs->host_state.cr4 = cr4;
+	}
+
+	/* When single-stepping over STI and MOV SS, we must clear the
+	 * corresponding interruptibility bits in the guest state. Otherwise
+	 * vmentry fails as it then expects bit 14 (BS) in pending debug
+	 * exceptions being set, but that's not correct for the guest debugging
+	 * case. */
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vmx_set_interrupt_shadow(vcpu, 0);
+
+	if (static_cpu_has(X86_FEATURE_PKU) &&
+	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
+	    vcpu->arch.pkru != vmx->host_pkru)
+		__write_pkru(vcpu->arch.pkru);
+
+	pt_guest_enter(vmx);
+
+	atomic_switch_perf_msrs(vmx);
+
+	vmx_update_hv_timer(vcpu);
+
+	/*
+	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
+	 * is no need to worry about the conditional branch over the wrmsr
+	 * being speculatively taken.
+	 */
+	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+
+	__vmx_vcpu_run(vcpu, vmx);
 
 	/*
 	 * We do not use IBRS in the kernel. If this vCPU has used the
@@ -6648,7 +6656,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);
 }
-STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
 static struct kvm *vmx_vm_alloc(void)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02c8e095a239..3d27206f6c01 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3834,6 +3834,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	case KVM_CAP_HYPERV_SYNIC2:
 		if (cap->args[0])
 			return -EINVAL;
+		/* fall through */
+
 	case KVM_CAP_HYPERV_SYNIC:
 		if (!irqchip_in_kernel(vcpu->kvm))
 			return -EINVAL;
@@ -6480,8 +6482,7 @@ restart:
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
-		if (r == EMULATE_DONE &&
-		    (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+		if (r == EMULATE_DONE && ctxt->tf)
 			kvm_vcpu_do_singlestep(vcpu, &r);
 		if (!ctxt->have_exception ||
 		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
@@ -7093,10 +7094,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_CLOCK_PAIRING:
 		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
 		break;
+#endif
 	case KVM_HC_SEND_IPI:
 		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
 		break;
-#endif
 	default:
 		ret = -KVM_ENOSYS;
 		break;
@@ -7937,6 +7938,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 				vcpu->arch.pv.pv_unhalted = false;
 				vcpu->arch.mp_state =
 					KVM_MP_STATE_RUNNABLE;
+				/* fall through */
 			case KVM_MP_STATE_RUNNABLE:
 				vcpu->arch.apf.halted = false;
 				break;
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 23022e9d32eb..b52cfdefecbf 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -571,7 +571,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	 * already exist.
 	 */
 	region = (struct userspace_mem_region *) userspace_mem_region_find(
-		vm, guest_paddr, guest_paddr + npages * vm->page_size);
+		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
 	if (region != NULL)
 		TEST_ASSERT(false, "overlapping userspace_mem_region already "
 			"exists\n"
@@ -587,15 +587,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	     region = region->next) {
 		if (region->region.slot == slot)
 			break;
-		if ((guest_paddr <= (region->region.guest_phys_addr
-		     + region->region.memory_size))
-		    && ((guest_paddr + npages * vm->page_size)
-		    >= region->region.guest_phys_addr))
-			break;
 	}
 	if (region != NULL)
 		TEST_ASSERT(false, "A mem region with the requested slot "
-			"or overlapping physical memory range already exists.\n"
+			"already exists.\n"
 			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
 			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
 			slot, guest_paddr, npages,
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index ea3c73e8f4f6..c49c2a28b0eb 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -103,6 +103,12 @@ int main(int argc, char *argv[])
 
 	vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
 
+	/* KVM should return supported EVMCS version range */
+	TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
+		    (evmcs_ver & 0xff) > 0,
+		    "Incorrect EVMCS version range: %x:%x\n",
+		    evmcs_ver & 0xff, evmcs_ver >> 8);
+
 	run = vcpu_state(vm, VCPU_ID);
 
 	vcpu_regs_get(vm, VCPU_ID, &regs1);