about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Wincy Van <fanwenyi0529@gmail.com> 2015-02-03 10:58:17 -0500
committer: Paolo Bonzini <pbonzini@redhat.com> 2015-02-03 11:15:08 -0500
commit705699a139948a671cd66b915e8095c95fdf44d9 (patch)
tree7660d5125c4bf6039a62b43ab5fb84d9b62969a8
parent608406e290ca31d8f217cb765ee50152b41a7c9c (diff)
KVM: nVMX: Enable nested posted interrupt processing
If a vcpu has an interrupt in vmx non-root mode, injecting that interrupt requires a vmexit. With posted interrupt processing, the vmexit is not needed, and interrupts are fully taken care of by hardware. In nested vmx, this feature avoids many more vmexits than non-nested vmx. When L1 asks L0 to deliver L1's posted interrupt vector, and the target vCPU is in non-root mode, we use a physical IPI to deliver POSTED_INTR_NV to the target vCPU. Using POSTED_INTR_NV avoids unexpected interrupts if a concurrent vmexit happens and L1's vector is different from L0's. The IPI triggers posted interrupt processing in the target physical CPU. In case the target vCPU was not in guest mode, complete the posted interrupt delivery on the next entry to L2.

Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--arch/x86/kvm/lapic.c13
-rw-r--r--arch/x86/kvm/lapic.h1
-rw-r--r--arch/x86/kvm/vmx.c154
3 files changed, 161 insertions, 7 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3481e9eee8f4..86609c15726f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -325,17 +325,24 @@ static u8 count_vectors(void *bitmap)
325 return count; 325 return count;
326} 326}
327 327
328void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) 328void __kvm_apic_update_irr(u32 *pir, void *regs)
329{ 329{
330 u32 i, pir_val; 330 u32 i, pir_val;
331 struct kvm_lapic *apic = vcpu->arch.apic;
332 331
333 for (i = 0; i <= 7; i++) { 332 for (i = 0; i <= 7; i++) {
334 pir_val = xchg(&pir[i], 0); 333 pir_val = xchg(&pir[i], 0);
335 if (pir_val) 334 if (pir_val)
336 *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; 335 *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
337 } 336 }
338} 337}
338EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
339
340void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
341{
342 struct kvm_lapic *apic = vcpu->arch.apic;
343
344 __kvm_apic_update_irr(pir, apic->regs);
345}
339EXPORT_SYMBOL_GPL(kvm_apic_update_irr); 346EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
340 347
341static inline void apic_set_irr(int vec, struct kvm_lapic *apic) 348static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index c1ef25c89508..0bc6c656625b 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -57,6 +57,7 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
57void kvm_apic_set_version(struct kvm_vcpu *vcpu); 57void kvm_apic_set_version(struct kvm_vcpu *vcpu);
58 58
59void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); 59void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
60void __kvm_apic_update_irr(u32 *pir, void *regs);
60void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); 61void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
61int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 62int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
62 unsigned long *dest_map); 63 unsigned long *dest_map);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 955eff21f14a..6e112472b0b3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -218,6 +218,7 @@ struct __packed vmcs12 {
218 u64 tsc_offset; 218 u64 tsc_offset;
219 u64 virtual_apic_page_addr; 219 u64 virtual_apic_page_addr;
220 u64 apic_access_addr; 220 u64 apic_access_addr;
221 u64 posted_intr_desc_addr;
221 u64 ept_pointer; 222 u64 ept_pointer;
222 u64 eoi_exit_bitmap0; 223 u64 eoi_exit_bitmap0;
223 u64 eoi_exit_bitmap1; 224 u64 eoi_exit_bitmap1;
@@ -337,6 +338,7 @@ struct __packed vmcs12 {
337 u32 vmx_preemption_timer_value; 338 u32 vmx_preemption_timer_value;
338 u32 padding32[7]; /* room for future expansion */ 339 u32 padding32[7]; /* room for future expansion */
339 u16 virtual_processor_id; 340 u16 virtual_processor_id;
341 u16 posted_intr_nv;
340 u16 guest_es_selector; 342 u16 guest_es_selector;
341 u16 guest_cs_selector; 343 u16 guest_cs_selector;
342 u16 guest_ss_selector; 344 u16 guest_ss_selector;
@@ -409,6 +411,10 @@ struct nested_vmx {
409 */ 411 */
410 struct page *apic_access_page; 412 struct page *apic_access_page;
411 struct page *virtual_apic_page; 413 struct page *virtual_apic_page;
414 struct page *pi_desc_page;
415 struct pi_desc *pi_desc;
416 bool pi_pending;
417 u16 posted_intr_nv;
412 u64 msr_ia32_feature_control; 418 u64 msr_ia32_feature_control;
413 419
414 struct hrtimer preemption_timer; 420 struct hrtimer preemption_timer;
@@ -623,6 +629,7 @@ static int max_shadow_read_write_fields =
623 629
624static const unsigned short vmcs_field_to_offset_table[] = { 630static const unsigned short vmcs_field_to_offset_table[] = {
625 FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), 631 FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
632 FIELD(POSTED_INTR_NV, posted_intr_nv),
626 FIELD(GUEST_ES_SELECTOR, guest_es_selector), 633 FIELD(GUEST_ES_SELECTOR, guest_es_selector),
627 FIELD(GUEST_CS_SELECTOR, guest_cs_selector), 634 FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
628 FIELD(GUEST_SS_SELECTOR, guest_ss_selector), 635 FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
@@ -648,6 +655,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
648 FIELD64(TSC_OFFSET, tsc_offset), 655 FIELD64(TSC_OFFSET, tsc_offset),
649 FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), 656 FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
650 FIELD64(APIC_ACCESS_ADDR, apic_access_addr), 657 FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
658 FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
651 FIELD64(EPT_POINTER, ept_pointer), 659 FIELD64(EPT_POINTER, ept_pointer),
652 FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), 660 FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
653 FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), 661 FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
@@ -800,6 +808,7 @@ static void kvm_cpu_vmxon(u64 addr);
800static void kvm_cpu_vmxoff(void); 808static void kvm_cpu_vmxoff(void);
801static bool vmx_mpx_supported(void); 809static bool vmx_mpx_supported(void);
802static bool vmx_xsaves_supported(void); 810static bool vmx_xsaves_supported(void);
811static int vmx_vm_has_apicv(struct kvm *kvm);
803static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); 812static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
804static void vmx_set_segment(struct kvm_vcpu *vcpu, 813static void vmx_set_segment(struct kvm_vcpu *vcpu,
805 struct kvm_segment *var, int seg); 814 struct kvm_segment *var, int seg);
@@ -1157,6 +1166,11 @@ static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
1157 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 1166 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
1158} 1167}
1159 1168
1169static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
1170{
1171 return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
1172}
1173
1160static inline bool is_exception(u32 intr_info) 1174static inline bool is_exception(u32 intr_info)
1161{ 1175{
1162 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) 1176 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2360,6 +2374,9 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
2360 vmx->nested.nested_vmx_pinbased_ctls_high |= 2374 vmx->nested.nested_vmx_pinbased_ctls_high |=
2361 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | 2375 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
2362 PIN_BASED_VMX_PREEMPTION_TIMER; 2376 PIN_BASED_VMX_PREEMPTION_TIMER;
2377 if (vmx_vm_has_apicv(vmx->vcpu.kvm))
2378 vmx->nested.nested_vmx_pinbased_ctls_high |=
2379 PIN_BASED_POSTED_INTR;
2363 2380
2364 /* exit controls */ 2381 /* exit controls */
2365 rdmsr(MSR_IA32_VMX_EXIT_CTLS, 2382 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
@@ -4312,6 +4329,64 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
4312 return enable_apicv && irqchip_in_kernel(kvm); 4329 return enable_apicv && irqchip_in_kernel(kvm);
4313} 4330}
4314 4331
4332static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
4333{
4334 struct vcpu_vmx *vmx = to_vmx(vcpu);
4335 int max_irr;
4336 void *vapic_page;
4337 u16 status;
4338
4339 if (vmx->nested.pi_desc &&
4340 vmx->nested.pi_pending) {
4341 vmx->nested.pi_pending = false;
4342 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
4343 return 0;
4344
4345 max_irr = find_last_bit(
4346 (unsigned long *)vmx->nested.pi_desc->pir, 256);
4347
4348 if (max_irr == 256)
4349 return 0;
4350
4351 vapic_page = kmap(vmx->nested.virtual_apic_page);
4352 if (!vapic_page) {
4353 WARN_ON(1);
4354 return -ENOMEM;
4355 }
4356 __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
4357 kunmap(vmx->nested.virtual_apic_page);
4358
4359 status = vmcs_read16(GUEST_INTR_STATUS);
4360 if ((u8)max_irr > ((u8)status & 0xff)) {
4361 status &= ~0xff;
4362 status |= (u8)max_irr;
4363 vmcs_write16(GUEST_INTR_STATUS, status);
4364 }
4365 }
4366 return 0;
4367}
4368
4369static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
4370 int vector)
4371{
4372 struct vcpu_vmx *vmx = to_vmx(vcpu);
4373
4374 if (is_guest_mode(vcpu) &&
4375 vector == vmx->nested.posted_intr_nv) {
4376 /* the PIR and ON have been set by L1. */
4377 if (vcpu->mode == IN_GUEST_MODE)
4378 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
4379 POSTED_INTR_VECTOR);
4380 /*
4381 * If a posted intr is not recognized by hardware,
4382 * we will accomplish it in the next vmentry.
4383 */
4384 vmx->nested.pi_pending = true;
4385 kvm_make_request(KVM_REQ_EVENT, vcpu);
4386 return 0;
4387 }
4388 return -1;
4389}
4315/* 4390/*
4316 * Send interrupt to vcpu via posted interrupt way. 4391 * Send interrupt to vcpu via posted interrupt way.
4317 * 1. If target vcpu is running(non-root mode), send posted interrupt 4392 * 1. If target vcpu is running(non-root mode), send posted interrupt
@@ -4324,6 +4399,10 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
4324 struct vcpu_vmx *vmx = to_vmx(vcpu); 4399 struct vcpu_vmx *vmx = to_vmx(vcpu);
4325 int r; 4400 int r;
4326 4401
4402 r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
4403 if (!r)
4404 return;
4405
4327 if (pi_test_and_set_pir(vector, &vmx->pi_desc)) 4406 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
4328 return; 4407 return;
4329 4408
@@ -6585,6 +6664,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
6585 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 6664 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
6586 vmcs_write64(VMCS_LINK_POINTER, -1ull); 6665 vmcs_write64(VMCS_LINK_POINTER, -1ull);
6587 } 6666 }
6667 vmx->nested.posted_intr_nv = -1;
6588 kunmap(vmx->nested.current_vmcs12_page); 6668 kunmap(vmx->nested.current_vmcs12_page);
6589 nested_release_page(vmx->nested.current_vmcs12_page); 6669 nested_release_page(vmx->nested.current_vmcs12_page);
6590 vmx->nested.current_vmptr = -1ull; 6670 vmx->nested.current_vmptr = -1ull;
@@ -6613,6 +6693,12 @@ static void free_nested(struct vcpu_vmx *vmx)
6613 nested_release_page(vmx->nested.virtual_apic_page); 6693 nested_release_page(vmx->nested.virtual_apic_page);
6614 vmx->nested.virtual_apic_page = NULL; 6694 vmx->nested.virtual_apic_page = NULL;
6615 } 6695 }
6696 if (vmx->nested.pi_desc_page) {
6697 kunmap(vmx->nested.pi_desc_page);
6698 nested_release_page(vmx->nested.pi_desc_page);
6699 vmx->nested.pi_desc_page = NULL;
6700 vmx->nested.pi_desc = NULL;
6701 }
6616 6702
6617 nested_free_all_saved_vmcss(vmx); 6703 nested_free_all_saved_vmcss(vmx);
6618} 6704}
@@ -8320,6 +8406,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
8320 if (nested) 8406 if (nested)
8321 nested_vmx_setup_ctls_msrs(vmx); 8407 nested_vmx_setup_ctls_msrs(vmx);
8322 8408
8409 vmx->nested.posted_intr_nv = -1;
8323 vmx->nested.current_vmptr = -1ull; 8410 vmx->nested.current_vmptr = -1ull;
8324 vmx->nested.current_vmcs12 = NULL; 8411 vmx->nested.current_vmcs12 = NULL;
8325 8412
@@ -8565,6 +8652,31 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
8565 return false; 8652 return false;
8566 } 8653 }
8567 8654
8655 if (nested_cpu_has_posted_intr(vmcs12)) {
8656 if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
8657 return false;
8658
8659 if (vmx->nested.pi_desc_page) { /* shouldn't happen */
8660 kunmap(vmx->nested.pi_desc_page);
8661 nested_release_page(vmx->nested.pi_desc_page);
8662 }
8663 vmx->nested.pi_desc_page =
8664 nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
8665 if (!vmx->nested.pi_desc_page)
8666 return false;
8667
8668 vmx->nested.pi_desc =
8669 (struct pi_desc *)kmap(vmx->nested.pi_desc_page);
8670 if (!vmx->nested.pi_desc) {
8671 nested_release_page_clean(vmx->nested.pi_desc_page);
8672 return false;
8673 }
8674 vmx->nested.pi_desc =
8675 (struct pi_desc *)((void *)vmx->nested.pi_desc +
8676 (unsigned long)(vmcs12->posted_intr_desc_addr &
8677 (PAGE_SIZE - 1)));
8678 }
8679
8568 return true; 8680 return true;
8569} 8681}
8570 8682
@@ -8700,7 +8812,8 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
8700{ 8812{
8701 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && 8813 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
8702 !nested_cpu_has_apic_reg_virt(vmcs12) && 8814 !nested_cpu_has_apic_reg_virt(vmcs12) &&
8703 !nested_cpu_has_vid(vmcs12)) 8815 !nested_cpu_has_vid(vmcs12) &&
8816 !nested_cpu_has_posted_intr(vmcs12))
8704 return 0; 8817 return 0;
8705 8818
8706 /* 8819 /*
@@ -8719,6 +8832,17 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
8719 !nested_exit_on_intr(vcpu)) 8832 !nested_exit_on_intr(vcpu))
8720 return -EINVAL; 8833 return -EINVAL;
8721 8834
8835 /*
8836 * bits 15:8 should be zero in posted_intr_nv,
8837 * the descriptor address has been already checked
8838 * in nested_get_vmcs12_pages.
8839 */
8840 if (nested_cpu_has_posted_intr(vmcs12) &&
8841 (!nested_cpu_has_vid(vmcs12) ||
8842 !nested_exit_intr_ack_set(vcpu) ||
8843 vmcs12->posted_intr_nv & 0xff00))
8844 return -EINVAL;
8845
8722 /* tpr shadow is needed by all apicv features. */ 8846 /* tpr shadow is needed by all apicv features. */
8723 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) 8847 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
8724 return -EINVAL; 8848 return -EINVAL;
@@ -8961,8 +9085,23 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8961 9085
8962 exec_control = vmcs12->pin_based_vm_exec_control; 9086 exec_control = vmcs12->pin_based_vm_exec_control;
8963 exec_control |= vmcs_config.pin_based_exec_ctrl; 9087 exec_control |= vmcs_config.pin_based_exec_ctrl;
8964 exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER | 9088 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
8965 PIN_BASED_POSTED_INTR); 9089
9090 if (nested_cpu_has_posted_intr(vmcs12)) {
9091 /*
9092 * Note that we use L0's vector here and in
9093 * vmx_deliver_nested_posted_interrupt.
9094 */
9095 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
9096 vmx->nested.pi_pending = false;
9097 vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
9098 vmcs_write64(POSTED_INTR_DESC_ADDR,
9099 page_to_phys(vmx->nested.pi_desc_page) +
9100 (unsigned long)(vmcs12->posted_intr_desc_addr &
9101 (PAGE_SIZE - 1)));
9102 } else
9103 exec_control &= ~PIN_BASED_POSTED_INTR;
9104
8966 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); 9105 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
8967 9106
8968 vmx->nested.preemption_timer_expired = false; 9107 vmx->nested.preemption_timer_expired = false;
@@ -9498,9 +9637,10 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
9498 if (vmx->nested.nested_run_pending) 9637 if (vmx->nested.nested_run_pending)
9499 return -EBUSY; 9638 return -EBUSY;
9500 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); 9639 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
9640 return 0;
9501 } 9641 }
9502 9642
9503 return 0; 9643 return vmx_complete_nested_posted_interrupt(vcpu);
9504} 9644}
9505 9645
9506static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) 9646static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
@@ -9878,6 +10018,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
9878 nested_release_page(vmx->nested.virtual_apic_page); 10018 nested_release_page(vmx->nested.virtual_apic_page);
9879 vmx->nested.virtual_apic_page = NULL; 10019 vmx->nested.virtual_apic_page = NULL;
9880 } 10020 }
10021 if (vmx->nested.pi_desc_page) {
10022 kunmap(vmx->nested.pi_desc_page);
10023 nested_release_page(vmx->nested.pi_desc_page);
10024 vmx->nested.pi_desc_page = NULL;
10025 vmx->nested.pi_desc = NULL;
10026 }
9881 10027
9882 /* 10028 /*
9883 * We are now running in L2, mmu_notifier will force to reload the 10029 * We are now running in L2, mmu_notifier will force to reload the