aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Feng Wu <feng.wu@intel.com> 2015-09-18 10:29:55 -0400
committer Paolo Bonzini <pbonzini@redhat.com> 2015-10-01 09:06:53 -0400
commit bf9f6ac8d74969690df1485b33b7c238ca9f2269 (patch)
tree cfaa21f2831d959dca9bdb4c4ad9e329fec4d5e8
parent 28b835d60fcc2498e717cf5e6f0c3691c24546f7 (diff)
KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
This patch updates the Posted-Interrupts Descriptor when vCPU is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu <feng.wu@intel.com>
[Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- Documentation/virtual/kvm/locking.txt 12
-rw-r--r-- arch/x86/include/asm/kvm_host.h 11
-rw-r--r-- arch/x86/kvm/vmx.c 153
-rw-r--r-- arch/x86/kvm/x86.c 34
-rw-r--r-- include/linux/kvm_host.h 3
-rw-r--r-- virt/kvm/kvm_main.c 3
6 files changed, 206 insertions, 10 deletions
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index d68af4dc3006..19f94a6b9bb0 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g.
166 MMIO/PIO address->device structure mapping (kvm->buses). 166 MMIO/PIO address->device structure mapping (kvm->buses).
167 The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu 167 The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
168 if it is needed by multiple functions. 168 if it is needed by multiple functions.
169
170Name: blocked_vcpu_on_cpu_lock
171Type: spinlock_t
172Arch: x86
173Protects: blocked_vcpu_on_cpu
174Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
175 When VT-d posted-interrupts is supported and the VM has assigned
176 devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu,
177 protected by blocked_vcpu_on_cpu_lock. When VT-d hardware issues
178 a wakeup notification event because an external interrupt from
179 an assigned device has arrived, we find the vCPU on the list and
180 wake it up.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 15664994b6f3..cdbdb559ecd2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -899,6 +899,17 @@ struct kvm_x86_ops {
899 /* pmu operations of sub-arch */ 899 /* pmu operations of sub-arch */
900 const struct kvm_pmu_ops *pmu_ops; 900 const struct kvm_pmu_ops *pmu_ops;
901 901
902 /*
903 * Architecture specific hooks for vCPU blocking due to
904 * the HLT instruction.
905 * Return values for .pre_block():
906 * - 0 means continue to block the vCPU.
907 * - 1 means we cannot block the vCPU because some event
908 * happened during this period, e.g. the 'ON' bit in the
909 * posted-interrupts descriptor was set.
910 */
911 int (*pre_block)(struct kvm_vcpu *vcpu);
912 void (*post_block)(struct kvm_vcpu *vcpu);
902 int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, 913 int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
903 uint32_t guest_irq, bool set); 914 uint32_t guest_irq, bool set);
904}; 915};
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 99f5c61954ea..c5c22831aee2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -878,6 +878,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
878static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); 878static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
879static DEFINE_PER_CPU(struct desc_ptr, host_gdt); 879static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
880 880
881/*
882 * We maintain a per-CPU linked-list of vCPUs, so in wakeup_handler() we
883 * can find which vCPU should be woken up.
884 */
885static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
886static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
887
881static unsigned long *vmx_io_bitmap_a; 888static unsigned long *vmx_io_bitmap_a;
882static unsigned long *vmx_io_bitmap_b; 889static unsigned long *vmx_io_bitmap_b;
883static unsigned long *vmx_msr_bitmap_legacy; 890static unsigned long *vmx_msr_bitmap_legacy;
@@ -2986,6 +2993,8 @@ static int hardware_enable(void)
2986 return -EBUSY; 2993 return -EBUSY;
2987 2994
2988 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); 2995 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
2996 INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
2997 spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
2989 2998
2990 /* 2999 /*
2991 * Now we can enable the vmclear operation in kdump 3000 * Now we can enable the vmclear operation in kdump
@@ -6045,6 +6054,25 @@ static void update_ple_window_actual_max(void)
6045 ple_window_grow, INT_MIN); 6054 ple_window_grow, INT_MIN);
6046} 6055}
6047 6056
6057/*
6058 * Handler for POSTED_INTR_WAKEUP_VECTOR.
6059 */
6060static void wakeup_handler(void)
6061{
6062 struct kvm_vcpu *vcpu;
6063 int cpu = smp_processor_id();
6064
6065 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6066 list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
6067 blocked_vcpu_list) {
6068 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6069
6070 if (pi_test_on(pi_desc) == 1)
6071 kvm_vcpu_kick(vcpu);
6072 }
6073 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6074}
6075
6048static __init int hardware_setup(void) 6076static __init int hardware_setup(void)
6049{ 6077{
6050 int r = -ENOMEM, i, msr; 6078 int r = -ENOMEM, i, msr;
@@ -6231,6 +6259,8 @@ static __init int hardware_setup(void)
6231 kvm_x86_ops->enable_log_dirty_pt_masked = NULL; 6259 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
6232 } 6260 }
6233 6261
6262 kvm_set_posted_intr_wakeup_handler(wakeup_handler);
6263
6234 return alloc_kvm_area(); 6264 return alloc_kvm_area();
6235 6265
6236out8: 6266out8:
@@ -10432,6 +10462,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
10432} 10462}
10433 10463
10434/* 10464/*
10465 * This routine does the following things for a vCPU which is going
10466 * to be blocked, if VT-d PI is enabled:
10467 * - Store the vCPU to the wakeup list, so when interrupts happen
10468 * we can find the right vCPU to wake up.
10469 * - Change the Posted-interrupt descriptor as below:
10470 * 'NDST' <-- vcpu->pre_pcpu
10471 * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
10472 * - If 'ON' is set during this process, which means at least one
10473 * interrupt is posted for this vCPU, we cannot block it. In
10474 * this case, return 1; otherwise, return 0.
10475 *
10476 */
10477static int vmx_pre_block(struct kvm_vcpu *vcpu)
10478{
10479 unsigned long flags;
10480 unsigned int dest;
10481 struct pi_desc old, new;
10482 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
10483
10484 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
10485 !irq_remapping_cap(IRQ_POSTING_CAP))
10486 return 0;
10487
10488 vcpu->pre_pcpu = vcpu->cpu;
10489 spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
10490 vcpu->pre_pcpu), flags);
10491 list_add_tail(&vcpu->blocked_vcpu_list,
10492 &per_cpu(blocked_vcpu_on_cpu,
10493 vcpu->pre_pcpu));
10494 spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
10495 vcpu->pre_pcpu), flags);
10496
10497 do {
10498 old.control = new.control = pi_desc->control;
10499
10500 /*
10501 * We should not block the vCPU if
10502 * an interrupt is posted for it.
10503 */
10504 if (pi_test_on(pi_desc) == 1) {
10505 spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
10506 vcpu->pre_pcpu), flags);
10507 list_del(&vcpu->blocked_vcpu_list);
10508 spin_unlock_irqrestore(
10509 &per_cpu(blocked_vcpu_on_cpu_lock,
10510 vcpu->pre_pcpu), flags);
10511 vcpu->pre_pcpu = -1;
10512
10513 return 1;
10514 }
10515
10516 WARN((pi_desc->sn == 1),
10517 "Warning: SN field of posted-interrupts "
10518 "is set before blocking\n");
10519
10520 /*
10521 * Since the vCPU can be preempted during this process,
10522 * vcpu->cpu could be different from pre_pcpu. We
10523 * need to set pre_pcpu as the destination of the wakeup
10524 * notification event, so that we can find the right vCPU
10525 * to wake up in the wakeup handler if interrupts happen
10526 * while the vCPU is in the blocked state.
10527 */
10528 dest = cpu_physical_id(vcpu->pre_pcpu);
10529
10530 if (x2apic_enabled())
10531 new.ndst = dest;
10532 else
10533 new.ndst = (dest << 8) & 0xFF00;
10534
10535 /* set 'NV' to 'wakeup vector' */
10536 new.nv = POSTED_INTR_WAKEUP_VECTOR;
10537 } while (cmpxchg(&pi_desc->control, old.control,
10538 new.control) != old.control);
10539
10540 return 0;
10541}
10542
10543static void vmx_post_block(struct kvm_vcpu *vcpu)
10544{
10545 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
10546 struct pi_desc old, new;
10547 unsigned int dest;
10548 unsigned long flags;
10549
10550 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
10551 !irq_remapping_cap(IRQ_POSTING_CAP))
10552 return;
10553
10554 do {
10555 old.control = new.control = pi_desc->control;
10556
10557 dest = cpu_physical_id(vcpu->cpu);
10558
10559 if (x2apic_enabled())
10560 new.ndst = dest;
10561 else
10562 new.ndst = (dest << 8) & 0xFF00;
10563
10564 /* Allow posting non-urgent interrupts */
10565 new.sn = 0;
10566
10567 /* set 'NV' to 'notification vector' */
10568 new.nv = POSTED_INTR_VECTOR;
10569 } while (cmpxchg(&pi_desc->control, old.control,
10570 new.control) != old.control);
10571
10572 if(vcpu->pre_pcpu != -1) {
10573 spin_lock_irqsave(
10574 &per_cpu(blocked_vcpu_on_cpu_lock,
10575 vcpu->pre_pcpu), flags);
10576 list_del(&vcpu->blocked_vcpu_list);
10577 spin_unlock_irqrestore(
10578 &per_cpu(blocked_vcpu_on_cpu_lock,
10579 vcpu->pre_pcpu), flags);
10580 vcpu->pre_pcpu = -1;
10581 }
10582}
10583
10584/*
10435 * vmx_update_pi_irte - set IRTE for Posted-Interrupts 10585 * vmx_update_pi_irte - set IRTE for Posted-Interrupts
10436 * 10586 *
10437 * @kvm: kvm 10587 * @kvm: kvm
@@ -10622,6 +10772,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
10622 .flush_log_dirty = vmx_flush_log_dirty, 10772 .flush_log_dirty = vmx_flush_log_dirty,
10623 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, 10773 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
10624 10774
10775 .pre_block = vmx_pre_block,
10776 .post_block = vmx_post_block,
10777
10625 .pmu_ops = &intel_pmu_ops, 10778 .pmu_ops = &intel_pmu_ops,
10626 10779
10627 .update_pi_irte = vmx_update_pi_irte, 10780 .update_pi_irte = vmx_update_pi_irte,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b8425a769c0a..2d2c9bb0d6d6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6335,6 +6335,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6335 } 6335 }
6336 } 6336 }
6337 6337
6338 /*
6339 * KVM_REQ_EVENT is not set when posted interrupts are set by
6340 * VT-d hardware, so we have to update RVI unconditionally.
6341 */
6342 if (kvm_lapic_enabled(vcpu)) {
6343 /*
6344 * Update architecture specific hints for APIC
6345 * virtual interrupt delivery.
6346 */
6347 if (kvm_x86_ops->hwapic_irr_update)
6348 kvm_x86_ops->hwapic_irr_update(vcpu,
6349 kvm_lapic_find_highest_irr(vcpu));
6350 }
6351
6338 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 6352 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
6339 kvm_apic_accept_events(vcpu); 6353 kvm_apic_accept_events(vcpu);
6340 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 6354 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
@@ -6351,13 +6365,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6351 kvm_x86_ops->enable_irq_window(vcpu); 6365 kvm_x86_ops->enable_irq_window(vcpu);
6352 6366
6353 if (kvm_lapic_enabled(vcpu)) { 6367 if (kvm_lapic_enabled(vcpu)) {
6354 /*
6355 * Update architecture specific hints for APIC
6356 * virtual interrupt delivery.
6357 */
6358 if (kvm_x86_ops->hwapic_irr_update)
6359 kvm_x86_ops->hwapic_irr_update(vcpu,
6360 kvm_lapic_find_highest_irr(vcpu));
6361 update_cr8_intercept(vcpu); 6368 update_cr8_intercept(vcpu);
6362 kvm_lapic_sync_to_vapic(vcpu); 6369 kvm_lapic_sync_to_vapic(vcpu);
6363 } 6370 }
@@ -6493,10 +6500,15 @@ out:
6493 6500
6494static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) 6501static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
6495{ 6502{
6496 if (!kvm_arch_vcpu_runnable(vcpu)) { 6503 if (!kvm_arch_vcpu_runnable(vcpu) &&
6504 (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
6497 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 6505 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6498 kvm_vcpu_block(vcpu); 6506 kvm_vcpu_block(vcpu);
6499 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 6507 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6508
6509 if (kvm_x86_ops->post_block)
6510 kvm_x86_ops->post_block(vcpu);
6511
6500 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) 6512 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
6501 return 1; 6513 return 1;
6502 } 6514 }
@@ -6528,10 +6540,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
6528 6540
6529 for (;;) { 6541 for (;;) {
6530 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && 6542 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6531 !vcpu->arch.apf.halted) 6543 !vcpu->arch.apf.halted) {
6532 r = vcpu_enter_guest(vcpu); 6544 r = vcpu_enter_guest(vcpu);
6533 else 6545 } else {
6534 r = vcpu_block(kvm, vcpu); 6546 r = vcpu_block(kvm, vcpu);
6547 }
6548
6535 if (r <= 0) 6549 if (r <= 0)
6536 break; 6550 break;
6537 6551
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5c3f4538807f..9596a2f0977b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -234,6 +234,9 @@ struct kvm_vcpu {
234 unsigned long requests; 234 unsigned long requests;
235 unsigned long guest_debug; 235 unsigned long guest_debug;
236 236
237 int pre_pcpu;
238 struct list_head blocked_vcpu_list;
239
237 struct mutex mutex; 240 struct mutex mutex;
238 struct kvm_run *run; 241 struct kvm_run *run;
239 242
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index afd7ae6aec65..a75502c93c3e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
230 init_waitqueue_head(&vcpu->wq); 230 init_waitqueue_head(&vcpu->wq);
231 kvm_async_pf_vcpu_init(vcpu); 231 kvm_async_pf_vcpu_init(vcpu);
232 232
233 vcpu->pre_pcpu = -1;
234 INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
235
233 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 236 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
234 if (!page) { 237 if (!page) {
235 r = -ENOMEM; 238 r = -ENOMEM;