aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWanpeng Li <wanpengli@tencent.com>2018-07-23 02:39:54 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2018-08-06 11:59:20 -0400
commit4180bf1b655a791a0a6ef93a2ffffc762722c782 (patch)
treeb15142899c6343af5a3adf015d0528277820748c
parent74fec5b9dbaa5e6fe776f6c73e6c00fb23dca844 (diff)
KVM: X86: Implement "send IPI" hypercall
Use a hypercall to send IPIs with a single vmexit, instead of one vmexit per IPI, for xAPIC/x2APIC physical mode, and with one vmexit per cluster for x2APIC cluster mode. An Intel guest can enter x2APIC cluster mode when interrupt remapping is enabled in QEMU; however, the latest AMD EPYC still only supports xAPIC mode, which can benefit greatly from exit-less IPIs. This patchset lets a guest send multicast IPIs, with at most 128 destinations per hypercall in 64-bit mode and 64 vCPUs per hypercall in 32-bit mode. Hardware: Xeon Skylake 2.5GHz, 2 sockets, 40 cores, 80 threads, the VM is 80 vCPUs, IPI microbenchmark(https://lkml.org/lkml/2017/12/19/141): x2apic cluster mode, vanilla Dry-run: 0, 2392199 ns Self-IPI: 6907514, 15027589 ns Normal IPI: 223910476, 251301666 ns Broadcast IPI: 0, 9282161150 ns Broadcast lock: 0, 8812934104 ns x2apic cluster mode, pv-ipi Dry-run: 0, 2449341 ns Self-IPI: 6720360, 15028732 ns Normal IPI: 228643307, 255708477 ns Broadcast IPI: 0, 7572293590 ns => 22% performance boost Broadcast lock: 0, 8316124651 ns x2apic physical mode, vanilla Dry-run: 0, 3135933 ns Self-IPI: 8572670, 17901757 ns Normal IPI: 226444334, 255421709 ns Broadcast IPI: 0, 19845070887 ns Broadcast lock: 0, 19827383656 ns x2apic physical mode, pv-ipi Dry-run: 0, 2446381 ns Self-IPI: 6788217, 15021056 ns Normal IPI: 219454441, 249583458 ns Broadcast IPI: 0, 7806540019 ns => 154% performance boost Broadcast lock: 0, 9143618799 ns Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Vitaly Kuznetsov <vkuznets@redhat.com> Signed-off-by: Wanpeng Li <wanpengli@tencent.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--Documentation/virtual/kvm/cpuid.txt4
-rw-r--r--Documentation/virtual/kvm/hypercalls.txt20
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/lapic.c40
-rw-r--r--arch/x86/kvm/x86.c3
-rw-r--r--include/uapi/linux/kvm_para.h1
7 files changed, 74 insertions, 1 deletions
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index ab022dcd0911..97ca1940a0dc 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
62 || || can be enabled by setting bit 2 62 || || can be enabled by setting bit 2
63 || || when writing to msr 0x4b564d02 63 || || when writing to msr 0x4b564d02
64------------------------------------------------------------------------------ 64------------------------------------------------------------------------------
65KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
66 || || before using paravirtualized
67 || || send IPIs.
68------------------------------------------------------------------------------
65KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side 69KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
66 || || per-cpu warps are expected in 70 || || per-cpu warps are expected in
67 || || kvmclock. 71 || || kvmclock.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index a890529c63ed..da24c138c8d1 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
121 121
122Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, 122Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
123or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. 123or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
124
1256. KVM_HC_SEND_IPI
126------------------------
127Architecture: x86
128Status: active
129Purpose: Send IPIs to multiple vCPUs.
130
131a0: lower part of the bitmap of destination APIC IDs
132a1: higher part of the bitmap of destination APIC IDs
133a2: the lowest APIC ID in bitmap
134a3: APIC ICR
135
136The hypercall lets a guest send multicast IPIs, with at most 128
137destinations per hypercall in 64-bit mode and 64 vCPUs per
138hypercall in 32-bit mode. The destinations are represented by a
139bitmap contained in the first two arguments (a0 and a1). Bit 0 of
140a0 corresponds to the APIC ID in the third argument (a2), bit 1
141corresponds to the APIC ID a2+1, and so on.
142
143Returns the number of CPUs to which the IPIs were delivered successfully.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 150937e64f63..c18958ef17d2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1457,6 +1457,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
1457void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); 1457void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
1458void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); 1458void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
1459 1459
1460int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
1461 unsigned long ipi_bitmap_high, int min,
1462 unsigned long icr, int op_64_bit);
1463
1460void kvm_define_shared_msr(unsigned index, u32 msr); 1464void kvm_define_shared_msr(unsigned index, u32 msr);
1461int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); 1465int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
1462 1466
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3d47fd..7bcfa61375c0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
621 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | 621 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
622 (1 << KVM_FEATURE_PV_UNHALT) | 622 (1 << KVM_FEATURE_PV_UNHALT) |
623 (1 << KVM_FEATURE_PV_TLB_FLUSH) | 623 (1 << KVM_FEATURE_PV_TLB_FLUSH) |
624 (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); 624 (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
625 (1 << KVM_FEATURE_PV_SEND_IPI);
625 626
626 if (sched_info_on()) 627 if (sched_info_on())
627 entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); 628 entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b5cd8465d44f..f0d693122c24 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
547 irq->level, irq->trig_mode, dest_map); 547 irq->level, irq->trig_mode, dest_map);
548} 548}
549 549
550int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
551 unsigned long ipi_bitmap_high, int min,
552 unsigned long icr, int op_64_bit)
553{
554 int i;
555 struct kvm_apic_map *map;
556 struct kvm_vcpu *vcpu;
557 struct kvm_lapic_irq irq = {0};
558 int cluster_size = op_64_bit ? 64 : 32;
559 int count = 0;
560
561 irq.vector = icr & APIC_VECTOR_MASK;
562 irq.delivery_mode = icr & APIC_MODE_MASK;
563 irq.level = (icr & APIC_INT_ASSERT) != 0;
564 irq.trig_mode = icr & APIC_INT_LEVELTRIG;
565
566 if (icr & APIC_DEST_MASK)
567 return -KVM_EINVAL;
568 if (icr & APIC_SHORT_MASK)
569 return -KVM_EINVAL;
570
571 rcu_read_lock();
572 map = rcu_dereference(kvm->arch.apic_map);
573
574 /* Bits above cluster_size are masked in the caller. */
575 for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
576 vcpu = map->phys_map[min + i]->vcpu;
577 count += kvm_apic_set_irq(vcpu, &irq, NULL);
578 }
579
580 min += cluster_size;
581 for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
582 vcpu = map->phys_map[min + i]->vcpu;
583 count += kvm_apic_set_irq(vcpu, &irq, NULL);
584 }
585
586 rcu_read_unlock();
587 return count;
588}
589
550static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) 590static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
551{ 591{
552 592
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b974802cadb..3c83711c0ebe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6802,6 +6802,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
6802 case KVM_HC_CLOCK_PAIRING: 6802 case KVM_HC_CLOCK_PAIRING:
6803 ret = kvm_pv_clock_pairing(vcpu, a0, a1); 6803 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
6804 break; 6804 break;
6805 case KVM_HC_SEND_IPI:
6806 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
6807 break;
6805#endif 6808#endif
6806 default: 6809 default:
6807 ret = -KVM_ENOSYS; 6810 ret = -KVM_ENOSYS;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index dcf629dd2889..f3893ef82b65 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -13,6 +13,7 @@
13/* Return values for hypercalls */ 13/* Return values for hypercalls */
14#define KVM_ENOSYS 1000 14#define KVM_ENOSYS 1000
15#define KVM_EFAULT EFAULT 15#define KVM_EFAULT EFAULT
16#define KVM_EINVAL EINVAL
16#define KVM_E2BIG E2BIG 17#define KVM_E2BIG E2BIG
17#define KVM_EPERM EPERM 18#define KVM_EPERM EPERM
18#define KVM_EOPNOTSUPP 95 19#define KVM_EOPNOTSUPP 95