author     Christian Borntraeger <borntraeger@de.ibm.com>   2014-01-16 07:44:20 -0500
committer  Paolo Bonzini <pbonzini@redhat.com>              2014-05-05 10:29:11 -0400
commit     719d93cd5f5c5c8775b7a38192069e8e1d1ac46e (patch)
tree       e6d7703b4b69acf92db962fbe96d9f7c380484c8
parent     57b5981cd38cbca3554c5e663b2361d9adea70c2 (diff)
kvm/irqchip: Speed up KVM_SET_GSI_ROUTING
When starting lots of dataplane devices the bootup takes very long on Christian's s390 with irqfd patches. With larger setups he is even able to trigger some timeouts in some components. Turns out that the KVM_SET_GSI_ROUTING ioctl takes very long (strace claims up to 0.1 sec) when having multiple CPUs. This is caused by the synchronize_rcu and the HZ=100 of s390. By changing the code to use a private srcu we can speed things up. This patch reduces the boot time till mounting root from 8 to 2 seconds on my s390 guest with 100 disks.

Uses of hlist_for_each_entry_rcu, hlist_add_head_rcu, hlist_del_init_rcu are fine because they do not have lockdep checks (hlist_for_each_entry_rcu uses rcu_dereference_raw rather than rcu_dereference, and write-sides do not do rcu lockdep at all).

Note that we're hardly relying on the "sleepable" part of srcu. We just want SRCU's faster detection of grace periods.

Testing was done by Andrew Theurer using netperf tests STREAM, MAERTS and RR. The difference between results "before" and "after" the patch has mean -0.2% and standard deviation 0.6%. Using a paired t-test on the data points says that there is a 2.5% probability that the patch is the cause of the performance difference (rather than a random fluctuation).

(Restricting the t-test to RR, which is the most likely to be affected, changes the numbers to respectively -0.3% mean, 0.7% stdev, and 8% probability that the numbers actually say something about the patch. The probability increases mostly because there are fewer data points.)

Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com> # s390
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
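In outline, the patch converts the irq-routing read sides from global RCU to a private per-VM SRCU domain and waits on that domain (expedited) when the routing table changes. Below is a minimal sketch of the pattern, not the patched functions themselves; identifiers such as example_read_side and routing are made up for illustration, while the real code operates on kvm->irq_srcu and kvm->irq_routing.

    #include <linux/srcu.h>
    #include <linux/rcupdate.h>

    static struct srcu_struct irq_srcu;   /* per-VM (kvm->irq_srcu) in the real patch */
    static void __rcu *routing;           /* stands in for kvm->irq_routing */

    static int example_init(void)
    {
            /* the patch does this in kvm_create_vm() */
            return init_srcu_struct(&irq_srcu);
    }

    static void example_read_side(void)
    {
            int idx;
            void *rt;

            idx = srcu_read_lock(&irq_srcu);                /* was: rcu_read_lock()  */
            rt = srcu_dereference(routing, &irq_srcu);      /* was: rcu_dereference() */
            (void)rt;                                       /* ... walk the routing table ... */
            srcu_read_unlock(&irq_srcu, idx);               /* was: rcu_read_unlock() */
    }

    static void example_update_side(void *new_rt)
    {
            rcu_assign_pointer(routing, new_rt);
            /* was: synchronize_rcu(); the expedited SRCU grace period is what
             * keeps KVM_SET_GSI_ROUTING fast on HZ=100 hosts */
            synchronize_srcu_expedited(&irq_srcu);
    }

The grace-period wait is the key difference: on an HZ=100 host a plain synchronize_rcu() can stall for tens of milliseconds per call, whereas the private, expedited SRCU domain only has to wait for readers of kvm->irq_srcu.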
-rw-r--r--  include/linux/kvm_host.h |  1
-rw-r--r--  virt/kvm/eventfd.c       | 25
-rw-r--r--  virt/kvm/irq_comm.c      | 17
-rw-r--r--  virt/kvm/irqchip.c       | 31
-rw-r--r--  virt/kvm/kvm_main.c      | 16
5 files changed, 51 insertions(+), 39 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1e125b055327..970c68197c69 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -370,6 +370,7 @@ struct kvm {
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
 	struct srcu_struct srcu;
+	struct srcu_struct irq_srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 912ec5a95e2c..20c3af7692c5 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
+#include <linux/srcu.h>
 #include <linux/slab.h>
 
 #include "iodev.h"
@@ -118,19 +119,22 @@ static void
 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
 	struct _irqfd_resampler *resampler;
+	struct kvm *kvm;
 	struct _irqfd *irqfd;
+	int idx;
 
 	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+	kvm = resampler->kvm;
 
-	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
 		    resampler->notifier.gsi, 0, false);
 
-	rcu_read_lock();
+	idx = srcu_read_lock(&kvm->irq_srcu);
 
 	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
 		eventfd_signal(irqfd->resamplefd, 1);
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 static void
@@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 	mutex_lock(&kvm->irqfds.resampler_lock);
 
 	list_del_rcu(&irqfd->resampler_link);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 
 	if (list_empty(&resampler->list)) {
 		list_del(&resampler->link);
@@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	unsigned long flags = (unsigned long)key;
 	struct kvm_kernel_irq_routing_entry *irq;
 	struct kvm *kvm = irqfd->kvm;
+	int idx;
 
 	if (flags & POLLIN) {
-		rcu_read_lock();
-		irq = rcu_dereference(irqfd->irq_entry);
+		idx = srcu_read_lock(&kvm->irq_srcu);
+		irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
 			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
 					false);
 		else
 			schedule_work(&irqfd->inject);
-		rcu_read_unlock();
+		srcu_read_unlock(&kvm->irq_srcu, idx);
 	}
 
 	if (flags & POLLHUP) {
@@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		}
 
 		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
-		synchronize_rcu();
+		synchronize_srcu(&kvm->irq_srcu);
 
 		mutex_unlock(&kvm->irqfds.resampler_lock);
 	}
@@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 		 * another thread calls kvm_irq_routing_update before
 		 * we flush workqueue below (we synchronize with
 		 * kvm_irq_routing_update using irqfds.lock).
-		 * It is paired with synchronize_rcu done by caller
+		 * It is paired with synchronize_srcu done by caller
 		 * of that function.
 		 */
 		rcu_assign_pointer(irqfd->irq_entry, NULL);
@@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm)
 
 /*
  * Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  */
 void kvm_irq_routing_update(struct kvm *kvm,
 			    struct kvm_irq_routing_table *irq_rt)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..ced4a542a031 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
+	int idx;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 * Since there's no easy way to do this, we only support injecting MSI
 	 * which is limited to 1:1 GSI mapping.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
 			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
@@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 				ret = -EWOULDBLOCK;
 			break;
 		}
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return ret;
 }
 
@@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_rcu(&kimn->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask)
 {
 	struct kvm_irq_mask_notifier *kimn;
-	int gsi;
+	int idx, gsi;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
 				kimn->func(kimn, mask);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..b43c275775cd 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,6 +26,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 #include <linux/export.h>
 #include <trace/events/kvm.h>
 #include "irq.h"
@@ -33,19 +34,19 @@
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi) {
-				rcu_read_unlock();
+				srcu_read_unlock(&kvm->irq_srcu, idx);
 				return true;
 			}
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	return false;
 }
@@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi)
 				kian->irq_acked(kian);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 #ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
 #endif
@@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status)
 {
 	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0;
+	int ret = -1, i = 0, idx;
 	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 * IOAPIC. So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link)
 			irq_set[i++] = *e;
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
 		int r;
@@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
 
-	synchronize_rcu();
+	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;
 	r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fa70c6e642b4..95b4c2b3906a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -457,11 +457,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 	r = hardware_enable_all();
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -473,10 +473,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	r = -ENOMEM;
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
 	kvm_init_memslots_id(kvm);
 	if (init_srcu_struct(&kvm->srcu))
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
+	if (init_srcu_struct(&kvm->irq_srcu))
+		goto out_err_no_irq_srcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
 		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
 					GFP_KERNEL);
@@ -505,10 +507,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	return kvm;
 
 out_err:
+	cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
 	cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
 	hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm->buses[i]);
 	kfree(kvm->memslots);