author		Michael S. Tsirkin <mst@redhat.com>	2010-11-18 12:09:08 -0500
committer	Avi Kivity <avi@redhat.com>	2011-01-12 04:29:38 -0500
commit		bd2b53b20fcd0d6c4c815b54e6d464e34429d3a4
tree		1f225ea8b74368056bc144de14a1015fa4ebde29
parent		104f226bfd0a607ca0e804ae4907555374f72cd9
KVM: fast-path msi injection with irqfd
Store the irq routing table pointer in the irqfd object, and use that to
inject MSI directly without bouncing out to a kernel thread.

While we touch this structure, rearrange irqfd fields to make the fast path
better packed for better cache utilization. This also adds some comments
about locking rules and RCU usage in the code.

Some notes on the design:

- Use a pointer into the routing table instead of copying an entry, to make
  it possible to use RCU, thus side-stepping locking complexities. We also
  save some memory this way.

- The old workqueue code is still used for level irqs. I don't think we DTRT
  with level anyway, but it seems easier to keep the code around, as it has
  been thought through and debugged, and fix level later than to rip it out
  and re-instate it later.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
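For orientation before the diff: the reader side of this design is the
classic RCU pattern. Here is a minimal sketch of the fast path this patch
adds to irqfd_wakeup() (the helper name irqfd_inject_fast is hypothetical;
the actual patch open-codes this logic in the wakeup callback):

	/* Sketch only: the real patch inlines this in irqfd_wakeup(). */
	static void irqfd_inject_fast(struct _irqfd *irqfd)
	{
		struct kvm_kernel_irq_routing_entry *irq;

		rcu_read_lock();
		/* Pairs with rcu_assign_pointer() in irqfd_update(). */
		irq = rcu_dereference(irqfd->irq_entry);
		if (irq)
			/* Cached MSI route: inject directly, no thread bounce. */
			kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
		else
			/* No MSI route cached (e.g. level irq): workqueue path. */
			schedule_work(&irqfd->inject);
		rcu_read_unlock();
	}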
-rw-r--r--	include/linux/kvm_host.h	16
-rw-r--r--	virt/kvm/eventfd.c		91
-rw-r--r--	virt/kvm/irq_comm.c		7
3 files changed, 99 insertions, 15 deletions
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4bd663d6443d..f17beae3cca0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -17,6 +17,7 @@
 #include <linux/preempt.h>
 #include <linux/msi.h>
 #include <linux/slab.h>
+#include <linux/rcupdate.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -240,6 +241,10 @@ struct kvm {
 
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+	/*
+	 * Update side is protected by irq_lock and,
+	 * if configured, irqfds.lock.
+	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
 	struct hlist_head irq_ack_notifier_list;
@@ -511,6 +516,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 				   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
+		int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
@@ -652,17 +659,26 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 void kvm_irqfd_release(struct kvm *kvm);
+void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
 #else
 
 static inline void kvm_eventfd_init(struct kvm *kvm) {}
+
 static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 {
 	return -EINVAL;
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
+
+static inline void kvm_irq_routing_update(struct kvm *kvm,
+					  struct kvm_irq_routing_table *irq_rt)
+{
+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
+}
+
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	return -ENOSYS;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c1f1e3c62984..2ca4535f4fb7 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -44,14 +44,19 @@
  */
 
 struct _irqfd {
-	struct kvm *kvm;
-	struct eventfd_ctx *eventfd;
-	int gsi;
-	struct list_head list;
-	poll_table pt;
-	wait_queue_t wait;
-	struct work_struct inject;
-	struct work_struct shutdown;
+	/* Used for MSI fast-path */
+	struct kvm *kvm;
+	wait_queue_t wait;
+	/* Update side is protected by irqfds.lock */
+	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+	/* Used for level IRQ fast-path */
+	int gsi;
+	struct work_struct inject;
+	/* Used for setup/shutdown */
+	struct eventfd_ctx *eventfd;
+	struct list_head list;
+	poll_table pt;
+	struct work_struct shutdown;
 };
 
 static struct workqueue_struct *irqfd_cleanup_wq;
@@ -125,14 +130,22 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
 	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
 	unsigned long flags = (unsigned long)key;
+	struct kvm_kernel_irq_routing_entry *irq;
+	struct kvm *kvm = irqfd->kvm;
 
-	if (flags & POLLIN)
+	if (flags & POLLIN) {
+		rcu_read_lock();
+		irq = rcu_dereference(irqfd->irq_entry);
 		/* An event has been signaled, inject an interrupt */
-		schedule_work(&irqfd->inject);
+		if (irq)
+			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+		else
+			schedule_work(&irqfd->inject);
+		rcu_read_unlock();
+	}
 
 	if (flags & POLLHUP) {
 		/* The eventfd is closing, detach from KVM */
-		struct kvm *kvm = irqfd->kvm;
 		unsigned long flags;
 
 		spin_lock_irqsave(&kvm->irqfds.lock, flags);
@@ -163,9 +176,31 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 	add_wait_queue(wqh, &irqfd->wait);
 }
 
+/* Must be called under irqfds.lock */
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
+			 struct kvm_irq_routing_table *irq_rt)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct hlist_node *n;
+
+	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
+		rcu_assign_pointer(irqfd->irq_entry, NULL);
+		return;
+	}
+
+	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
+		/* Only fast-path MSI. */
+		if (e->type == KVM_IRQ_ROUTING_MSI)
+			rcu_assign_pointer(irqfd->irq_entry, e);
+		else
+			rcu_assign_pointer(irqfd->irq_entry, NULL);
+	}
+}
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct _irqfd *irqfd, *tmp;
 	struct file *file = NULL;
 	struct eventfd_ctx *eventfd = NULL;
@@ -215,6 +250,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 		goto fail;
 	}
 
+	irq_rt = rcu_dereference_protected(kvm->irq_routing,
+					   lockdep_is_held(&kvm->irqfds.lock));
+	irqfd_update(kvm, irqfd, irq_rt);
+
 	events = file->f_op->poll(file, &irqfd->pt);
 
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
@@ -271,8 +310,17 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
 	spin_lock_irq(&kvm->irqfds.lock);
 
 	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
-		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
+		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
+			/*
+			 * This rcu_assign_pointer is needed for when
+			 * another thread calls kvm_irq_routing_update before
+			 * we flush workqueue below.
+			 * It is paired with synchronize_rcu done by caller
+			 * of that function.
+			 */
+			rcu_assign_pointer(irqfd->irq_entry, NULL);
 			irqfd_deactivate(irqfd);
+		}
 	}
 
 	spin_unlock_irq(&kvm->irqfds.lock);
@@ -322,6 +370,25 @@ kvm_irqfd_release(struct kvm *kvm)
 }
 
 /*
+ * Change irq_routing and irqfd.
+ * Caller must invoke synchronize_rcu afterwards.
+ */
+void kvm_irq_routing_update(struct kvm *kvm,
+			    struct kvm_irq_routing_table *irq_rt)
+{
+	struct _irqfd *irqfd;
+
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	rcu_assign_pointer(kvm->irq_routing, irq_rt);
+
+	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+		irqfd_update(kvm, irqfd, irq_rt);
+
+	spin_unlock_irq(&kvm->irqfds.lock);
+}
+
+/*
  * create a host-wide workqueue for issuing deferred shutdown requests
  * aggregated from all vm* instances. We need our own isolated single-thread
  * queue to prevent deadlock against flushing the normal work-queue.
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 8edca9141b78..9f614b4e365f 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		return r;
 }
 
-static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		struct kvm *kvm, int irq_source_id, int level)
 {
 	struct kvm_lapic_irq irq;
 
@@ -409,8 +409,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
-	rcu_assign_pointer(kvm->irq_routing, new);
+	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
+
 	synchronize_rcu();
 
 	new = old;
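
Taken together, the update side follows the standard RCU publish-then-wait
sequence. A condensed sketch of what kvm_set_irq_routing() does after this
patch (error handling and the actual cleanup path are elided; the kfree()
here is an assumption standing in for that path):

	mutex_lock(&kvm->irq_lock);
	old = kvm->irq_routing;
	/* Publish the new table and refresh each irqfd->irq_entry
	 * (all via rcu_assign_pointer(), under irqfds.lock). */
	kvm_irq_routing_update(kvm, new);
	mutex_unlock(&kvm->irq_lock);

	/* Wait until no rcu_read_lock() reader can still hold a
	 * pointer into the old table before it is freed. */
	synchronize_rcu();
	kfree(old);	/* assumption: stands in for the real cleanup path */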