aboutsummaryrefslogtreecommitdiffstats
path: root/virt/kvm/eventfd.c
diff options
context:
space:
mode:
authorMichael S. Tsirkin <mst@redhat.com>2010-11-18 12:09:08 -0500
committerAvi Kivity <avi@redhat.com>2011-01-12 04:29:38 -0500
commitbd2b53b20fcd0d6c4c815b54e6d464e34429d3a4 (patch)
tree1f225ea8b74368056bc144de14a1015fa4ebde29 /virt/kvm/eventfd.c
parent104f226bfd0a607ca0e804ae4907555374f72cd9 (diff)
KVM: fast-path msi injection with irqfd
Store irq routing table pointer in the irqfd object, and use that to inject MSI directly without bouncing out to a kernel thread. While we touch this structure, rearrange irqfd fields to make fastpath better packed for better cache utilization. This also adds some comments about locking rules and rcu usage in code. Some notes on the design: - Use pointer into the rt instead of copying an entry, to make it possible to use rcu, thus side-stepping locking complexities. We also save some memory this way. - Old workqueue code is still used for level irqs. I don't think we DTRT with level anyway, however, it seems easier to keep the code around as it has been thought through and debugged, and fix level later than rip out and re-instate it later. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Acked-by: Marcelo Tosatti <mtosatti@redhat.com> Acked-by: Gregory Haskins <ghaskins@novell.com> Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'virt/kvm/eventfd.c')
-rw-r--r--virt/kvm/eventfd.c91
1 files changed, 79 insertions, 12 deletions
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c1f1e3c62984..2ca4535f4fb7 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -44,14 +44,19 @@
44 */ 44 */
45 45
46struct _irqfd { 46struct _irqfd {
47 struct kvm *kvm; 47 /* Used for MSI fast-path */
48 struct eventfd_ctx *eventfd; 48 struct kvm *kvm;
49 int gsi; 49 wait_queue_t wait;
50 struct list_head list; 50 /* Update side is protected by irqfds.lock */
51 poll_table pt; 51 struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
52 wait_queue_t wait; 52 /* Used for level IRQ fast-path */
53 struct work_struct inject; 53 int gsi;
54 struct work_struct shutdown; 54 struct work_struct inject;
55 /* Used for setup/shutdown */
56 struct eventfd_ctx *eventfd;
57 struct list_head list;
58 poll_table pt;
59 struct work_struct shutdown;
55}; 60};
56 61
57static struct workqueue_struct *irqfd_cleanup_wq; 62static struct workqueue_struct *irqfd_cleanup_wq;
@@ -125,14 +130,22 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
125{ 130{
126 struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); 131 struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
127 unsigned long flags = (unsigned long)key; 132 unsigned long flags = (unsigned long)key;
133 struct kvm_kernel_irq_routing_entry *irq;
134 struct kvm *kvm = irqfd->kvm;
128 135
129 if (flags & POLLIN) 136 if (flags & POLLIN) {
137 rcu_read_lock();
138 irq = rcu_dereference(irqfd->irq_entry);
130 /* An event has been signaled, inject an interrupt */ 139 /* An event has been signaled, inject an interrupt */
131 schedule_work(&irqfd->inject); 140 if (irq)
141 kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
142 else
143 schedule_work(&irqfd->inject);
144 rcu_read_unlock();
145 }
132 146
133 if (flags & POLLHUP) { 147 if (flags & POLLHUP) {
134 /* The eventfd is closing, detach from KVM */ 148 /* The eventfd is closing, detach from KVM */
135 struct kvm *kvm = irqfd->kvm;
136 unsigned long flags; 149 unsigned long flags;
137 150
138 spin_lock_irqsave(&kvm->irqfds.lock, flags); 151 spin_lock_irqsave(&kvm->irqfds.lock, flags);
@@ -163,9 +176,31 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
163 add_wait_queue(wqh, &irqfd->wait); 176 add_wait_queue(wqh, &irqfd->wait);
164} 177}
165 178
179/* Must be called under irqfds.lock */
180static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
181 struct kvm_irq_routing_table *irq_rt)
182{
183 struct kvm_kernel_irq_routing_entry *e;
184 struct hlist_node *n;
185
186 if (irqfd->gsi >= irq_rt->nr_rt_entries) {
187 rcu_assign_pointer(irqfd->irq_entry, NULL);
188 return;
189 }
190
191 hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
192 /* Only fast-path MSI. */
193 if (e->type == KVM_IRQ_ROUTING_MSI)
194 rcu_assign_pointer(irqfd->irq_entry, e);
195 else
196 rcu_assign_pointer(irqfd->irq_entry, NULL);
197 }
198}
199
166static int 200static int
167kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) 201kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
168{ 202{
203 struct kvm_irq_routing_table *irq_rt;
169 struct _irqfd *irqfd, *tmp; 204 struct _irqfd *irqfd, *tmp;
170 struct file *file = NULL; 205 struct file *file = NULL;
171 struct eventfd_ctx *eventfd = NULL; 206 struct eventfd_ctx *eventfd = NULL;
@@ -215,6 +250,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
215 goto fail; 250 goto fail;
216 } 251 }
217 252
253 irq_rt = rcu_dereference_protected(kvm->irq_routing,
254 lockdep_is_held(&kvm->irqfds.lock));
255 irqfd_update(kvm, irqfd, irq_rt);
256
218 events = file->f_op->poll(file, &irqfd->pt); 257 events = file->f_op->poll(file, &irqfd->pt);
219 258
220 list_add_tail(&irqfd->list, &kvm->irqfds.items); 259 list_add_tail(&irqfd->list, &kvm->irqfds.items);
@@ -271,8 +310,17 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
271 spin_lock_irq(&kvm->irqfds.lock); 310 spin_lock_irq(&kvm->irqfds.lock);
272 311
273 list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { 312 list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
274 if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) 313 if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
314 /*
315 * This rcu_assign_pointer is needed for when
316 * another thread calls kvm_irqfd_update before
317 * we flush workqueue below.
318 * It is paired with synchronize_rcu done by caller
319 * of that function.
320 */
321 rcu_assign_pointer(irqfd->irq_entry, NULL);
275 irqfd_deactivate(irqfd); 322 irqfd_deactivate(irqfd);
323 }
276 } 324 }
277 325
278 spin_unlock_irq(&kvm->irqfds.lock); 326 spin_unlock_irq(&kvm->irqfds.lock);
@@ -322,6 +370,25 @@ kvm_irqfd_release(struct kvm *kvm)
322} 370}
323 371
324/* 372/*
373 * Change irq_routing and irqfd.
374 * Caller must invoke synchronize_rcu afterwards.
375 */
376void kvm_irq_routing_update(struct kvm *kvm,
377 struct kvm_irq_routing_table *irq_rt)
378{
379 struct _irqfd *irqfd;
380
381 spin_lock_irq(&kvm->irqfds.lock);
382
383 rcu_assign_pointer(kvm->irq_routing, irq_rt);
384
385 list_for_each_entry(irqfd, &kvm->irqfds.items, list)
386 irqfd_update(kvm, irqfd, irq_rt);
387
388 spin_unlock_irq(&kvm->irqfds.lock);
389}
390
391/*
325 * create a host-wide workqueue for issuing deferred shutdown requests 392 * create a host-wide workqueue for issuing deferred shutdown requests
326 * aggregated from all vm* instances. We need our own isolated single-thread 393 * aggregated from all vm* instances. We need our own isolated single-thread
327 * queue to prevent deadlock against flushing the normal work-queue. 394 * queue to prevent deadlock against flushing the normal work-queue.