aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Williamson <alex.williamson@redhat.com>2012-09-21 12:48:28 -0400
committerAlex Williamson <alex.williamson@redhat.com>2012-09-21 12:48:28 -0400
commitb68e7fa879cd3b1126a7c455d9da1b70299efc0d (patch)
tree96126d0337b76ab5148fcd844e971e4afe9a02b1
parentb37b593e20a073dce5c7cbaf230f166db6425450 (diff)
vfio: Fix virqfd release race
vfoi-pci supports a mechanism like KVM's irqfd for unmasking an interrupt through an eventfd. There are two ways to shutdown this interface: 1) close the eventfd, 2) ioctl (such as disabling the interrupt). Both of these do the release through a workqueue, which can result in a segfault if two jobs get queued for the same virqfd. Fix this by protecting the pointer to these virqfds by a spinlock. The vfio pci device will therefore no longer have a reference to it once the release job is queued under lock. On the ioctl side, we still flush the workqueue to ensure that any outstanding releases are completed. Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c76
1 files changed, 56 insertions, 20 deletions
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 211a4920b88a..d8dedc7d3910 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -76,9 +76,24 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
76 schedule_work(&virqfd->inject); 76 schedule_work(&virqfd->inject);
77 } 77 }
78 78
79 if (flags & POLLHUP) 79 if (flags & POLLHUP) {
80 /* The eventfd is closing, detach from VFIO */ 80 unsigned long flags;
81 virqfd_deactivate(virqfd); 81 spin_lock_irqsave(&virqfd->vdev->irqlock, flags);
82
83 /*
84 * The eventfd is closing, if the virqfd has not yet been
85 * queued for release, as determined by testing whether the
86 * vdev pointer to it is still valid, queue it now. As
87 * with kvm irqfds, we know we won't race against the virqfd
88 * going away because we hold wqh->lock to get here.
89 */
90 if (*(virqfd->pvirqfd) == virqfd) {
91 *(virqfd->pvirqfd) = NULL;
92 virqfd_deactivate(virqfd);
93 }
94
95 spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
96 }
82 97
83 return 0; 98 return 0;
84} 99}
@@ -93,7 +108,6 @@ static void virqfd_ptable_queue_proc(struct file *file,
93static void virqfd_shutdown(struct work_struct *work) 108static void virqfd_shutdown(struct work_struct *work)
94{ 109{
95 struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); 110 struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
96 struct virqfd **pvirqfd = virqfd->pvirqfd;
97 u64 cnt; 111 u64 cnt;
98 112
99 eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); 113 eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
@@ -101,7 +115,6 @@ static void virqfd_shutdown(struct work_struct *work)
101 eventfd_ctx_put(virqfd->eventfd); 115 eventfd_ctx_put(virqfd->eventfd);
102 116
103 kfree(virqfd); 117 kfree(virqfd);
104 *pvirqfd = NULL;
105} 118}
106 119
107static void virqfd_inject(struct work_struct *work) 120static void virqfd_inject(struct work_struct *work)
@@ -122,15 +135,11 @@ static int virqfd_enable(struct vfio_pci_device *vdev,
122 int ret = 0; 135 int ret = 0;
123 unsigned int events; 136 unsigned int events;
124 137
125 if (*pvirqfd)
126 return -EBUSY;
127
128 virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); 138 virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
129 if (!virqfd) 139 if (!virqfd)
130 return -ENOMEM; 140 return -ENOMEM;
131 141
132 virqfd->pvirqfd = pvirqfd; 142 virqfd->pvirqfd = pvirqfd;
133 *pvirqfd = virqfd;
134 virqfd->vdev = vdev; 143 virqfd->vdev = vdev;
135 virqfd->handler = handler; 144 virqfd->handler = handler;
136 virqfd->thread = thread; 145 virqfd->thread = thread;
@@ -154,6 +163,23 @@ static int virqfd_enable(struct vfio_pci_device *vdev,
154 virqfd->eventfd = ctx; 163 virqfd->eventfd = ctx;
155 164
156 /* 165 /*
166 * virqfds can be released by closing the eventfd or directly
167 * through ioctl. These are both done through a workqueue, so
168 * we update the pointer to the virqfd under lock to avoid
169 * pushing multiple jobs to release the same virqfd.
170 */
171 spin_lock_irq(&vdev->irqlock);
172
173 if (*pvirqfd) {
174 spin_unlock_irq(&vdev->irqlock);
175 ret = -EBUSY;
176 goto fail;
177 }
178 *pvirqfd = virqfd;
179
180 spin_unlock_irq(&vdev->irqlock);
181
182 /*
157 * Install our own custom wake-up handling so we are notified via 183 * Install our own custom wake-up handling so we are notified via
158 * a callback whenever someone signals the underlying eventfd. 184 * a callback whenever someone signals the underlying eventfd.
159 */ 185 */
@@ -187,19 +213,29 @@ fail:
187 fput(file); 213 fput(file);
188 214
189 kfree(virqfd); 215 kfree(virqfd);
190 *pvirqfd = NULL;
191 216
192 return ret; 217 return ret;
193} 218}
194 219
195static void virqfd_disable(struct virqfd *virqfd) 220static void virqfd_disable(struct vfio_pci_device *vdev,
221 struct virqfd **pvirqfd)
196{ 222{
197 if (!virqfd) 223 unsigned long flags;
198 return; 224
225 spin_lock_irqsave(&vdev->irqlock, flags);
226
227 if (*pvirqfd) {
228 virqfd_deactivate(*pvirqfd);
229 *pvirqfd = NULL;
230 }
199 231
200 virqfd_deactivate(virqfd); 232 spin_unlock_irqrestore(&vdev->irqlock, flags);
201 233
202 /* Block until we know all outstanding shutdown jobs have completed. */ 234 /*
235 * Block until we know all outstanding shutdown jobs have completed.
236 * Even if we don't queue the job, flush the wq to be sure it's
237 * been released.
238 */
203 flush_workqueue(vfio_irqfd_cleanup_wq); 239 flush_workqueue(vfio_irqfd_cleanup_wq);
204} 240}
205 241
@@ -392,8 +428,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
392static void vfio_intx_disable(struct vfio_pci_device *vdev) 428static void vfio_intx_disable(struct vfio_pci_device *vdev)
393{ 429{
394 vfio_intx_set_signal(vdev, -1); 430 vfio_intx_set_signal(vdev, -1);
395 virqfd_disable(vdev->ctx[0].unmask); 431 virqfd_disable(vdev, &vdev->ctx[0].unmask);
396 virqfd_disable(vdev->ctx[0].mask); 432 virqfd_disable(vdev, &vdev->ctx[0].mask);
397 vdev->irq_type = VFIO_PCI_NUM_IRQS; 433 vdev->irq_type = VFIO_PCI_NUM_IRQS;
398 vdev->num_ctx = 0; 434 vdev->num_ctx = 0;
399 kfree(vdev->ctx); 435 kfree(vdev->ctx);
@@ -539,8 +575,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
539 vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); 575 vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
540 576
541 for (i = 0; i < vdev->num_ctx; i++) { 577 for (i = 0; i < vdev->num_ctx; i++) {
542 virqfd_disable(vdev->ctx[i].unmask); 578 virqfd_disable(vdev, &vdev->ctx[i].unmask);
543 virqfd_disable(vdev->ctx[i].mask); 579 virqfd_disable(vdev, &vdev->ctx[i].mask);
544 } 580 }
545 581
546 if (msix) { 582 if (msix) {
@@ -577,7 +613,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
577 vfio_send_intx_eventfd, NULL, 613 vfio_send_intx_eventfd, NULL,
578 &vdev->ctx[0].unmask, fd); 614 &vdev->ctx[0].unmask, fd);
579 615
580 virqfd_disable(vdev->ctx[0].unmask); 616 virqfd_disable(vdev, &vdev->ctx[0].unmask);
581 } 617 }
582 618
583 return 0; 619 return 0;