diff options
| author | Alex Williamson <alex.williamson@redhat.com> | 2012-09-21 12:48:28 -0400 |
|---|---|---|
| committer | Alex Williamson <alex.williamson@redhat.com> | 2012-09-21 12:48:28 -0400 |
| commit | b68e7fa879cd3b1126a7c455d9da1b70299efc0d (patch) | |
| tree | 96126d0337b76ab5148fcd844e971e4afe9a02b1 | |
| parent | b37b593e20a073dce5c7cbaf230f166db6425450 (diff) | |
vfio: Fix virqfd release race
vfio-pci supports a mechanism like KVM's irqfd for unmasking an
interrupt through an eventfd. There are two ways to shut down this
interface: 1) close the eventfd, 2) ioctl (such as disabling the
interrupt). Both of these do the release through a workqueue,
which can result in a segfault if two jobs get queued for the same
virqfd.
Fix this by protecting the pointer to these virqfds by a spinlock.
The vfio pci device will therefore no longer have a reference to it
once the release job is queued under lock. On the ioctl side, we
still flush the workqueue to ensure that any outstanding releases
are completed.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
| -rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 76 |
1 files changed, 56 insertions, 20 deletions
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 211a4920b88a..d8dedc7d3910 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c | |||
| @@ -76,9 +76,24 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | |||
| 76 | schedule_work(&virqfd->inject); | 76 | schedule_work(&virqfd->inject); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | if (flags & POLLHUP) | 79 | if (flags & POLLHUP) { |
| 80 | /* The eventfd is closing, detach from VFIO */ | 80 | unsigned long flags; |
| 81 | virqfd_deactivate(virqfd); | 81 | spin_lock_irqsave(&virqfd->vdev->irqlock, flags); |
| 82 | |||
| 83 | /* | ||
| 84 | * The eventfd is closing, if the virqfd has not yet been | ||
| 85 | * queued for release, as determined by testing whether the | ||
| 86 | * vdev pointer to it is still valid, queue it now. As | ||
| 87 | * with kvm irqfds, we know we won't race against the virqfd | ||
| 88 | * going away because we hold wqh->lock to get here. | ||
| 89 | */ | ||
| 90 | if (*(virqfd->pvirqfd) == virqfd) { | ||
| 91 | *(virqfd->pvirqfd) = NULL; | ||
| 92 | virqfd_deactivate(virqfd); | ||
| 93 | } | ||
| 94 | |||
| 95 | spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); | ||
| 96 | } | ||
| 82 | 97 | ||
| 83 | return 0; | 98 | return 0; |
| 84 | } | 99 | } |
| @@ -93,7 +108,6 @@ static void virqfd_ptable_queue_proc(struct file *file, | |||
| 93 | static void virqfd_shutdown(struct work_struct *work) | 108 | static void virqfd_shutdown(struct work_struct *work) |
| 94 | { | 109 | { |
| 95 | struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); | 110 | struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); |
| 96 | struct virqfd **pvirqfd = virqfd->pvirqfd; | ||
| 97 | u64 cnt; | 111 | u64 cnt; |
| 98 | 112 | ||
| 99 | eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); | 113 | eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); |
| @@ -101,7 +115,6 @@ static void virqfd_shutdown(struct work_struct *work) | |||
| 101 | eventfd_ctx_put(virqfd->eventfd); | 115 | eventfd_ctx_put(virqfd->eventfd); |
| 102 | 116 | ||
| 103 | kfree(virqfd); | 117 | kfree(virqfd); |
| 104 | *pvirqfd = NULL; | ||
| 105 | } | 118 | } |
| 106 | 119 | ||
| 107 | static void virqfd_inject(struct work_struct *work) | 120 | static void virqfd_inject(struct work_struct *work) |
| @@ -122,15 +135,11 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
| 122 | int ret = 0; | 135 | int ret = 0; |
| 123 | unsigned int events; | 136 | unsigned int events; |
| 124 | 137 | ||
| 125 | if (*pvirqfd) | ||
| 126 | return -EBUSY; | ||
| 127 | |||
| 128 | virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); | 138 | virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); |
| 129 | if (!virqfd) | 139 | if (!virqfd) |
| 130 | return -ENOMEM; | 140 | return -ENOMEM; |
| 131 | 141 | ||
| 132 | virqfd->pvirqfd = pvirqfd; | 142 | virqfd->pvirqfd = pvirqfd; |
| 133 | *pvirqfd = virqfd; | ||
| 134 | virqfd->vdev = vdev; | 143 | virqfd->vdev = vdev; |
| 135 | virqfd->handler = handler; | 144 | virqfd->handler = handler; |
| 136 | virqfd->thread = thread; | 145 | virqfd->thread = thread; |
| @@ -154,6 +163,23 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
| 154 | virqfd->eventfd = ctx; | 163 | virqfd->eventfd = ctx; |
| 155 | 164 | ||
| 156 | /* | 165 | /* |
| 166 | * virqfds can be released by closing the eventfd or directly | ||
| 167 | * through ioctl. These are both done through a workqueue, so | ||
| 168 | * we update the pointer to the virqfd under lock to avoid | ||
| 169 | * pushing multiple jobs to release the same virqfd. | ||
| 170 | */ | ||
| 171 | spin_lock_irq(&vdev->irqlock); | ||
| 172 | |||
| 173 | if (*pvirqfd) { | ||
| 174 | spin_unlock_irq(&vdev->irqlock); | ||
| 175 | ret = -EBUSY; | ||
| 176 | goto fail; | ||
| 177 | } | ||
| 178 | *pvirqfd = virqfd; | ||
| 179 | |||
| 180 | spin_unlock_irq(&vdev->irqlock); | ||
| 181 | |||
| 182 | /* | ||
| 157 | * Install our own custom wake-up handling so we are notified via | 183 | * Install our own custom wake-up handling so we are notified via |
| 158 | * a callback whenever someone signals the underlying eventfd. | 184 | * a callback whenever someone signals the underlying eventfd. |
| 159 | */ | 185 | */ |
| @@ -187,19 +213,29 @@ fail: | |||
| 187 | fput(file); | 213 | fput(file); |
| 188 | 214 | ||
| 189 | kfree(virqfd); | 215 | kfree(virqfd); |
| 190 | *pvirqfd = NULL; | ||
| 191 | 216 | ||
| 192 | return ret; | 217 | return ret; |
| 193 | } | 218 | } |
| 194 | 219 | ||
| 195 | static void virqfd_disable(struct virqfd *virqfd) | 220 | static void virqfd_disable(struct vfio_pci_device *vdev, |
| 221 | struct virqfd **pvirqfd) | ||
| 196 | { | 222 | { |
| 197 | if (!virqfd) | 223 | unsigned long flags; |
| 198 | return; | 224 | |
| 225 | spin_lock_irqsave(&vdev->irqlock, flags); | ||
| 226 | |||
| 227 | if (*pvirqfd) { | ||
| 228 | virqfd_deactivate(*pvirqfd); | ||
| 229 | *pvirqfd = NULL; | ||
| 230 | } | ||
| 199 | 231 | ||
| 200 | virqfd_deactivate(virqfd); | 232 | spin_unlock_irqrestore(&vdev->irqlock, flags); |
| 201 | 233 | ||
| 202 | /* Block until we know all outstanding shutdown jobs have completed. */ | 234 | /* |
| 235 | * Block until we know all outstanding shutdown jobs have completed. | ||
| 236 | * Even if we don't queue the job, flush the wq to be sure it's | ||
| 237 | * been released. | ||
| 238 | */ | ||
| 203 | flush_workqueue(vfio_irqfd_cleanup_wq); | 239 | flush_workqueue(vfio_irqfd_cleanup_wq); |
| 204 | } | 240 | } |
| 205 | 241 | ||
| @@ -392,8 +428,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) | |||
| 392 | static void vfio_intx_disable(struct vfio_pci_device *vdev) | 428 | static void vfio_intx_disable(struct vfio_pci_device *vdev) |
| 393 | { | 429 | { |
| 394 | vfio_intx_set_signal(vdev, -1); | 430 | vfio_intx_set_signal(vdev, -1); |
| 395 | virqfd_disable(vdev->ctx[0].unmask); | 431 | virqfd_disable(vdev, &vdev->ctx[0].unmask); |
| 396 | virqfd_disable(vdev->ctx[0].mask); | 432 | virqfd_disable(vdev, &vdev->ctx[0].mask); |
| 397 | vdev->irq_type = VFIO_PCI_NUM_IRQS; | 433 | vdev->irq_type = VFIO_PCI_NUM_IRQS; |
| 398 | vdev->num_ctx = 0; | 434 | vdev->num_ctx = 0; |
| 399 | kfree(vdev->ctx); | 435 | kfree(vdev->ctx); |
| @@ -539,8 +575,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) | |||
| 539 | vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); | 575 | vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); |
| 540 | 576 | ||
| 541 | for (i = 0; i < vdev->num_ctx; i++) { | 577 | for (i = 0; i < vdev->num_ctx; i++) { |
| 542 | virqfd_disable(vdev->ctx[i].unmask); | 578 | virqfd_disable(vdev, &vdev->ctx[i].unmask); |
| 543 | virqfd_disable(vdev->ctx[i].mask); | 579 | virqfd_disable(vdev, &vdev->ctx[i].mask); |
| 544 | } | 580 | } |
| 545 | 581 | ||
| 546 | if (msix) { | 582 | if (msix) { |
| @@ -577,7 +613,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, | |||
| 577 | vfio_send_intx_eventfd, NULL, | 613 | vfio_send_intx_eventfd, NULL, |
| 578 | &vdev->ctx[0].unmask, fd); | 614 | &vdev->ctx[0].unmask, fd); |
| 579 | 615 | ||
| 580 | virqfd_disable(vdev->ctx[0].unmask); | 616 | virqfd_disable(vdev, &vdev->ctx[0].unmask); |
| 581 | } | 617 | } |
| 582 | 618 | ||
| 583 | return 0; | 619 | return 0; |
