diff options
author | Alex Williamson <alex.williamson@redhat.com> | 2012-09-21 12:48:28 -0400 |
---|---|---|
committer | Alex Williamson <alex.williamson@redhat.com> | 2012-09-21 12:48:28 -0400 |
commit | b68e7fa879cd3b1126a7c455d9da1b70299efc0d (patch) | |
tree | 96126d0337b76ab5148fcd844e971e4afe9a02b1 | |
parent | b37b593e20a073dce5c7cbaf230f166db6425450 (diff) |
vfio: Fix virqfd release race
vfoi-pci supports a mechanism like KVM's irqfd for unmasking an
interrupt through an eventfd. There are two ways to shutdown this
interface: 1) close the eventfd, 2) ioctl (such as disabling the
interrupt). Both of these do the release through a workqueue,
which can result in a segfault if two jobs get queued for the same
virqfd.
Fix this by protecting the pointer to these virqfds by a spinlock.
The vfio pci device will therefore no longer have a reference to it
once the release job is queued under lock. On the ioctl side, we
still flush the workqueue to ensure that any outstanding releases
are completed.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 76 |
1 files changed, 56 insertions, 20 deletions
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 211a4920b88a..d8dedc7d3910 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c | |||
@@ -76,9 +76,24 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | |||
76 | schedule_work(&virqfd->inject); | 76 | schedule_work(&virqfd->inject); |
77 | } | 77 | } |
78 | 78 | ||
79 | if (flags & POLLHUP) | 79 | if (flags & POLLHUP) { |
80 | /* The eventfd is closing, detach from VFIO */ | 80 | unsigned long flags; |
81 | virqfd_deactivate(virqfd); | 81 | spin_lock_irqsave(&virqfd->vdev->irqlock, flags); |
82 | |||
83 | /* | ||
84 | * The eventfd is closing, if the virqfd has not yet been | ||
85 | * queued for release, as determined by testing whether the | ||
86 | * vdev pointer to it is still valid, queue it now. As | ||
87 | * with kvm irqfds, we know we won't race against the virqfd | ||
88 | * going away because we hold wqh->lock to get here. | ||
89 | */ | ||
90 | if (*(virqfd->pvirqfd) == virqfd) { | ||
91 | *(virqfd->pvirqfd) = NULL; | ||
92 | virqfd_deactivate(virqfd); | ||
93 | } | ||
94 | |||
95 | spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); | ||
96 | } | ||
82 | 97 | ||
83 | return 0; | 98 | return 0; |
84 | } | 99 | } |
@@ -93,7 +108,6 @@ static void virqfd_ptable_queue_proc(struct file *file, | |||
93 | static void virqfd_shutdown(struct work_struct *work) | 108 | static void virqfd_shutdown(struct work_struct *work) |
94 | { | 109 | { |
95 | struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); | 110 | struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); |
96 | struct virqfd **pvirqfd = virqfd->pvirqfd; | ||
97 | u64 cnt; | 111 | u64 cnt; |
98 | 112 | ||
99 | eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); | 113 | eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); |
@@ -101,7 +115,6 @@ static void virqfd_shutdown(struct work_struct *work) | |||
101 | eventfd_ctx_put(virqfd->eventfd); | 115 | eventfd_ctx_put(virqfd->eventfd); |
102 | 116 | ||
103 | kfree(virqfd); | 117 | kfree(virqfd); |
104 | *pvirqfd = NULL; | ||
105 | } | 118 | } |
106 | 119 | ||
107 | static void virqfd_inject(struct work_struct *work) | 120 | static void virqfd_inject(struct work_struct *work) |
@@ -122,15 +135,11 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
122 | int ret = 0; | 135 | int ret = 0; |
123 | unsigned int events; | 136 | unsigned int events; |
124 | 137 | ||
125 | if (*pvirqfd) | ||
126 | return -EBUSY; | ||
127 | |||
128 | virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); | 138 | virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); |
129 | if (!virqfd) | 139 | if (!virqfd) |
130 | return -ENOMEM; | 140 | return -ENOMEM; |
131 | 141 | ||
132 | virqfd->pvirqfd = pvirqfd; | 142 | virqfd->pvirqfd = pvirqfd; |
133 | *pvirqfd = virqfd; | ||
134 | virqfd->vdev = vdev; | 143 | virqfd->vdev = vdev; |
135 | virqfd->handler = handler; | 144 | virqfd->handler = handler; |
136 | virqfd->thread = thread; | 145 | virqfd->thread = thread; |
@@ -154,6 +163,23 @@ static int virqfd_enable(struct vfio_pci_device *vdev, | |||
154 | virqfd->eventfd = ctx; | 163 | virqfd->eventfd = ctx; |
155 | 164 | ||
156 | /* | 165 | /* |
166 | * virqfds can be released by closing the eventfd or directly | ||
167 | * through ioctl. These are both done through a workqueue, so | ||
168 | * we update the pointer to the virqfd under lock to avoid | ||
169 | * pushing multiple jobs to release the same virqfd. | ||
170 | */ | ||
171 | spin_lock_irq(&vdev->irqlock); | ||
172 | |||
173 | if (*pvirqfd) { | ||
174 | spin_unlock_irq(&vdev->irqlock); | ||
175 | ret = -EBUSY; | ||
176 | goto fail; | ||
177 | } | ||
178 | *pvirqfd = virqfd; | ||
179 | |||
180 | spin_unlock_irq(&vdev->irqlock); | ||
181 | |||
182 | /* | ||
157 | * Install our own custom wake-up handling so we are notified via | 183 | * Install our own custom wake-up handling so we are notified via |
158 | * a callback whenever someone signals the underlying eventfd. | 184 | * a callback whenever someone signals the underlying eventfd. |
159 | */ | 185 | */ |
@@ -187,19 +213,29 @@ fail: | |||
187 | fput(file); | 213 | fput(file); |
188 | 214 | ||
189 | kfree(virqfd); | 215 | kfree(virqfd); |
190 | *pvirqfd = NULL; | ||
191 | 216 | ||
192 | return ret; | 217 | return ret; |
193 | } | 218 | } |
194 | 219 | ||
195 | static void virqfd_disable(struct virqfd *virqfd) | 220 | static void virqfd_disable(struct vfio_pci_device *vdev, |
221 | struct virqfd **pvirqfd) | ||
196 | { | 222 | { |
197 | if (!virqfd) | 223 | unsigned long flags; |
198 | return; | 224 | |
225 | spin_lock_irqsave(&vdev->irqlock, flags); | ||
226 | |||
227 | if (*pvirqfd) { | ||
228 | virqfd_deactivate(*pvirqfd); | ||
229 | *pvirqfd = NULL; | ||
230 | } | ||
199 | 231 | ||
200 | virqfd_deactivate(virqfd); | 232 | spin_unlock_irqrestore(&vdev->irqlock, flags); |
201 | 233 | ||
202 | /* Block until we know all outstanding shutdown jobs have completed. */ | 234 | /* |
235 | * Block until we know all outstanding shutdown jobs have completed. | ||
236 | * Even if we don't queue the job, flush the wq to be sure it's | ||
237 | * been released. | ||
238 | */ | ||
203 | flush_workqueue(vfio_irqfd_cleanup_wq); | 239 | flush_workqueue(vfio_irqfd_cleanup_wq); |
204 | } | 240 | } |
205 | 241 | ||
@@ -392,8 +428,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) | |||
392 | static void vfio_intx_disable(struct vfio_pci_device *vdev) | 428 | static void vfio_intx_disable(struct vfio_pci_device *vdev) |
393 | { | 429 | { |
394 | vfio_intx_set_signal(vdev, -1); | 430 | vfio_intx_set_signal(vdev, -1); |
395 | virqfd_disable(vdev->ctx[0].unmask); | 431 | virqfd_disable(vdev, &vdev->ctx[0].unmask); |
396 | virqfd_disable(vdev->ctx[0].mask); | 432 | virqfd_disable(vdev, &vdev->ctx[0].mask); |
397 | vdev->irq_type = VFIO_PCI_NUM_IRQS; | 433 | vdev->irq_type = VFIO_PCI_NUM_IRQS; |
398 | vdev->num_ctx = 0; | 434 | vdev->num_ctx = 0; |
399 | kfree(vdev->ctx); | 435 | kfree(vdev->ctx); |
@@ -539,8 +575,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) | |||
539 | vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); | 575 | vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); |
540 | 576 | ||
541 | for (i = 0; i < vdev->num_ctx; i++) { | 577 | for (i = 0; i < vdev->num_ctx; i++) { |
542 | virqfd_disable(vdev->ctx[i].unmask); | 578 | virqfd_disable(vdev, &vdev->ctx[i].unmask); |
543 | virqfd_disable(vdev->ctx[i].mask); | 579 | virqfd_disable(vdev, &vdev->ctx[i].mask); |
544 | } | 580 | } |
545 | 581 | ||
546 | if (msix) { | 582 | if (msix) { |
@@ -577,7 +613,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, | |||
577 | vfio_send_intx_eventfd, NULL, | 613 | vfio_send_intx_eventfd, NULL, |
578 | &vdev->ctx[0].unmask, fd); | 614 | &vdev->ctx[0].unmask, fd); |
579 | 615 | ||
580 | virqfd_disable(vdev->ctx[0].unmask); | 616 | virqfd_disable(vdev, &vdev->ctx[0].unmask); |
581 | } | 617 | } |
582 | 618 | ||
583 | return 0; | 619 | return 0; |