aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
authorPetr Mladek <pmladek@suse.com>2016-10-19 08:07:19 -0400
committerDoug Ledford <dledford@redhat.com>2016-12-14 12:16:11 -0500
commit6efaf10f163d9a60d1d4b2a049b194a53537ba1b (patch)
tree24c430ab5e6d794a205bcdfb627fa3ce3422cd38 /drivers/infiniband/sw
parent66431b0e8657e2406742105f89175f571340090b (diff)
IB/rdmavt: Avoid queuing work into a destroyed cq kthread worker
The memory barrier is not enough to protect queuing work items into a destroyed cq kthread worker. Just imagine the following situation:

    CPU1                                 CPU2

    rvt_cq_enter()
      worker = cq->rdi->worker;
                                         rvt_cq_exit()
                                           rdi->worker = NULL;
                                           smp_wmb();
                                           kthread_flush_worker(worker);
                                           kthread_stop(worker->task);
                                           kfree(worker);
                                           // nothing queued yet =>
                                           // nothing flushed and
                                           // happily stopped and freed

      if (likely(worker)) {
        // true => read before CPU2 acted
        cq->notify = RVT_CQ_NONE;
        cq->triggered++;
        kthread_queue_work(worker, &cq->comptask);

    BANG: worker has been flushed/stopped/freed in the meantime.

This patch solves this by protecting the critical sections by rdi->n_cqs_lock. It seems that this lock is not much contended and looks reasonable for this purpose.

One catch is that rvt_cq_enter() might be called from IRQ context. Therefore we must always take the lock with IRQs disabled to avoid a possible deadlock.

Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c30
1 file changed, 16 insertions(+), 14 deletions(-)
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 6d9904a4a0ab..223ec4589fc7 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
119 if (cq->notify == IB_CQ_NEXT_COMP || 119 if (cq->notify == IB_CQ_NEXT_COMP ||
120 (cq->notify == IB_CQ_SOLICITED && 120 (cq->notify == IB_CQ_SOLICITED &&
121 (solicited || entry->status != IB_WC_SUCCESS))) { 121 (solicited || entry->status != IB_WC_SUCCESS))) {
122 struct kthread_worker *worker;
123 /* 122 /*
124 * This will cause send_complete() to be called in 123 * This will cause send_complete() to be called in
125 * another thread. 124 * another thread.
126 */ 125 */
127 smp_read_barrier_depends(); /* see rvt_cq_exit */ 126 spin_lock(&cq->rdi->n_cqs_lock);
128 worker = cq->rdi->worker; 127 if (likely(cq->rdi->worker)) {
129 if (likely(worker)) {
130 cq->notify = RVT_CQ_NONE; 128 cq->notify = RVT_CQ_NONE;
131 cq->triggered++; 129 cq->triggered++;
132 kthread_queue_work(worker, &cq->comptask); 130 kthread_queue_work(cq->rdi->worker, &cq->comptask);
133 } 131 }
132 spin_unlock(&cq->rdi->n_cqs_lock);
134 } 133 }
135 134
136 spin_unlock_irqrestore(&cq->lock, flags); 135 spin_unlock_irqrestore(&cq->lock, flags);
@@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
240 } 239 }
241 } 240 }
242 241
243 spin_lock(&rdi->n_cqs_lock); 242 spin_lock_irq(&rdi->n_cqs_lock);
244 if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { 243 if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
245 spin_unlock(&rdi->n_cqs_lock); 244 spin_unlock_irq(&rdi->n_cqs_lock);
246 ret = ERR_PTR(-ENOMEM); 245 ret = ERR_PTR(-ENOMEM);
247 goto bail_ip; 246 goto bail_ip;
248 } 247 }
249 248
250 rdi->n_cqs_allocated++; 249 rdi->n_cqs_allocated++;
251 spin_unlock(&rdi->n_cqs_lock); 250 spin_unlock_irq(&rdi->n_cqs_lock);
252 251
253 if (cq->ip) { 252 if (cq->ip) {
254 spin_lock_irq(&rdi->pending_lock); 253 spin_lock_irq(&rdi->pending_lock);
@@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
296 struct rvt_dev_info *rdi = cq->rdi; 295 struct rvt_dev_info *rdi = cq->rdi;
297 296
298 kthread_flush_work(&cq->comptask); 297 kthread_flush_work(&cq->comptask);
299 spin_lock(&rdi->n_cqs_lock); 298 spin_lock_irq(&rdi->n_cqs_lock);
300 rdi->n_cqs_allocated--; 299 rdi->n_cqs_allocated--;
301 spin_unlock(&rdi->n_cqs_lock); 300 spin_unlock_irq(&rdi->n_cqs_lock);
302 if (cq->ip) 301 if (cq->ip)
303 kref_put(&cq->ip->ref, rvt_release_mmap_info); 302 kref_put(&cq->ip->ref, rvt_release_mmap_info);
304 else 303 else
@@ -541,12 +540,15 @@ void rvt_cq_exit(struct rvt_dev_info *rdi)
541{ 540{
542 struct kthread_worker *worker; 541 struct kthread_worker *worker;
543 542
544 worker = rdi->worker; 543 /* block future queuing from send_complete() */
545 if (!worker) 544 spin_lock_irq(&rdi->n_cqs_lock);
545 if (!rdi->worker) {
546 spin_unlock_irq(&rdi->n_cqs_lock);
546 return; 547 return;
547 /* blocks future queuing from send_complete() */ 548 }
548 rdi->worker = NULL; 549 rdi->worker = NULL;
549 smp_wmb(); /* See rdi_cq_enter */ 550 spin_unlock_irq(&rdi->n_cqs_lock);
551
550 kthread_flush_worker(worker); 552 kthread_flush_worker(worker);
551 kthread_stop(worker->task); 553 kthread_stop(worker->task);
552 kfree(worker); 554 kfree(worker);