diff options
author | Petr Mladek <pmladek@suse.com> | 2016-10-19 08:07:19 -0400 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-12-14 12:16:11 -0500 |
commit | 6efaf10f163d9a60d1d4b2a049b194a53537ba1b (patch) | |
tree | 24c430ab5e6d794a205bcdfb627fa3ce3422cd38 /drivers/infiniband/sw | |
parent | 66431b0e8657e2406742105f89175f571340090b (diff) |
IB/rdmavt: Avoid queuing work into a destroyed cq kthread worker
The memory barrier is not enough to prevent queuing work into
a destroyed cq kthread worker. Just imagine the following situation:
    CPU1                                CPU2

    rvt_cq_enter()
      worker = cq->rdi->worker;
                                        rvt_cq_exit()
                                          rdi->worker = NULL;
                                          smp_wmb();
                                          kthread_flush_worker(worker);
                                          kthread_stop(worker->task);
                                          kfree(worker);
                                          // nothing queued yet =>
                                          // nothing flushed and
                                          // happily stopped and freed

      if (likely(worker)) {
        // true => read before CPU2 acted
        cq->notify = RVT_CQ_NONE;
        cq->triggered++;
        kthread_queue_work(worker, &cq->comptask);

    BANG: worker has been flushed/stopped/freed in the meantime.
This patch solves this by protecting the critical sections by
rdi->n_cqs_lock. It seems that this lock is not much contended
and looks reasonable for this purpose.
One catch is that rvt_cq_enter() might be called from IRQ context.
Therefore we must always take the lock with IRQs disabled to avoid
a possible deadlock.
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r-- | drivers/infiniband/sw/rdmavt/cq.c | 30 |
1 file changed, 16 insertions, 14 deletions
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6d9904a4a0ab..223ec4589fc7 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c | |||
@@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) | |||
119 | if (cq->notify == IB_CQ_NEXT_COMP || | 119 | if (cq->notify == IB_CQ_NEXT_COMP || |
120 | (cq->notify == IB_CQ_SOLICITED && | 120 | (cq->notify == IB_CQ_SOLICITED && |
121 | (solicited || entry->status != IB_WC_SUCCESS))) { | 121 | (solicited || entry->status != IB_WC_SUCCESS))) { |
122 | struct kthread_worker *worker; | ||
123 | /* | 122 | /* |
124 | * This will cause send_complete() to be called in | 123 | * This will cause send_complete() to be called in |
125 | * another thread. | 124 | * another thread. |
126 | */ | 125 | */ |
127 | smp_read_barrier_depends(); /* see rvt_cq_exit */ | 126 | spin_lock(&cq->rdi->n_cqs_lock); |
128 | worker = cq->rdi->worker; | 127 | if (likely(cq->rdi->worker)) { |
129 | if (likely(worker)) { | ||
130 | cq->notify = RVT_CQ_NONE; | 128 | cq->notify = RVT_CQ_NONE; |
131 | cq->triggered++; | 129 | cq->triggered++; |
132 | kthread_queue_work(worker, &cq->comptask); | 130 | kthread_queue_work(cq->rdi->worker, &cq->comptask); |
133 | } | 131 | } |
132 | spin_unlock(&cq->rdi->n_cqs_lock); | ||
134 | } | 133 | } |
135 | 134 | ||
136 | spin_unlock_irqrestore(&cq->lock, flags); | 135 | spin_unlock_irqrestore(&cq->lock, flags); |
@@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, | |||
240 | } | 239 | } |
241 | } | 240 | } |
242 | 241 | ||
243 | spin_lock(&rdi->n_cqs_lock); | 242 | spin_lock_irq(&rdi->n_cqs_lock); |
244 | if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { | 243 | if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { |
245 | spin_unlock(&rdi->n_cqs_lock); | 244 | spin_unlock_irq(&rdi->n_cqs_lock); |
246 | ret = ERR_PTR(-ENOMEM); | 245 | ret = ERR_PTR(-ENOMEM); |
247 | goto bail_ip; | 246 | goto bail_ip; |
248 | } | 247 | } |
249 | 248 | ||
250 | rdi->n_cqs_allocated++; | 249 | rdi->n_cqs_allocated++; |
251 | spin_unlock(&rdi->n_cqs_lock); | 250 | spin_unlock_irq(&rdi->n_cqs_lock); |
252 | 251 | ||
253 | if (cq->ip) { | 252 | if (cq->ip) { |
254 | spin_lock_irq(&rdi->pending_lock); | 253 | spin_lock_irq(&rdi->pending_lock); |
@@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq) | |||
296 | struct rvt_dev_info *rdi = cq->rdi; | 295 | struct rvt_dev_info *rdi = cq->rdi; |
297 | 296 | ||
298 | kthread_flush_work(&cq->comptask); | 297 | kthread_flush_work(&cq->comptask); |
299 | spin_lock(&rdi->n_cqs_lock); | 298 | spin_lock_irq(&rdi->n_cqs_lock); |
300 | rdi->n_cqs_allocated--; | 299 | rdi->n_cqs_allocated--; |
301 | spin_unlock(&rdi->n_cqs_lock); | 300 | spin_unlock_irq(&rdi->n_cqs_lock); |
302 | if (cq->ip) | 301 | if (cq->ip) |
303 | kref_put(&cq->ip->ref, rvt_release_mmap_info); | 302 | kref_put(&cq->ip->ref, rvt_release_mmap_info); |
304 | else | 303 | else |
@@ -541,12 +540,15 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) | |||
541 | { | 540 | { |
542 | struct kthread_worker *worker; | 541 | struct kthread_worker *worker; |
543 | 542 | ||
544 | worker = rdi->worker; | 543 | /* block future queuing from send_complete() */ |
545 | if (!worker) | 544 | spin_lock_irq(&rdi->n_cqs_lock); |
545 | if (!rdi->worker) { | ||
546 | spin_unlock_irq(&rdi->n_cqs_lock); | ||
546 | return; | 547 | return; |
547 | /* blocks future queuing from send_complete() */ | 548 | } |
548 | rdi->worker = NULL; | 549 | rdi->worker = NULL; |
549 | smp_wmb(); /* See rdi_cq_enter */ | 550 | spin_unlock_irq(&rdi->n_cqs_lock); |
551 | |||
550 | kthread_flush_worker(worker); | 552 | kthread_flush_worker(worker); |
551 | kthread_stop(worker->task); | 553 | kthread_stop(worker->task); |
552 | kfree(worker); | 554 | kfree(worker); |