author    Jack Morgenstein <jackm@dev.mellanox.co.il>  2017-01-16 11:31:37 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2017-04-18 01:11:50 -0400
commit    48b2f1dd5787c643aa321fc159c11c418407234d (patch)
tree      0b25197de270ef5eddd1085df59b87dbdc9cad77
parent    cee26997a604ee8c6bd98b6841da195e828598b6 (diff)
net/mlx4_core: Fix racy CQ (Completion Queue) free
commit 291c566a28910614ce42d0ffe82196eddd6346f4 upstream.

In functions mlx4_cq_completion() and mlx4_cq_event(), the
radix_tree_lookup requires an rcu_read_lock. This is mandatory: if
another core frees the CQ, it could run the radix_tree_node_rcu_free()
call_rcu() callback while it's being used by the radix tree lookup
function.

Additionally, in function mlx4_cq_event(), since we are adding the rcu
lock around the radix-tree lookup, we no longer need to take the
spinlock. Also, the synchronize_irq() call for the async event
eliminates the need for incrementing the cq reference count in
mlx4_cq_event().

Other changes:

1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock:
   we no longer take this spinlock in the interrupt context. The
   spinlock here, therefore, simply protects against different threads
   simultaneously invoking mlx4_cq_free() for different cq's.

2. In function mlx4_cq_free(), we move the radix tree delete to before
   the synchronize_irq() calls. This guarantees that we will not access
   this cq during any subsequent interrupts, and therefore can safely
   free the CQ after the synchronize_irq calls. The rcu_read_lock in
   the interrupt handlers only needs to protect against corrupting the
   radix tree; the interrupt handlers may access the cq outside the
   rcu_read_lock due to the synchronize_irq calls, which protect
   against premature freeing of the cq.

3. In function mlx4_cq_event(), we change the mlx4_warn message to
   mlx4_dbg.

4. We leave the cq reference count mechanism in place, because it is
   still needed for the cq completion tasklet mechanism.

Fixes: 6d90aa5cf17b ("net/mlx4_core: Make sure there are no pending async events when freeing CQ")
Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
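As background for the diff below, here is a minimal sketch of the
reader-side pattern the patch adopts. The cq_lookup() helper and the
trimmed cq_table_sketch are illustrative assumptions, not code from the
mlx4 driver:

/* Illustrative only: cq_lookup() and cq_table_sketch are not mlx4
 * driver code.
 */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct mlx4_cq;                         /* opaque here */

struct cq_table_sketch {
        struct radix_tree_root  tree;
        spinlock_t              lock;   /* serializes insert/delete only */
};

/* Interrupt-context lookup: rcu_read_lock() keeps the radix-tree nodes
 * alive across the walk, so a concurrent radix_tree_delete() plus the
 * radix_tree_node_rcu_free() callback cannot free them mid-lookup.
 * The returned cq may still be dereferenced after rcu_read_unlock(),
 * because the free path deletes the cq from the tree and then runs
 * synchronize_irq() before freeing its memory.
 */
static struct mlx4_cq *cq_lookup(struct cq_table_sketch *table,
                                 u32 cqn, u32 mask)
{
        struct mlx4_cq *cq;

        rcu_read_lock();
        cq = radix_tree_lookup(&table->tree, cqn & mask);
        rcu_read_unlock();

        return cq;
}

The design point is that RCU only has to keep the radix-tree nodes
valid during the walk; the lifetime of the cq object itself is
guaranteed separately, by the synchronize_irq() calls in the free path.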
 drivers/net/ethernet/mellanox/mlx4/cq.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index a849da92f857..6b8635378f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
 {
        struct mlx4_cq *cq;
 
+       rcu_read_lock();
        cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
                               cqn & (dev->caps.num_cqs - 1));
+       rcu_read_unlock();
+
        if (!cq) {
                mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
                return;
        }
 
+       /* Accessing the CQ outside of rcu_read_lock is safe, because
+        * the CQ is freed only after interrupt handling is completed.
+        */
        ++cq->arm_sn;
 
        cq->comp(cq);
@@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
        struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
        struct mlx4_cq *cq;
 
-       spin_lock(&cq_table->lock);
-
+       rcu_read_lock();
        cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
-       if (cq)
-               atomic_inc(&cq->refcount);
-
-       spin_unlock(&cq_table->lock);
+       rcu_read_unlock();
 
        if (!cq) {
-               mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+               mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn);
                return;
        }
 
+       /* Accessing the CQ outside of rcu_read_lock is safe, because
+        * the CQ is freed only after interrupt handling is completed.
+        */
        cq->event(cq, event_type);
-
-       if (atomic_dec_and_test(&cq->refcount))
-               complete(&cq->free);
 }
 
 static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
        if (err)
                return err;
 
-       spin_lock_irq(&cq_table->lock);
+       spin_lock(&cq_table->lock);
        err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
-       spin_unlock_irq(&cq_table->lock);
+       spin_unlock(&cq_table->lock);
        if (err)
                goto err_icm;
 
@@ -349,9 +351,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
        return 0;
 
 err_radix:
-       spin_lock_irq(&cq_table->lock);
+       spin_lock(&cq_table->lock);
        radix_tree_delete(&cq_table->tree, cq->cqn);
-       spin_unlock_irq(&cq_table->lock);
+       spin_unlock(&cq_table->lock);
 
 err_icm:
        mlx4_cq_free_icm(dev, cq->cqn);
@@ -370,15 +372,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
        if (err)
                mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
+       spin_lock(&cq_table->lock);
+       radix_tree_delete(&cq_table->tree, cq->cqn);
+       spin_unlock(&cq_table->lock);
+
        synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq);
        if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq !=
            priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
                synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
 
-       spin_lock_irq(&cq_table->lock);
-       radix_tree_delete(&cq_table->tree, cq->cqn);
-       spin_unlock_irq(&cq_table->lock);
-
        if (atomic_dec_and_test(&cq->refcount))
                complete(&cq->free);
        wait_for_completion(&cq->free);
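The last hunk establishes a strict teardown order: retract the CQ from
the radix tree, quiesce the interrupt handlers, then drain the
reference count. A condensed sketch of that ordering, using a
hypothetical cq_teardown() helper and a struct trimmed to the fields
actually touched (the real code is mlx4_cq_free() above):

/* Hypothetical helper condensing the mlx4_cq_free() ordering; not the
 * literal driver function.
 */
#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>

struct mlx4_cq_sketch {
        int                     cqn;
        atomic_t                refcount;       /* also held by the completion tasklet */
        struct completion       free;
};

static void cq_teardown(struct radix_tree_root *tree, spinlock_t *lock,
                        struct mlx4_cq_sketch *cq, unsigned int irq)
{
        /* 1. Retract: no new interrupt-side lookup can find the cq. */
        spin_lock(lock);        /* plain spin_lock: never taken in IRQ context now */
        radix_tree_delete(tree, cq->cqn);
        spin_unlock(lock);

        /* 2. Quiesce: any handler that already looked the cq up
         *    finishes before synchronize_irq() returns, so its use of
         *    the pointer is over.
         */
        synchronize_irq(irq);

        /* 3. Drain: drop the allocation reference, then wait for the
         *    completion tasklet to drop its references.
         */
        if (atomic_dec_and_test(&cq->refcount))
                complete(&cq->free);
        wait_for_completion(&cq->free);

        /* Only now is it safe to free the cq's memory. */
}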