| author | Jack Morgenstein <jackm@dev.mellanox.co.il> | 2017-01-16 11:31:37 -0500 |
| --- | --- | --- |
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-04-18 01:11:50 -0400 |
| commit | 48b2f1dd5787c643aa321fc159c11c418407234d (patch) | |
| tree | 0b25197de270ef5eddd1085df59b87dbdc9cad77 | |
| parent | cee26997a604ee8c6bd98b6841da195e828598b6 (diff) | |
net/mlx4_core: Fix racy CQ (Completion Queue) free
commit 291c566a28910614ce42d0ffe82196eddd6346f4 upstream.
In the functions mlx4_cq_completion() and mlx4_cq_event(), the
radix_tree_lookup() requires an rcu_read_lock.
This is mandatory: if another core frees the CQ, the
radix_tree_node_rcu_free() call_rcu() callback could run while
the node is still being used by the radix-tree lookup function.
Additionally, in function mlx4_cq_event(), since we now take the
RCU read lock around the radix-tree lookup, we no longer need to take
the spinlock. Also, the synchronize_irq() call for the async event
eliminates the need for incrementing the cq reference count in
mlx4_cq_event().
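In miniature, the locking pattern the patch relies on looks like the
following. This is a hypothetical sketch, not driver code: my_obj,
my_tree, my_lock, my_handler(), and my_free() are invented names, while
rcu_read_lock(), radix_tree_lookup(), and synchronize_irq() are the real
kernel APIs.

```c
#include <linux/interrupt.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_obj { int data; };

static RADIX_TREE(my_tree, GFP_ATOMIC);	/* hypothetical lookup table */
static DEFINE_SPINLOCK(my_lock);		/* serializes writers only */

static irqreturn_t my_handler(int irq, void *dev_id)
{
	struct my_obj *obj;

	rcu_read_lock();	/* protects only the radix-tree walk */
	obj = radix_tree_lookup(&my_tree, 0);
	rcu_read_unlock();

	if (obj)
		obj->data++;	/* safe after rcu_read_unlock(): see my_free() */
	return IRQ_HANDLED;
}

static void my_free(struct my_obj *obj, unsigned int irq)
{
	spin_lock(&my_lock);
	radix_tree_delete(&my_tree, 0);	/* unpublish: no new lookup finds obj */
	spin_unlock(&my_lock);

	synchronize_irq(irq);	/* wait out any handler that already found obj */
	kfree(obj);		/* the free can no longer be premature */
}
```

The spinlock is left serializing only the writers (insert and delete);
readers are covered by RCU. That is the split the patch makes between
mlx4_cq_alloc()/mlx4_cq_free() and the two interrupt-context lookup
functions.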
Other changes:
1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock:
we no longer take this spinlock in interrupt context.
The spinlock here, therefore, simply protects against different
threads simultaneously invoking mlx4_cq_free() for different CQs.
2. In function mlx4_cq_free(), we move the radix-tree delete to before
the synchronize_irq() calls (see the sketch after this list). This
guarantees that no interrupt handler can access this cq during any
subsequent interrupt, and therefore the CQ can safely be freed after
the synchronize_irq calls. The rcu_read_lock in the interrupt handlers
only needs to protect against corrupting the radix tree; the interrupt
handlers may access the cq outside the rcu_read_lock because the
synchronize_irq calls protect against premature freeing of the cq.
3. In function mlx4_cq_event(), we change the mlx4_warn message to mlx4_dbg.
4. We leave the cq reference count mechanism in place, because it is
still needed for the cq completion tasklet mechanism.
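Taken together, points 1, 2, and 4 yield the teardown order below. This
is a condensed, hypothetical sketch of the patched mlx4_cq_free() (the
real code is in the diff that follows; the HW2SW_CQ firmware command and
error handling are elided, and the driver's internal mlx4.h definitions
are assumed):

```c
static void cq_teardown_sketch(struct mlx4_dev *dev, struct mlx4_cq *cq)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_cq_table *cq_table = &priv->cq_table;

	/* 1. Unpublish: no interrupt handler can look this CQ up any
	 *    more. Plain spin_lock() suffices, since after this patch
	 *    the lock is never taken from interrupt context (point 1).
	 */
	spin_lock(&cq_table->lock);
	radix_tree_delete(&cq_table->tree, cq->cqn);
	spin_unlock(&cq_table->lock);

	/* 2. Drain: wait for handlers that already looked the CQ up
	 *    (point 2). After this, interrupt context is done with it.
	 */
	synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq);
	if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq !=
	    priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
		synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);

	/* 3. The refcount now exists only for the completion tasklet
	 *    (point 4): drop our reference and wait for the last one.
	 */
	if (atomic_dec_and_test(&cq->refcount))
		complete(&cq->free);
	wait_for_completion(&cq->free);
}
```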
Fixes: 6d90aa5cf17b ("net/mlx4_core: Make sure there are no pending async events when freeing CQ")
Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/cq.c | 38 ++++++++++++++++++++------------------
1 file changed, 20 insertions(+), 18 deletions(-)
```diff
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index a849da92f857..6b8635378f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
 {
 	struct mlx4_cq *cq;
 
+	rcu_read_lock();
 	cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
 			       cqn & (dev->caps.num_cqs - 1));
+	rcu_read_unlock();
+
 	if (!cq) {
 		mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
 		return;
 	}
 
+	/* Acessing the CQ outside of rcu_read_lock is safe, because
+	 * the CQ is freed only after interrupt handling is completed.
+	 */
 	++cq->arm_sn;
 
 	cq->comp(cq);
@@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
 	struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
 	struct mlx4_cq *cq;
 
-	spin_lock(&cq_table->lock);
-
+	rcu_read_lock();
 	cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
-	if (cq)
-		atomic_inc(&cq->refcount);
-
-	spin_unlock(&cq_table->lock);
+	rcu_read_unlock();
 
 	if (!cq) {
-		mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+		mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn);
 		return;
 	}
 
+	/* Acessing the CQ outside of rcu_read_lock is safe, because
+	 * the CQ is freed only after interrupt handling is completed.
+	 */
 	cq->event(cq, event_type);
-
-	if (atomic_dec_and_test(&cq->refcount))
-		complete(&cq->free);
 }
 
 static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 	if (err)
 		return err;
 
-	spin_lock_irq(&cq_table->lock);
+	spin_lock(&cq_table->lock);
 	err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
-	spin_unlock_irq(&cq_table->lock);
+	spin_unlock(&cq_table->lock);
 	if (err)
 		goto err_icm;
 
@@ -349,9 +351,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 	return 0;
 
 err_radix:
-	spin_lock_irq(&cq_table->lock);
+	spin_lock(&cq_table->lock);
 	radix_tree_delete(&cq_table->tree, cq->cqn);
-	spin_unlock_irq(&cq_table->lock);
+	spin_unlock(&cq_table->lock);
 
 err_icm:
 	mlx4_cq_free_icm(dev, cq->cqn);
@@ -370,15 +372,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
 	if (err)
 		mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
+	spin_lock(&cq_table->lock);
+	radix_tree_delete(&cq_table->tree, cq->cqn);
+	spin_unlock(&cq_table->lock);
+
 	synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq);
 	if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq !=
 	    priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
 		synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
 
-	spin_lock_irq(&cq_table->lock);
-	radix_tree_delete(&cq_table->tree, cq->cqn);
-	spin_unlock_irq(&cq_table->lock);
-
 	if (atomic_dec_and_test(&cq->refcount))
 		complete(&cq->free);
 	wait_for_completion(&cq->free);
```