diff options
author | Eugenia Emantayev <eugenia@mellanox.com> | 2013-07-25 12:21:23 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-07-29 03:54:51 -0400 |
commit | 2d4b646613d6b12175b017aca18113945af1faf3 (patch) | |
tree | 0833822282841dcc7b2ec5f030e5089ca0ac22de /drivers/net/ethernet/mellanox | |
parent | 73d94e9481a20817abe2f1b41ee441bb4f6461f7 (diff) |
net/mlx4_en: Fix BlueFlame race
Fix a race between BlueFlame flow and stamping in post send flow.
Example:
SW: Build WQE 0 on the TX buffer, except the ownership bit
SW: Set ownership for WQE 0 on the TX buffer
SW: Ring doorbell for WQE 0
SW: Build WQE 1 on the TX buffer, except the ownership bit
SW: Set ownership for WQE 1 on the TX buffer
HW: Read WQE 0 and then WQE 1, before doorbell was rung/BF was done for WQE 1
HW: Produce CQEs for WQE 0 and WQE 1
SW: Process the CQEs, and stamp WQE 0 and WQE 1 accordingly (on the TX buffer)
SW: Copy WQE 1 from the TX buffer to the BF register - ALREADY STAMPED!
HW: CQE error with index 0xFFFF - the BF WQE's control segment is STAMPED,
so the BF index is 0xFFFF. Error: Invalid Opcode.
As a result QP enters the error state and no traffic can be sent.
Solution:
Solution: when stamping, do not stamp the last completed WQE.
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_tx.c | 61 |
1 file changed, 42 insertions, 19 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 7c492382da09..6dcca9817888 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c | |||
@@ -191,6 +191,39 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, | |||
191 | MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); | 191 | MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); |
192 | } | 192 | } |
193 | 193 | ||
194 | static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, | ||
195 | struct mlx4_en_tx_ring *ring, int index, | ||
196 | u8 owner) | ||
197 | { | ||
198 | __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); | ||
199 | struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; | ||
200 | struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; | ||
201 | void *end = ring->buf + ring->buf_size; | ||
202 | __be32 *ptr = (__be32 *)tx_desc; | ||
203 | int i; | ||
204 | |||
205 | /* Optimize the common case when there are no wraparounds */ | ||
206 | if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { | ||
207 | /* Stamp the freed descriptor */ | ||
208 | for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; | ||
209 | i += STAMP_STRIDE) { | ||
210 | *ptr = stamp; | ||
211 | ptr += STAMP_DWORDS; | ||
212 | } | ||
213 | } else { | ||
214 | /* Stamp the freed descriptor */ | ||
215 | for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; | ||
216 | i += STAMP_STRIDE) { | ||
217 | *ptr = stamp; | ||
218 | ptr += STAMP_DWORDS; | ||
219 | if ((void *)ptr >= end) { | ||
220 | ptr = ring->buf; | ||
221 | stamp ^= cpu_to_be32(0x80000000); | ||
222 | } | ||
223 | } | ||
224 | } | ||
225 | } | ||
226 | |||
194 | 227 | ||
195 | static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | 228 | static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, |
196 | struct mlx4_en_tx_ring *ring, | 229 | struct mlx4_en_tx_ring *ring, |
@@ -205,8 +238,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
205 | void *end = ring->buf + ring->buf_size; | 238 | void *end = ring->buf + ring->buf_size; |
206 | int frags = skb_shinfo(skb)->nr_frags; | 239 | int frags = skb_shinfo(skb)->nr_frags; |
207 | int i; | 240 | int i; |
208 | __be32 *ptr = (__be32 *)tx_desc; | ||
209 | __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); | ||
210 | struct skb_shared_hwtstamps hwts; | 241 | struct skb_shared_hwtstamps hwts; |
211 | 242 | ||
212 | if (timestamp) { | 243 | if (timestamp) { |
@@ -232,12 +263,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
232 | skb_frag_size(frag), PCI_DMA_TODEVICE); | 263 | skb_frag_size(frag), PCI_DMA_TODEVICE); |
233 | } | 264 | } |
234 | } | 265 | } |
235 | /* Stamp the freed descriptor */ | ||
236 | for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { | ||
237 | *ptr = stamp; | ||
238 | ptr += STAMP_DWORDS; | ||
239 | } | ||
240 | |||
241 | } else { | 266 | } else { |
242 | if (!tx_info->inl) { | 267 | if (!tx_info->inl) { |
243 | if ((void *) data >= end) { | 268 | if ((void *) data >= end) { |
@@ -263,16 +288,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
263 | ++data; | 288 | ++data; |
264 | } | 289 | } |
265 | } | 290 | } |
266 | /* Stamp the freed descriptor */ | ||
267 | for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { | ||
268 | *ptr = stamp; | ||
269 | ptr += STAMP_DWORDS; | ||
270 | if ((void *) ptr >= end) { | ||
271 | ptr = ring->buf; | ||
272 | stamp ^= cpu_to_be32(0x80000000); | ||
273 | } | ||
274 | } | ||
275 | |||
276 | } | 291 | } |
277 | dev_kfree_skb_any(skb); | 292 | dev_kfree_skb_any(skb); |
278 | return tx_info->nr_txbb; | 293 | return tx_info->nr_txbb; |
@@ -318,8 +333,9 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) | |||
318 | struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; | 333 | struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; |
319 | struct mlx4_cqe *cqe; | 334 | struct mlx4_cqe *cqe; |
320 | u16 index; | 335 | u16 index; |
321 | u16 new_index, ring_index; | 336 | u16 new_index, ring_index, stamp_index; |
322 | u32 txbbs_skipped = 0; | 337 | u32 txbbs_skipped = 0; |
338 | u32 txbbs_stamp = 0; | ||
323 | u32 cons_index = mcq->cons_index; | 339 | u32 cons_index = mcq->cons_index; |
324 | int size = cq->size; | 340 | int size = cq->size; |
325 | u32 size_mask = ring->size_mask; | 341 | u32 size_mask = ring->size_mask; |
@@ -335,6 +351,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) | |||
335 | index = cons_index & size_mask; | 351 | index = cons_index & size_mask; |
336 | cqe = &buf[(index << factor) + factor]; | 352 | cqe = &buf[(index << factor) + factor]; |
337 | ring_index = ring->cons & size_mask; | 353 | ring_index = ring->cons & size_mask; |
354 | stamp_index = ring_index; | ||
338 | 355 | ||
339 | /* Process all completed CQEs */ | 356 | /* Process all completed CQEs */ |
340 | while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, | 357 | while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, |
@@ -359,6 +376,12 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) | |||
359 | priv, ring, ring_index, | 376 | priv, ring, ring_index, |
360 | !!((ring->cons + txbbs_skipped) & | 377 | !!((ring->cons + txbbs_skipped) & |
361 | ring->size), timestamp); | 378 | ring->size), timestamp); |
379 | |||
380 | mlx4_en_stamp_wqe(priv, ring, stamp_index, | ||
381 | !!((ring->cons + txbbs_stamp) & | ||
382 | ring->size)); | ||
383 | stamp_index = ring_index; | ||
384 | txbbs_stamp = txbbs_skipped; | ||
362 | packets++; | 385 | packets++; |
363 | bytes += ring->tx_info[ring_index].nr_bytes; | 386 | bytes += ring->tx_info[ring_index].nr_bytes; |
364 | } while (ring_index != new_index); | 387 | } while (ring_index != new_index); |