author    Roland Dreier <rolandd@cisco.com>    2009-01-16 15:47:47 -0500
committer Roland Dreier <rolandd@cisco.com>    2009-01-16 15:47:47 -0500
commit    0fd7e1d8559f45a6838cee93ea49adc0c5bda8f0
tree      7d31d09b0d60de47a1b668474957ce1926812087 /drivers/infiniband
parent    d3b924d960a808105180d229b4667061123cc4ef
IB/mlx4: Fix memory ordering problem when posting LSO sends
The current work request posting code writes the LSO segment before writing
any data segments.  This leaves a window where the LSO segment overwrites the
stamping in one cacheline that the HCA prefetches before the rest of the
cacheline is filled with the correct data segments.  When the HCA processes
this work request, a local protection error may result.

Fix this by saving the LSO header size field off and writing it only after
all data segments are written.  This fix is a cleaned-up version of a patch
from Jack Morgenstein <jackm@dev.mellanox.co.il>.

This fixes <https://bugs.openfabrics.org/show_bug.cgi?id=1383>.

Reported-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
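To make the ordering concrete, below is a minimal userspace sketch of the
pattern the patch introduces.  All names and types here (post_send_sketch,
lso_seg, data_seg, sketch_wmb) are simplified, hypothetical stand-ins rather
than the driver's real API, and __sync_synchronize() plays the role of the
kernel's wmb().  The header-size word of the LSO segment is written through a
pointer that defaults to a harmless dummy location, and the actual store
happens only after every data segment has been written and a barrier issued:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified, hypothetical stand-ins for the mlx4 WQE segments. */
struct lso_seg  { uint32_t mss_hdr_size; uint8_t  header[44]; };
struct data_seg { uint32_t byte_count;   uint64_t addr; };

/* Userspace stand-in for the kernel's wmb(). */
#define sketch_wmb() __sync_synchronize()

/*
 * Post one (fake) send work request into wqe_buf, following the order the
 * patch enforces: remember where the LSO mss/header-size word goes, write
 * all data segments, then store that word behind a barrier.
 */
static void post_send_sketch(void *wqe_buf, int is_lso,
			     const struct data_seg *sg, int num_sge)
{
	uint32_t dummy;			/* harmless sink for non-LSO WRs */
	uint32_t *lso_wqe = &dummy;
	uint32_t lso_hdr_sz = 0;
	uint8_t *wqe = wqe_buf;
	int i;

	if (is_lso) {
		struct lso_seg *lso = (struct lso_seg *) wqe;

		memset(lso->header, 0xab, sizeof lso->header);	/* fake packet header */
		lso_hdr_sz = (1460u << 16) | sizeof lso->header;	/* mss | header length */
		lso_wqe = &lso->mss_hdr_size;	/* remember where it goes ...  */
		wqe += sizeof *lso;		/* ... but do not write it yet */
	}

	/* Write every data segment first (the real driver does this in reverse). */
	for (i = num_sge - 1; i >= 0; --i)
		memcpy(wqe + i * sizeof *sg, &sg[i], sizeof *sg);

	/*
	 * Only after the cacheline holds valid data segments is it safe to
	 * overwrite the stamping there with the LSO header-size word.
	 */
	sketch_wmb();
	*lso_wqe = lso_hdr_sz;
}

int main(void)
{
	uint64_t wqe_buf[32] = { 0 };
	struct data_seg sg[2] = { { 4096, 0x1000 }, { 4096, 0x2000 } };

	post_send_sketch(wqe_buf, 1, sg, 2);
	printf("mss_hdr_size word: 0x%08x\n", (unsigned) *(uint32_t *) wqe_buf);
	return 0;
}

Pointing lso_wqe at a dummy word by default keeps the final store
unconditional, so non-LSO work requests need no extra branch in the hot path;
the real patch uses the same trick.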
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--   drivers/infiniband/hw/mlx4/qp.c   28
1 file changed, 19 insertions, 9 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 39167a797f99..a91cb4c3fa5c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1462,7 +1462,8 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 }
 
 static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
-			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
+			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
+			 __be32 *lso_hdr_sz)
 {
 	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
 
@@ -1479,12 +1480,8 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 
 	memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
 
-	/* make sure LSO header is written before overwriting stamping */
-	wmb();
-
-	wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
-					wr->wr.ud.hlen);
-
+	*lso_hdr_sz  = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
+				   wr->wr.ud.hlen);
 	*lso_seg_len = halign;
 	return 0;
 }
@@ -1518,6 +1515,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	int uninitialized_var(stamp);
 	int uninitialized_var(size);
 	unsigned uninitialized_var(seglen);
+	__be32 dummy;
+	__be32 *lso_wqe;
+	__be32 uninitialized_var(lso_hdr_sz);
 	int i;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1525,6 +1525,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	ind = qp->sq_next_wqe;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		lso_wqe = &dummy;
+
 		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
 			err = -ENOMEM;
 			*bad_wr = wr;
@@ -1606,11 +1608,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
 			if (wr->opcode == IB_WR_LSO) {
-				err = build_lso_seg(wqe, wr, qp, &seglen);
+				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
 				if (unlikely(err)) {
 					*bad_wr = wr;
 					goto out;
 				}
+				lso_wqe = (__be32 *) wqe;
 				wqe  += seglen;
 				size += seglen / 16;
 			}
@@ -1652,6 +1655,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
 			set_data_seg(dseg, wr->sg_list + i);
 
+		/*
+		 * Possibly overwrite stamping in cacheline with LSO
+		 * segment only after making sure all data segments
+		 * are written.
+		 */
+		wmb();
+		*lso_wqe = lso_hdr_sz;
+
 		ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
 				    MLX4_WQE_CTRL_FENCE : 0) | size;
 
@@ -1686,7 +1697,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			stamp_send_wqe(qp, stamp, size * 16);
 			ind = pad_wraparound(qp, ind);
 		}
-
 	}
 
 out: