about | summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c 28
1 files changed, 19 insertions, 9 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 39167a797f99..a91cb4c3fa5c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1462,7 +1462,8 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
1462} 1462}
1463 1463
1464static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, 1464static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
1465 struct mlx4_ib_qp *qp, unsigned *lso_seg_len) 1465 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
1466 __be32 *lso_hdr_sz)
1466{ 1467{
1467 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); 1468 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
1468 1469
@@ -1479,12 +1480,8 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
1479 1480
1480 memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); 1481 memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
1481 1482
1482 /* make sure LSO header is written before overwriting stamping */ 1483 *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
1483 wmb(); 1484 wr->wr.ud.hlen);
1484
1485 wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
1486 wr->wr.ud.hlen);
1487
1488 *lso_seg_len = halign; 1485 *lso_seg_len = halign;
1489 return 0; 1486 return 0;
1490} 1487}
@@ -1518,6 +1515,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1518 int uninitialized_var(stamp); 1515 int uninitialized_var(stamp);
1519 int uninitialized_var(size); 1516 int uninitialized_var(size);
1520 unsigned uninitialized_var(seglen); 1517 unsigned uninitialized_var(seglen);
1518 __be32 dummy;
1519 __be32 *lso_wqe;
1520 __be32 uninitialized_var(lso_hdr_sz);
1521 int i; 1521 int i;
1522 1522
1523 spin_lock_irqsave(&qp->sq.lock, flags); 1523 spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1525,6 +1525,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1525 ind = qp->sq_next_wqe; 1525 ind = qp->sq_next_wqe;
1526 1526
1527 for (nreq = 0; wr; ++nreq, wr = wr->next) { 1527 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1528 lso_wqe = &dummy;
1529
1528 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { 1530 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1529 err = -ENOMEM; 1531 err = -ENOMEM;
1530 *bad_wr = wr; 1532 *bad_wr = wr;
@@ -1606,11 +1608,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1606 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 1608 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
1607 1609
1608 if (wr->opcode == IB_WR_LSO) { 1610 if (wr->opcode == IB_WR_LSO) {
1609 err = build_lso_seg(wqe, wr, qp, &seglen); 1611 err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
1610 if (unlikely(err)) { 1612 if (unlikely(err)) {
1611 *bad_wr = wr; 1613 *bad_wr = wr;
1612 goto out; 1614 goto out;
1613 } 1615 }
1616 lso_wqe = (__be32 *) wqe;
1614 wqe += seglen; 1617 wqe += seglen;
1615 size += seglen / 16; 1618 size += seglen / 16;
1616 } 1619 }
@@ -1652,6 +1655,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1652 for (i = wr->num_sge - 1; i >= 0; --i, --dseg) 1655 for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
1653 set_data_seg(dseg, wr->sg_list + i); 1656 set_data_seg(dseg, wr->sg_list + i);
1654 1657
1658 /*
1659 * Possibly overwrite stamping in cacheline with LSO
1660 * segment only after making sure all data segments
1661 * are written.
1662 */
1663 wmb();
1664 *lso_wqe = lso_hdr_sz;
1665
1655 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? 1666 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
1656 MLX4_WQE_CTRL_FENCE : 0) | size; 1667 MLX4_WQE_CTRL_FENCE : 0) | size;
1657 1668
@@ -1686,7 +1697,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1686 stamp_send_wqe(qp, stamp, size * 16); 1697 stamp_send_wqe(qp, stamp, size * 16);
1687 ind = pad_wraparound(qp, ind); 1698 ind = pad_wraparound(qp, ind);
1688 } 1699 }
1689
1690 } 1700 }
1691 1701
1692out: 1702out: