diff options
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 28 |
1 files changed, 19 insertions, 9 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 39167a797f99..a91cb4c3fa5c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c | |||
@@ -1462,7 +1462,8 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) | |||
1462 | } | 1462 | } |
1463 | 1463 | ||
1464 | static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, | 1464 | static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, |
1465 | struct mlx4_ib_qp *qp, unsigned *lso_seg_len) | 1465 | struct mlx4_ib_qp *qp, unsigned *lso_seg_len, |
1466 | __be32 *lso_hdr_sz) | ||
1466 | { | 1467 | { |
1467 | unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); | 1468 | unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); |
1468 | 1469 | ||
@@ -1479,12 +1480,8 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, | |||
1479 | 1480 | ||
1480 | memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); | 1481 | memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); |
1481 | 1482 | ||
1482 | /* make sure LSO header is written before overwriting stamping */ | 1483 | *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | |
1483 | wmb(); | 1484 | wr->wr.ud.hlen); |
1484 | |||
1485 | wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | | ||
1486 | wr->wr.ud.hlen); | ||
1487 | |||
1488 | *lso_seg_len = halign; | 1485 | *lso_seg_len = halign; |
1489 | return 0; | 1486 | return 0; |
1490 | } | 1487 | } |
@@ -1518,6 +1515,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1518 | int uninitialized_var(stamp); | 1515 | int uninitialized_var(stamp); |
1519 | int uninitialized_var(size); | 1516 | int uninitialized_var(size); |
1520 | unsigned uninitialized_var(seglen); | 1517 | unsigned uninitialized_var(seglen); |
1518 | __be32 dummy; | ||
1519 | __be32 *lso_wqe; | ||
1520 | __be32 uninitialized_var(lso_hdr_sz); | ||
1521 | int i; | 1521 | int i; |
1522 | 1522 | ||
1523 | spin_lock_irqsave(&qp->sq.lock, flags); | 1523 | spin_lock_irqsave(&qp->sq.lock, flags); |
@@ -1525,6 +1525,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1525 | ind = qp->sq_next_wqe; | 1525 | ind = qp->sq_next_wqe; |
1526 | 1526 | ||
1527 | for (nreq = 0; wr; ++nreq, wr = wr->next) { | 1527 | for (nreq = 0; wr; ++nreq, wr = wr->next) { |
1528 | lso_wqe = &dummy; | ||
1529 | |||
1528 | if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { | 1530 | if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { |
1529 | err = -ENOMEM; | 1531 | err = -ENOMEM; |
1530 | *bad_wr = wr; | 1532 | *bad_wr = wr; |
@@ -1606,11 +1608,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1606 | size += sizeof (struct mlx4_wqe_datagram_seg) / 16; | 1608 | size += sizeof (struct mlx4_wqe_datagram_seg) / 16; |
1607 | 1609 | ||
1608 | if (wr->opcode == IB_WR_LSO) { | 1610 | if (wr->opcode == IB_WR_LSO) { |
1609 | err = build_lso_seg(wqe, wr, qp, &seglen); | 1611 | err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz); |
1610 | if (unlikely(err)) { | 1612 | if (unlikely(err)) { |
1611 | *bad_wr = wr; | 1613 | *bad_wr = wr; |
1612 | goto out; | 1614 | goto out; |
1613 | } | 1615 | } |
1616 | lso_wqe = (__be32 *) wqe; | ||
1614 | wqe += seglen; | 1617 | wqe += seglen; |
1615 | size += seglen / 16; | 1618 | size += seglen / 16; |
1616 | } | 1619 | } |
@@ -1652,6 +1655,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1652 | for (i = wr->num_sge - 1; i >= 0; --i, --dseg) | 1655 | for (i = wr->num_sge - 1; i >= 0; --i, --dseg) |
1653 | set_data_seg(dseg, wr->sg_list + i); | 1656 | set_data_seg(dseg, wr->sg_list + i); |
1654 | 1657 | ||
1658 | /* | ||
1659 | * Possibly overwrite stamping in cacheline with LSO | ||
1660 | * segment only after making sure all data segments | ||
1661 | * are written. | ||
1662 | */ | ||
1663 | wmb(); | ||
1664 | *lso_wqe = lso_hdr_sz; | ||
1665 | |||
1655 | ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? | 1666 | ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? |
1656 | MLX4_WQE_CTRL_FENCE : 0) | size; | 1667 | MLX4_WQE_CTRL_FENCE : 0) | size; |
1657 | 1668 | ||
@@ -1686,7 +1697,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1686 | stamp_send_wqe(qp, stamp, size * 16); | 1697 | stamp_send_wqe(qp, stamp, size * 16); |
1687 | ind = pad_wraparound(qp, ind); | 1698 | ind = pad_wraparound(qp, ind); |
1688 | } | 1699 | } |
1689 | |||
1690 | } | 1700 | } |
1691 | 1701 | ||
1692 | out: | 1702 | out: |