diff options
author | Eli Cohen <eli@mellanox.co.il> | 2009-11-12 14:19:44 -0500 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2009-11-12 14:19:44 -0500 |
commit | 417608c20a4c8397bc5307d949ec01ea0a0dd8e5 (patch) | |
tree | 9986edf24a8d52fdfc5f51b2cb50f56c4eeb0c6c /drivers/infiniband/hw/mlx4 | |
parent | ecdc428e4c5d821a07baf4f8b1718faf67b9026f (diff) |
IB/mlx4: Remove limitation on LSO header size
Current code has a limitation: an LSO header is not allowed to cross a
64 byte boundary. This patch removes this limitation by setting the
WQE RR for large headers thus allowing LSO headers of any size. The
extra buffer reserved for MLX4_IB_QP_LSO QPs has been doubled, from 64
to 128 bytes, assuming this is reasonable upper limit for header
length. Also, this patch will cause IB_DEVICE_UD_TSO to be set only
for HCA FW versions that set MLX4_DEV_CAP_FLAG_BLH; e.g. FW version
2.6.000 and higher.
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx4')
-rw-r--r-- | drivers/infiniband/hw/mlx4/main.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 24 |
2 files changed, 13 insertions, 13 deletions
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3cb3f47a10b8..e596537ff353 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c | |||
@@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, | |||
103 | props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; | 103 | props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; |
104 | if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) | 104 | if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) |
105 | props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; | 105 | props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; |
106 | if (dev->dev->caps.max_gso_sz) | 106 | if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) |
107 | props->device_cap_flags |= IB_DEVICE_UD_TSO; | 107 | props->device_cap_flags |= IB_DEVICE_UD_TSO; |
108 | if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) | 108 | if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) |
109 | props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; | 109 | props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; |
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 518d561970aa..847030c89a8d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c | |||
@@ -54,7 +54,8 @@ enum { | |||
54 | /* | 54 | /* |
55 | * Largest possible UD header: send with GRH and immediate data. | 55 | * Largest possible UD header: send with GRH and immediate data. |
56 | */ | 56 | */ |
57 | MLX4_IB_UD_HEADER_SIZE = 72 | 57 | MLX4_IB_UD_HEADER_SIZE = 72, |
58 | MLX4_IB_LSO_HEADER_SPARE = 128, | ||
58 | }; | 59 | }; |
59 | 60 | ||
60 | struct mlx4_ib_sqp { | 61 | struct mlx4_ib_sqp { |
@@ -67,7 +68,8 @@ struct mlx4_ib_sqp { | |||
67 | }; | 68 | }; |
68 | 69 | ||
69 | enum { | 70 | enum { |
70 | MLX4_IB_MIN_SQ_STRIDE = 6 | 71 | MLX4_IB_MIN_SQ_STRIDE = 6, |
72 | MLX4_IB_CACHE_LINE_SIZE = 64, | ||
71 | }; | 73 | }; |
72 | 74 | ||
73 | static const __be32 mlx4_ib_opcode[] = { | 75 | static const __be32 mlx4_ib_opcode[] = { |
@@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) | |||
261 | case IB_QPT_UD: | 263 | case IB_QPT_UD: |
262 | return sizeof (struct mlx4_wqe_ctrl_seg) + | 264 | return sizeof (struct mlx4_wqe_ctrl_seg) + |
263 | sizeof (struct mlx4_wqe_datagram_seg) + | 265 | sizeof (struct mlx4_wqe_datagram_seg) + |
264 | ((flags & MLX4_IB_QP_LSO) ? 64 : 0); | 266 | ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); |
265 | case IB_QPT_UC: | 267 | case IB_QPT_UC: |
266 | return sizeof (struct mlx4_wqe_ctrl_seg) + | 268 | return sizeof (struct mlx4_wqe_ctrl_seg) + |
267 | sizeof (struct mlx4_wqe_raddr_seg); | 269 | sizeof (struct mlx4_wqe_raddr_seg); |
@@ -1466,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) | |||
1466 | 1468 | ||
1467 | static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, | 1469 | static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, |
1468 | struct mlx4_ib_qp *qp, unsigned *lso_seg_len, | 1470 | struct mlx4_ib_qp *qp, unsigned *lso_seg_len, |
1469 | __be32 *lso_hdr_sz) | 1471 | __be32 *lso_hdr_sz, __be32 *blh) |
1470 | { | 1472 | { |
1471 | unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); | 1473 | unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); |
1472 | 1474 | ||
1473 | /* | 1475 | if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) |
1474 | * This is a temporary limitation and will be removed in | 1476 | *blh = cpu_to_be32(1 << 6); |
1475 | * a forthcoming FW release: | ||
1476 | */ | ||
1477 | if (unlikely(halign > 64)) | ||
1478 | return -EINVAL; | ||
1479 | 1477 | ||
1480 | if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && | 1478 | if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && |
1481 | wr->num_sge > qp->sq.max_gs - (halign >> 4))) | 1479 | wr->num_sge > qp->sq.max_gs - (halign >> 4))) |
@@ -1521,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1521 | __be32 dummy; | 1519 | __be32 dummy; |
1522 | __be32 *lso_wqe; | 1520 | __be32 *lso_wqe; |
1523 | __be32 uninitialized_var(lso_hdr_sz); | 1521 | __be32 uninitialized_var(lso_hdr_sz); |
1522 | __be32 blh; | ||
1524 | int i; | 1523 | int i; |
1525 | 1524 | ||
1526 | spin_lock_irqsave(&qp->sq.lock, flags); | 1525 | spin_lock_irqsave(&qp->sq.lock, flags); |
@@ -1529,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1529 | 1528 | ||
1530 | for (nreq = 0; wr; ++nreq, wr = wr->next) { | 1529 | for (nreq = 0; wr; ++nreq, wr = wr->next) { |
1531 | lso_wqe = &dummy; | 1530 | lso_wqe = &dummy; |
1531 | blh = 0; | ||
1532 | 1532 | ||
1533 | if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { | 1533 | if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { |
1534 | err = -ENOMEM; | 1534 | err = -ENOMEM; |
@@ -1615,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1615 | size += sizeof (struct mlx4_wqe_datagram_seg) / 16; | 1615 | size += sizeof (struct mlx4_wqe_datagram_seg) / 16; |
1616 | 1616 | ||
1617 | if (wr->opcode == IB_WR_LSO) { | 1617 | if (wr->opcode == IB_WR_LSO) { |
1618 | err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz); | 1618 | err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); |
1619 | if (unlikely(err)) { | 1619 | if (unlikely(err)) { |
1620 | *bad_wr = wr; | 1620 | *bad_wr = wr; |
1621 | goto out; | 1621 | goto out; |
@@ -1686,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1686 | } | 1686 | } |
1687 | 1687 | ||
1688 | ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | | 1688 | ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | |
1689 | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); | 1689 | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; |
1690 | 1690 | ||
1691 | stamp = ind + qp->sq_spare_wqes; | 1691 | stamp = ind + qp->sq_spare_wqes; |
1692 | ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); | 1692 | ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); |