aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEli Cohen <eli@mellanox.co.il>2008-07-15 02:48:44 -0400
committerRoland Dreier <rolandd@cisco.com>2008-07-15 02:48:44 -0400
commit9670e553915e67fb68f13258644342c68dc26b84 (patch)
tree68730cafe26cdb14d591f5f9c80bdeb1d6c85642
parent164ba0893c27a216557396320b6063fdac040392 (diff)
IB/mlx4: Optimize QP stamping
The idea is that for QPs with fixed size work requests (eg selective signaling QPs), before stamping the WQE, we read the value of the DS field, which gives the effective size of the descriptor as used in the previous post. Then we stamp only that area, since the rest of the descriptor is already stamped. When initializing the send queue buffer, make sure the DS field is initialized to the max descriptor size so that the subsequent stamping will be done on the entire descriptor area. Signed-off-by: Eli Cohen <eli@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c8
1 files changed, 6 insertions, 2 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index a80df22deae8..4b0ac5d68c46 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -129,9 +129,10 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
129 int ind; 129 int ind;
130 void *buf; 130 void *buf;
131 __be32 stamp; 131 __be32 stamp;
132 struct mlx4_wqe_ctrl_seg *ctrl;
132 133
133 s = roundup(size, 1U << qp->sq.wqe_shift);
134 if (qp->sq_max_wqes_per_wr > 1) { 134 if (qp->sq_max_wqes_per_wr > 1) {
135 s = roundup(size, 1U << qp->sq.wqe_shift);
135 for (i = 0; i < s; i += 64) { 136 for (i = 0; i < s; i += 64) {
136 ind = (i >> qp->sq.wqe_shift) + n; 137 ind = (i >> qp->sq.wqe_shift) + n;
137 stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : 138 stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
@@ -141,7 +142,8 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
141 *wqe = stamp; 142 *wqe = stamp;
142 } 143 }
143 } else { 144 } else {
144 buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); 145 ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
146 s = (ctrl->fence_size & 0x3f) << 4;
145 for (i = 64; i < s; i += 64) { 147 for (i = 64; i < s; i += 64) {
146 wqe = buf + i; 148 wqe = buf + i;
147 *wqe = cpu_to_be32(0xffffffff); 149 *wqe = cpu_to_be32(0xffffffff);
@@ -1063,6 +1065,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1063 for (i = 0; i < qp->sq.wqe_cnt; ++i) { 1065 for (i = 0; i < qp->sq.wqe_cnt; ++i) {
1064 ctrl = get_send_wqe(qp, i); 1066 ctrl = get_send_wqe(qp, i);
1065 ctrl->owner_opcode = cpu_to_be32(1 << 31); 1067 ctrl->owner_opcode = cpu_to_be32(1 << 31);
1068 if (qp->sq_max_wqes_per_wr == 1)
1069 ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
1066 1070
1067 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); 1071 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
1068 } 1072 }