aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author: Roland Dreier <rolandd@cisco.com> 2007-06-18 11:13:48 -0400
committer: Roland Dreier <rolandd@cisco.com> 2007-06-18 11:13:48 -0400
commit: 0e6e74162164d908edf7889ac66dca09e7505745 (patch)
tree: 7828d670c099771492dac599f8a7323c4e681e52 /drivers/infiniband
parent: 42c059ea2b0aac5f961253ba81c1b464d181a600 (diff)
IB/mlx4: Handle new FW requirement for send request prefetching
New ConnectX firmware introduces FW command interface revision 2, which requires that for each QP, a chunk of send queue entries (the "headroom") is kept marked as invalid, so that the HCA doesn't get confused if it prefetches entries that haven't been posted yet. Add code to the driver to do this, and also update the user ABI so that userspace can request that the prefetcher be turned off for userspace QPs (we just leave the prefetcher on for all kernel QPs). Unfortunately, marking send queue entries this way confuses older firmware, so we change the driver to allow only FW command interface revision 2. This means that users will have to update their firmware to work with the new driver, but the firmware is changing quickly and the old firmware has lots of other bugs anyway, so this shouldn't be too big a deal. Based on a patch from Jack Morgenstein <jackm@dev.mellanox.co.il>. Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/hw/mlx4/cq.c 4
-rw-r--r-- drivers/infiniband/hw/mlx4/mlx4_ib.h 5
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c 101
-rw-r--r-- drivers/infiniband/hw/mlx4/user.h 9
4 files changed, 82 insertions, 37 deletions
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 048c5274ab1..e940521e9c8 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -355,7 +355,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
355 wq = &(*cur_qp)->sq; 355 wq = &(*cur_qp)->sq;
356 wqe_ctr = be16_to_cpu(cqe->wqe_index); 356 wqe_ctr = be16_to_cpu(cqe->wqe_index);
357 wq->tail += (u16) (wqe_ctr - (u16) wq->tail); 357 wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
358 wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)]; 358 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
359 ++wq->tail; 359 ++wq->tail;
360 } else if ((*cur_qp)->ibqp.srq) { 360 } else if ((*cur_qp)->ibqp.srq) {
361 srq = to_msrq((*cur_qp)->ibqp.srq); 361 srq = to_msrq((*cur_qp)->ibqp.srq);
@@ -364,7 +364,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
364 mlx4_ib_free_srq_wqe(srq, wqe_ctr); 364 mlx4_ib_free_srq_wqe(srq, wqe_ctr);
365 } else { 365 } else {
366 wq = &(*cur_qp)->rq; 366 wq = &(*cur_qp)->rq;
367 wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)]; 367 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
368 ++wq->tail; 368 ++wq->tail;
369 } 369 }
370 370
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 93dac71f323..24ccadd6e4f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -95,7 +95,8 @@ struct mlx4_ib_mr {
95struct mlx4_ib_wq { 95struct mlx4_ib_wq {
96 u64 *wrid; 96 u64 *wrid;
97 spinlock_t lock; 97 spinlock_t lock;
98 int max; 98 int wqe_cnt;
99 int max_post;
99 int max_gs; 100 int max_gs;
100 int offset; 101 int offset;
101 int wqe_shift; 102 int wqe_shift;
@@ -113,6 +114,7 @@ struct mlx4_ib_qp {
113 114
114 u32 doorbell_qpn; 115 u32 doorbell_qpn;
115 __be32 sq_signal_bits; 116 __be32 sq_signal_bits;
117 int sq_spare_wqes;
116 struct mlx4_ib_wq sq; 118 struct mlx4_ib_wq sq;
117 119
118 struct ib_umem *umem; 120 struct ib_umem *umem;
@@ -123,6 +125,7 @@ struct mlx4_ib_qp {
123 u8 alt_port; 125 u8 alt_port;
124 u8 atomic_rd_en; 126 u8 atomic_rd_en;
125 u8 resp_depth; 127 u8 resp_depth;
128 u8 sq_no_prefetch;
126 u8 state; 129 u8 state;
127}; 130};
128 131
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4c15fa3426b..8fabe0da323 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -109,6 +109,20 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
109 return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); 109 return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
110} 110}
111 111
112/*
113 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
114 * first four bytes of every 64 byte chunk with 0xffffffff, except for
115 * the very first chunk of the WQE.
116 */
117static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
118{
119 u32 *wqe = get_send_wqe(qp, n);
120 int i;
121
122 for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
123 wqe[i] = 0xffffffff;
124}
125
112static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) 126static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
113{ 127{
114 struct ib_event event; 128 struct ib_event event;
@@ -201,18 +215,18 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
201 if (cap->max_recv_wr) 215 if (cap->max_recv_wr)
202 return -EINVAL; 216 return -EINVAL;
203 217
204 qp->rq.max = qp->rq.max_gs = 0; 218 qp->rq.wqe_cnt = qp->rq.max_gs = 0;
205 } else { 219 } else {
206 /* HW requires >= 1 RQ entry with >= 1 gather entry */ 220 /* HW requires >= 1 RQ entry with >= 1 gather entry */
207 if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) 221 if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
208 return -EINVAL; 222 return -EINVAL;
209 223
210 qp->rq.max = roundup_pow_of_two(max(1U, cap->max_recv_wr)); 224 qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
211 qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); 225 qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
212 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); 226 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
213 } 227 }
214 228
215 cap->max_recv_wr = qp->rq.max; 229 cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
216 cap->max_recv_sge = qp->rq.max_gs; 230 cap->max_recv_sge = qp->rq.max_gs;
217 231
218 return 0; 232 return 0;
@@ -236,8 +250,6 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
236 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) 250 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
237 return -EINVAL; 251 return -EINVAL;
238 252
239 qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1;
240
241 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * 253 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
242 sizeof (struct mlx4_wqe_data_seg), 254 sizeof (struct mlx4_wqe_data_seg),
243 cap->max_inline_data + 255 cap->max_inline_data +
@@ -246,18 +258,25 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
246 qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) / 258 qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
247 sizeof (struct mlx4_wqe_data_seg); 259 sizeof (struct mlx4_wqe_data_seg);
248 260
249 qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) + 261 /*
250 (qp->sq.max << qp->sq.wqe_shift); 262 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
263 * allow HW to prefetch.
264 */
265 qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
266 qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);
267
268 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
269 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
251 if (qp->rq.wqe_shift > qp->sq.wqe_shift) { 270 if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
252 qp->rq.offset = 0; 271 qp->rq.offset = 0;
253 qp->sq.offset = qp->rq.max << qp->rq.wqe_shift; 272 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
254 } else { 273 } else {
255 qp->rq.offset = qp->sq.max << qp->sq.wqe_shift; 274 qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
256 qp->sq.offset = 0; 275 qp->sq.offset = 0;
257 } 276 }
258 277
259 cap->max_send_wr = qp->sq.max; 278 cap->max_send_wr = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
260 cap->max_send_sge = qp->sq.max_gs; 279 cap->max_send_sge = qp->sq.max_gs;
261 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) - 280 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
262 sizeof (struct mlx4_wqe_inline_seg); 281 sizeof (struct mlx4_wqe_inline_seg);
263 282
@@ -267,11 +286,11 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
267static int set_user_sq_size(struct mlx4_ib_qp *qp, 286static int set_user_sq_size(struct mlx4_ib_qp *qp,
268 struct mlx4_ib_create_qp *ucmd) 287 struct mlx4_ib_create_qp *ucmd)
269{ 288{
270 qp->sq.max = 1 << ucmd->log_sq_bb_count; 289 qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
271 qp->sq.wqe_shift = ucmd->log_sq_stride; 290 qp->sq.wqe_shift = ucmd->log_sq_stride;
272 291
273 qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) + 292 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
274 (qp->sq.max << qp->sq.wqe_shift); 293 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
275 294
276 return 0; 295 return 0;
277} 296}
@@ -307,6 +326,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
307 goto err; 326 goto err;
308 } 327 }
309 328
329 qp->sq_no_prefetch = ucmd.sq_no_prefetch;
330
310 err = set_user_sq_size(qp, &ucmd); 331 err = set_user_sq_size(qp, &ucmd);
311 if (err) 332 if (err)
312 goto err; 333 goto err;
@@ -334,6 +355,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
334 goto err_mtt; 355 goto err_mtt;
335 } 356 }
336 } else { 357 } else {
358 qp->sq_no_prefetch = 0;
359
337 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); 360 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
338 if (err) 361 if (err)
339 goto err; 362 goto err;
@@ -360,8 +383,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
360 if (err) 383 if (err)
361 goto err_mtt; 384 goto err_mtt;
362 385
363 qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL); 386 qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
364 qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL); 387 qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
365 388
366 if (!qp->sq.wrid || !qp->rq.wrid) { 389 if (!qp->sq.wrid || !qp->rq.wrid) {
367 err = -ENOMEM; 390 err = -ENOMEM;
@@ -743,14 +766,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
743 context->mtu_msgmax = (attr->path_mtu << 5) | 31; 766 context->mtu_msgmax = (attr->path_mtu << 5) | 31;
744 } 767 }
745 768
746 if (qp->rq.max) 769 if (qp->rq.wqe_cnt)
747 context->rq_size_stride = ilog2(qp->rq.max) << 3; 770 context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
748 context->rq_size_stride |= qp->rq.wqe_shift - 4; 771 context->rq_size_stride |= qp->rq.wqe_shift - 4;
749 772
750 if (qp->sq.max) 773 if (qp->sq.wqe_cnt)
751 context->sq_size_stride = ilog2(qp->sq.max) << 3; 774 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
752 context->sq_size_stride |= qp->sq.wqe_shift - 4; 775 context->sq_size_stride |= qp->sq.wqe_shift - 4;
753 776
777 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
778 context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
779
754 if (qp->ibqp.uobject) 780 if (qp->ibqp.uobject)
755 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); 781 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
756 else 782 else
@@ -884,16 +910,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
884 910
885 /* 911 /*
886 * Before passing a kernel QP to the HW, make sure that the 912 * Before passing a kernel QP to the HW, make sure that the
887 * ownership bits of the send queue are set so that the 913 * ownership bits of the send queue are set and the SQ
888 * hardware doesn't start processing stale work requests. 914 * headroom is stamped so that the hardware doesn't start
915 * processing stale work requests.
889 */ 916 */
890 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 917 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
891 struct mlx4_wqe_ctrl_seg *ctrl; 918 struct mlx4_wqe_ctrl_seg *ctrl;
892 int i; 919 int i;
893 920
894 for (i = 0; i < qp->sq.max; ++i) { 921 for (i = 0; i < qp->sq.wqe_cnt; ++i) {
895 ctrl = get_send_wqe(qp, i); 922 ctrl = get_send_wqe(qp, i);
896 ctrl->owner_opcode = cpu_to_be32(1 << 31); 923 ctrl->owner_opcode = cpu_to_be32(1 << 31);
924
925 stamp_send_wqe(qp, i);
897 } 926 }
898 } 927 }
899 928
@@ -1124,7 +1153,7 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
1124 struct mlx4_ib_cq *cq; 1153 struct mlx4_ib_cq *cq;
1125 1154
1126 cur = wq->head - wq->tail; 1155 cur = wq->head - wq->tail;
1127 if (likely(cur + nreq < wq->max)) 1156 if (likely(cur + nreq < wq->max_post))
1128 return 0; 1157 return 0;
1129 1158
1130 cq = to_mcq(ib_cq); 1159 cq = to_mcq(ib_cq);
@@ -1132,7 +1161,7 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
1132 cur = wq->head - wq->tail; 1161 cur = wq->head - wq->tail;
1133 spin_unlock(&cq->lock); 1162 spin_unlock(&cq->lock);
1134 1163
1135 return cur + nreq >= wq->max; 1164 return cur + nreq >= wq->max_post;
1136} 1165}
1137 1166
1138int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1167int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
@@ -1165,8 +1194,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1165 goto out; 1194 goto out;
1166 } 1195 }
1167 1196
1168 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1)); 1197 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
1169 qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id; 1198 qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
1170 1199
1171 ctrl->srcrb_flags = 1200 ctrl->srcrb_flags =
1172 (wr->send_flags & IB_SEND_SIGNALED ? 1201 (wr->send_flags & IB_SEND_SIGNALED ?
@@ -1301,7 +1330,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1301 } 1330 }
1302 1331
1303 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | 1332 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
1304 (ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0); 1333 (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
1334
1335 /*
1336 * We can improve latency by not stamping the last
1337 * send queue WQE until after ringing the doorbell, so
1338 * only stamp here if there are still more WQEs to post.
1339 */
1340 if (wr->next)
1341 stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
1342 (qp->sq.wqe_cnt - 1));
1305 1343
1306 ++ind; 1344 ++ind;
1307 } 1345 }
@@ -1324,6 +1362,9 @@ out:
1324 * and reach the HCA out of order. 1362 * and reach the HCA out of order.
1325 */ 1363 */
1326 mmiowb(); 1364 mmiowb();
1365
1366 stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
1367 (qp->sq.wqe_cnt - 1));
1327 } 1368 }
1328 1369
1329 spin_unlock_irqrestore(&qp->rq.lock, flags); 1370 spin_unlock_irqrestore(&qp->rq.lock, flags);
@@ -1344,7 +1385,7 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1344 1385
1345 spin_lock_irqsave(&qp->rq.lock, flags); 1386 spin_lock_irqsave(&qp->rq.lock, flags);
1346 1387
1347 ind = qp->rq.head & (qp->rq.max - 1); 1388 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
1348 1389
1349 for (nreq = 0; wr; ++nreq, wr = wr->next) { 1390 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1350 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) { 1391 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
@@ -1375,7 +1416,7 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1375 1416
1376 qp->rq.wrid[ind] = wr->wr_id; 1417 qp->rq.wrid[ind] = wr->wr_id;
1377 1418
1378 ind = (ind + 1) & (qp->rq.max - 1); 1419 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
1379 } 1420 }
1380 1421
1381out: 1422out:
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 88c72d56368..e2d11be4525 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -39,7 +39,7 @@
39 * Increment this value if any changes that break userspace ABI 39 * Increment this value if any changes that break userspace ABI
40 * compatibility are made. 40 * compatibility are made.
41 */ 41 */
42#define MLX4_IB_UVERBS_ABI_VERSION 2 42#define MLX4_IB_UVERBS_ABI_VERSION 3
43 43
44/* 44/*
45 * Make sure that all structs defined in this file remain laid out so 45 * Make sure that all structs defined in this file remain laid out so
@@ -87,9 +87,10 @@ struct mlx4_ib_create_srq_resp {
87struct mlx4_ib_create_qp { 87struct mlx4_ib_create_qp {
88 __u64 buf_addr; 88 __u64 buf_addr;
89 __u64 db_addr; 89 __u64 db_addr;
90 __u8 log_sq_bb_count; 90 __u8 log_sq_bb_count;
91 __u8 log_sq_stride; 91 __u8 log_sq_stride;
92 __u8 reserved[6]; 92 __u8 sq_no_prefetch;
93 __u8 reserved[5];
93}; 94};
94 95
95#endif /* MLX4_IB_USER_H */ 96#endif /* MLX4_IB_USER_H */