author     Steve Wise <swise@opengridcomputing.com>    2013-08-06 11:34:35 -0400
committer  Roland Dreier <roland@purestorage.com>      2013-08-13 14:55:45 -0400
commit     1cf24dcef4e1dd0c34d8c39b09a9ce9a01accc72 (patch)
tree       31fc9b6019990693eac87713c15a97b5571932ee
parent     97d7ec0c410e89ece852e768b8bfd42d4d5822fd (diff)
RDMA/cxgb4: Fix QP flush logic
This patch makes the following fixes in the QP flush logic:

- correctly flushes unsignaled WRs followed by a signaled WR
- supports flushing a CQ bound to multiple QPs
- resets cidx_flush if an active queue starts getting HW CQEs again
- marks WQ in error when we leave RTS. This was only being done for
  user queues, but we need it for kernel queues too so that
  post_send/post_recv will start returning the appropriate error
  synchronously
- eats unsignaled read resp CQEs. HW always inserts CQEs, so we must
  silently discard them if the read work request was unsignaled.
- handles QP flushes with pending SW CQEs. The flush and out-of-order
  completion logic has a bug where, if out-of-order completions are
  flushed but not yet polled by the consumer and the QP is then
  flushed, we end up inserting duplicate completions.
- c4iw_flush_sq() should only flush WRs that have not already been
  flushed. Since we already track where in the SQ we've flushed via
  sq.cidx_flush, just start at that point and flush any remaining.
  This bug only caused a problem in the presence of unsignaled work
  requests.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Vipul Pandya <vipul@chelsio.com>

[ Fixed sparse warning due to htonl/ntohl confusion.  - Roland ]

Signed-off-by: Roland Dreier <roland@purestorage.com>
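Editor's note: the last bullet above is the core of the change -- flushing resumes from a persistent cursor instead of re-walking the whole SQ. The following is a minimal, self-contained sketch of that flush-cursor idea, not the driver code itself; all names here (send_queue, flush_sq, SQ_SIZE) are illustrative. In the actual patch the cursor (sq.flush_cidx) is additionally reset to -1 by t4_sq_consume() when hardware completions catch up to it, as seen in the t4.h hunk below.

/*
 * Standalone sketch (not cxgb4 driver code) of the flush-cursor idea:
 * flushing resumes from flush_cidx, so entries that were already
 * flushed are never flushed twice.
 */
#include <stdio.h>

#define SQ_SIZE 8

struct sw_entry {
        int flushed;            /* already turned into a flush completion */
};

struct send_queue {
        struct sw_entry sw[SQ_SIZE];
        int cidx;               /* oldest not-yet-completed entry */
        int pidx;               /* next free slot */
        int flush_cidx;         /* where flushing left off, -1 = not started */
};

/* Walk from the flush cursor to pidx, flushing anything not yet flushed. */
static int flush_sq(struct send_queue *sq)
{
        int flushed = 0;
        int idx;

        if (sq->flush_cidx == -1)
                sq->flush_cidx = sq->cidx;
        idx = sq->flush_cidx;

        while (idx != sq->pidx) {
                if (!sq->sw[idx].flushed) {
                        sq->sw[idx].flushed = 1;
                        flushed++;      /* the driver would insert a flush CQE here */
                }
                if (++idx == SQ_SIZE)
                        idx = 0;
        }
        sq->flush_cidx = idx;
        return flushed;
}

int main(void)
{
        struct send_queue sq = { .cidx = 0, .pidx = 5, .flush_cidx = -1 };

        printf("first flush:  %d entries\n", flush_sq(&sq));    /* 5 */
        printf("second flush: %d entries\n", flush_sq(&sq));    /* 0: nothing re-flushed */
        return 0;
}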
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c        | 326
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h  |   5
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c        |  34
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h        |  25
4 files changed, 254 insertions, 136 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 0f1607c8325a..6657390de956 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -225,43 +225,186 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
         t4_swcq_produce(cq);
 }
 
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count)
+static void advance_oldest_read(struct t4_wq *wq);
+
+int c4iw_flush_sq(struct c4iw_qp *qhp)
 {
         int flushed = 0;
-        struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count];
-        int in_use = wq->sq.in_use - count;
-
-        BUG_ON(in_use < 0);
-        while (in_use--) {
-                swsqe->signaled = 0;
-                insert_sq_cqe(wq, cq, swsqe);
-                swsqe++;
-                if (swsqe == (wq->sq.sw_sq + wq->sq.size))
-                        swsqe = wq->sq.sw_sq;
-                flushed++;
+        struct t4_wq *wq = &qhp->wq;
+        struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
+        struct t4_cq *cq = &chp->cq;
+        int idx;
+        struct t4_swsqe *swsqe;
+        int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING &&
+                        qhp->attr.state != C4IW_QP_STATE_IDLE);
+
+        if (wq->sq.flush_cidx == -1)
+                wq->sq.flush_cidx = wq->sq.cidx;
+        idx = wq->sq.flush_cidx;
+        BUG_ON(idx >= wq->sq.size);
+        while (idx != wq->sq.pidx) {
+                if (error) {
+                        swsqe = &wq->sq.sw_sq[idx];
+                        BUG_ON(swsqe->flushed);
+                        swsqe->flushed = 1;
+                        insert_sq_cqe(wq, cq, swsqe);
+                        if (wq->sq.oldest_read == swsqe) {
+                                BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+                                advance_oldest_read(wq);
+                        }
+                        flushed++;
+                } else {
+                        t4_sq_consume(wq);
+                }
+                if (++idx == wq->sq.size)
+                        idx = 0;
         }
+        wq->sq.flush_cidx += flushed;
+        if (wq->sq.flush_cidx >= wq->sq.size)
+                wq->sq.flush_cidx -= wq->sq.size;
         return flushed;
 }
 
+static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
+{
+        struct t4_swsqe *swsqe;
+        int cidx;
+
+        if (wq->sq.flush_cidx == -1)
+                wq->sq.flush_cidx = wq->sq.cidx;
+        cidx = wq->sq.flush_cidx;
+        BUG_ON(cidx > wq->sq.size);
+
+        while (cidx != wq->sq.pidx) {
+                swsqe = &wq->sq.sw_sq[cidx];
+                if (!swsqe->signaled) {
+                        if (++cidx == wq->sq.size)
+                                cidx = 0;
+                } else if (swsqe->complete) {
+
+                        BUG_ON(swsqe->flushed);
+
+                        /*
+                         * Insert this completed cqe into the swcq.
+                         */
+                        PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
+                                        __func__, cidx, cq->sw_pidx);
+                        swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
+                        cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
+                        t4_swcq_produce(cq);
+                        swsqe->flushed = 1;
+                        if (++cidx == wq->sq.size)
+                                cidx = 0;
+                        wq->sq.flush_cidx = cidx;
+                } else
+                        break;
+        }
+}
+
+static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
+                struct t4_cqe *read_cqe)
+{
+        read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
+        read_cqe->len = htonl(wq->sq.oldest_read->read_len);
+        read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
+                        V_CQE_SWCQE(SW_CQE(hw_cqe)) |
+                        V_CQE_OPCODE(FW_RI_READ_REQ) |
+                        V_CQE_TYPE(1));
+        read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
+}
+
+static void advance_oldest_read(struct t4_wq *wq)
+{
+
+        u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
+
+        if (rptr == wq->sq.size)
+                rptr = 0;
+        while (rptr != wq->sq.pidx) {
+                wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
+
+                if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
+                        return;
+                if (++rptr == wq->sq.size)
+                        rptr = 0;
+        }
+        wq->sq.oldest_read = NULL;
+}
+
 /*
  * Move all CQEs from the HWCQ into the SWCQ.
+ * Deal with out-of-order and/or completions that complete
+ * prior unsignalled WRs.
  */
-void c4iw_flush_hw_cq(struct t4_cq *cq)
+void c4iw_flush_hw_cq(struct c4iw_cq *chp)
 {
-        struct t4_cqe *cqe = NULL, *swcqe;
+        struct t4_cqe *hw_cqe, *swcqe, read_cqe;
+        struct c4iw_qp *qhp;
+        struct t4_swsqe *swsqe;
         int ret;
 
-        PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
-        ret = t4_next_hw_cqe(cq, &cqe);
+        PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid);
+        ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+
+        /*
+         * This logic is similar to poll_cq(), but not quite the same
+         * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
+         * also do any translation magic that poll_cq() normally does.
+         */
         while (!ret) {
-                PDBG("%s flushing hwcq cidx 0x%x swcq pidx 0x%x\n",
-                     __func__, cq->cidx, cq->sw_pidx);
-                swcqe = &cq->sw_queue[cq->sw_pidx];
-                *swcqe = *cqe;
-                swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
-                t4_swcq_produce(cq);
-                t4_hwcq_consume(cq);
-                ret = t4_next_hw_cqe(cq, &cqe);
+                qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
+
+                /*
+                 * drop CQEs with no associated QP
+                 */
+                if (qhp == NULL)
+                        goto next_cqe;
+
+                if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
+                        goto next_cqe;
+
+                if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
+
+                        /*
+                         * drop peer2peer RTR reads.
+                         */
+                        if (CQE_WRID_STAG(hw_cqe) == 1)
+                                goto next_cqe;
+
+                        /*
+                         * Eat completions for unsignaled read WRs.
+                         */
+                        if (!qhp->wq.sq.oldest_read->signaled) {
+                                advance_oldest_read(&qhp->wq);
+                                goto next_cqe;
+                        }
+
+                        /*
+                         * Don't write to the HWCQ, create a new read req CQE
+                         * in local memory and move it into the swcq.
+                         */
+                        create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
+                        hw_cqe = &read_cqe;
+                        advance_oldest_read(&qhp->wq);
+                }
+
+                /* if its a SQ completion, then do the magic to move all the
+                 * unsignaled and now in-order completions into the swcq.
+                 */
+                if (SQ_TYPE(hw_cqe)) {
+                        swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+                        swsqe->cqe = *hw_cqe;
+                        swsqe->complete = 1;
+                        flush_completed_wrs(&qhp->wq, &chp->cq);
+                } else {
+                        swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
+                        *swcqe = *hw_cqe;
+                        swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+                        t4_swcq_produce(&chp->cq);
+                }
+next_cqe:
+                t4_hwcq_consume(&chp->cq);
+                ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
         }
 }
 
@@ -281,25 +424,6 @@ static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
         return 1;
 }
 
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
-{
-        struct t4_cqe *cqe;
-        u32 ptr;
-
-        *count = 0;
-        ptr = cq->sw_cidx;
-        while (ptr != cq->sw_pidx) {
-                cqe = &cq->sw_queue[ptr];
-                if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) &&
-                                      wq->sq.oldest_read)) &&
-                    (CQE_QPID(cqe) == wq->sq.qid))
-                        (*count)++;
-                if (++ptr == cq->size)
-                        ptr = 0;
-        }
-        PDBG("%s cq %p count %d\n", __func__, cq, *count);
-}
-
 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
 {
         struct t4_cqe *cqe;
@@ -319,70 +443,6 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
         PDBG("%s cq %p count %d\n", __func__, cq, *count);
 }
 
-static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
-{
-        struct t4_swsqe *swsqe;
-        u16 ptr = wq->sq.cidx;
-        int count = wq->sq.in_use;
-        int unsignaled = 0;
-
-        swsqe = &wq->sq.sw_sq[ptr];
-        while (count--)
-                if (!swsqe->signaled) {
-                        if (++ptr == wq->sq.size)
-                                ptr = 0;
-                        swsqe = &wq->sq.sw_sq[ptr];
-                        unsignaled++;
-                } else if (swsqe->complete) {
-
-                        /*
-                         * Insert this completed cqe into the swcq.
-                         */
-                        PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
-                                        __func__, ptr, cq->sw_pidx);
-                        swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
-                        cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
-                        t4_swcq_produce(cq);
-                        swsqe->signaled = 0;
-                        wq->sq.in_use -= unsignaled;
-                        break;
-                } else
-                        break;
-}
-
-static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
-                struct t4_cqe *read_cqe)
-{
-        read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
-        read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len);
-        read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
-                        V_CQE_SWCQE(SW_CQE(hw_cqe)) |
-                        V_CQE_OPCODE(FW_RI_READ_REQ) |
-                        V_CQE_TYPE(1));
-        read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
-}
-
-/*
- * Return a ptr to the next read wr in the SWSQ or NULL.
- */
-static void advance_oldest_read(struct t4_wq *wq)
-{
-
-        u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
-
-        if (rptr == wq->sq.size)
-                rptr = 0;
-        while (rptr != wq->sq.pidx) {
-                wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
-
-                if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
-                        return;
-                if (++rptr == wq->sq.size)
-                        rptr = 0;
-        }
-        wq->sq.oldest_read = NULL;
-}
-
 /*
  * poll_cq
  *
@@ -427,6 +487,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
         }
 
         /*
+         * skip hw cqe's if the wq is flushed.
+         */
+        if (wq->flushed && !SW_CQE(hw_cqe)) {
+                ret = -EAGAIN;
+                goto skip_cqe;
+        }
+
+        /*
+         * skip TERMINATE cqes...
+         */
+        if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
+                ret = -EAGAIN;
+                goto skip_cqe;
+        }
+
+        /*
          * Gotta tweak READ completions:
          *        1) the cqe doesn't contain the sq_wptr from the wr.
          *        2) opcode not reflected from the wr.
@@ -440,7 +516,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
          * was generated by the kernel driver as part of peer-2-peer
          * connection setup. So ignore the completion.
          */
-        if (!wq->sq.oldest_read) {
+        if (CQE_WRID_STAG(hw_cqe) == 1) {
                 if (CQE_STATUS(hw_cqe))
                         t4_set_wq_in_error(wq);
                 ret = -EAGAIN;
@@ -448,6 +524,15 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
         }
 
         /*
+         * Eat completions for unsignaled read WRs.
+         */
+        if (!wq->sq.oldest_read->signaled) {
+                advance_oldest_read(wq);
+                ret = -EAGAIN;
+                goto skip_cqe;
+        }
+
+        /*
          * Don't write to the HWCQ, so create a new read req CQE
          * in local memory.
          */
@@ -457,14 +542,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
         }
 
         if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
-                *cqe_flushed = t4_wq_in_error(wq);
+                *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
                 t4_set_wq_in_error(wq);
-                goto proc_cqe;
-        }
-
-        if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
-                ret = -EAGAIN;
-                goto skip_cqe;
         }
 
         /*
@@ -523,7 +602,21 @@ proc_cqe:
          * completion.
          */
         if (SQ_TYPE(hw_cqe)) {
-                wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe);
+                int idx = CQE_WRID_SQ_IDX(hw_cqe);
+                BUG_ON(idx > wq->sq.size);
+
+                /*
+                 * Account for any unsignaled completions completed by
+                 * this signaled completion. In this case, cidx points
+                 * to the first unsignaled one, and idx points to the
+                 * signaled one. So adjust in_use based on this delta.
+                 * if this is not completing any unsigned wrs, then the
+                 * delta will be 0.
+                 */
+                wq->sq.in_use -= idx - wq->sq.cidx;
+                BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size);
+
+                wq->sq.cidx = (uint16_t)idx;
                 PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
                 *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
                 t4_sq_consume(wq);
@@ -532,6 +625,7 @@ proc_cqe:
                 *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
                 BUG_ON(t4_rq_empty(wq));
                 t4_rq_consume(wq);
+                goto skip_cqe;
         }
 
 flush_wq:
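Editor's note: the in_use adjustment added to the proc_cqe path above (wq->sq.in_use -= idx - wq->sq.cidx) is what retires unsignaled WRs that are implicitly completed by a later signaled completion. Below is a small standalone model of that accounting on a ring of hypothetical size SQ_SIZE; it is a sketch, not the driver code, and the names as well as the explicit modulo wrap handling are assumptions of the sketch.

/*
 * Standalone model of the in_use accounting: a signaled completion at
 * ring index 'idx' also retires every unsignaled WR sitting between
 * the consumer index 'cidx' and 'idx'.
 */
#include <assert.h>
#include <stdio.h>

#define SQ_SIZE 16

struct sq_state {
        unsigned int cidx;      /* oldest outstanding WR */
        unsigned int in_use;    /* outstanding WRs, signaled or not */
};

/* Retire all WRs up to and including the signaled one at 'idx'. */
static void complete_signaled(struct sq_state *sq, unsigned int idx)
{
        /* 0 when the signaled WR is the oldest one, >0 when it skips
         * over unsignaled WRs posted before it */
        unsigned int delta = (idx + SQ_SIZE - sq->cidx) % SQ_SIZE;

        assert(delta < sq->in_use);
        sq->in_use -= delta;                    /* drop the skipped unsignaled WRs */
        sq->cidx = idx;                         /* jump to the signaled WR */

        sq->in_use--;                           /* ...then consume it as usual */
        sq->cidx = (sq->cidx + 1) % SQ_SIZE;
}

int main(void)
{
        /* three unsignaled WRs at slots 14, 15, 0 and a signaled one at 1 */
        struct sq_state sq = { .cidx = 14, .in_use = 4 };

        complete_signaled(&sq, 1);
        printf("in_use=%u cidx=%u\n", sq.in_use, sq.cidx);      /* in_use=0 cidx=2 */
        return 0;
}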
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index b3cae9f4c80e..23eaeabab93b 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -917,12 +917,11 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
 u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size);
 void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size);
 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb);
-void c4iw_flush_hw_cq(struct t4_cq *cq);
+void c4iw_flush_hw_cq(struct c4iw_cq *chp);
 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
 int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count);
+int c4iw_flush_sq(struct c4iw_qp *qhp);
 int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
 u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
 int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index a4975e1654a6..22d1f01dd951 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -737,6 +737,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                 swsqe->idx = qhp->wq.sq.pidx;
                 swsqe->complete = 0;
                 swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+                swsqe->flushed = 0;
                 swsqe->wr_id = wr->wr_id;
 
                 init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
@@ -1006,7 +1007,15 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
         /* locking hierarchy: cq lock first, then qp lock. */
         spin_lock_irqsave(&rchp->lock, flag);
         spin_lock(&qhp->lock);
-        c4iw_flush_hw_cq(&rchp->cq);
+
+        if (qhp->wq.flushed) {
+                spin_unlock(&qhp->lock);
+                spin_unlock_irqrestore(&rchp->lock, flag);
+                return;
+        }
+        qhp->wq.flushed = 1;
+
+        c4iw_flush_hw_cq(rchp);
         c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
         flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
         spin_unlock(&qhp->lock);
@@ -1020,9 +1029,9 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
         /* locking hierarchy: cq lock first, then qp lock. */
         spin_lock_irqsave(&schp->lock, flag);
         spin_lock(&qhp->lock);
-        c4iw_flush_hw_cq(&schp->cq);
-        c4iw_count_scqes(&schp->cq, &qhp->wq, &count);
-        flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
+        if (schp != rchp)
+                c4iw_flush_hw_cq(schp);
+        flushed = c4iw_flush_sq(qhp);
         spin_unlock(&qhp->lock);
         spin_unlock_irqrestore(&schp->lock, flag);
         if (flushed) {
@@ -1037,11 +1046,11 @@ static void flush_qp(struct c4iw_qp *qhp)
         struct c4iw_cq *rchp, *schp;
         unsigned long flag;
 
-        rchp = get_chp(qhp->rhp, qhp->attr.rcq);
-        schp = get_chp(qhp->rhp, qhp->attr.scq);
+        rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+        schp = to_c4iw_cq(qhp->ibqp.send_cq);
 
+        t4_set_wq_in_error(&qhp->wq);
         if (qhp->ibqp.uobject) {
-                t4_set_wq_in_error(&qhp->wq);
                 t4_set_cq_in_error(&rchp->cq);
                 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
                 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1330,8 +1339,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                         disconnect = 1;
                         c4iw_get_ep(&qhp->ep->com);
                 }
-                if (qhp->ibqp.uobject)
-                        t4_set_wq_in_error(&qhp->wq);
+                t4_set_wq_in_error(&qhp->wq);
                 ret = rdma_fini(rhp, qhp, ep);
                 if (ret)
                         goto err;
@@ -1340,8 +1348,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                 set_state(qhp, C4IW_QP_STATE_TERMINATE);
                 qhp->attr.layer_etype = attrs->layer_etype;
                 qhp->attr.ecode = attrs->ecode;
-                if (qhp->ibqp.uobject)
-                        t4_set_wq_in_error(&qhp->wq);
+                t4_set_wq_in_error(&qhp->wq);
                 ep = qhp->ep;
                 if (!internal)
                         terminate = 1;
@@ -1350,8 +1357,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                 break;
         case C4IW_QP_STATE_ERROR:
                 set_state(qhp, C4IW_QP_STATE_ERROR);
-                if (qhp->ibqp.uobject)
-                        t4_set_wq_in_error(&qhp->wq);
+                t4_set_wq_in_error(&qhp->wq);
                 if (!internal) {
                         abort = 1;
                         disconnect = 1;
@@ -1552,12 +1558,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 
         ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
 
-
         qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
         if (!qhp)
                 return ERR_PTR(-ENOMEM);
         qhp->wq.sq.size = sqsize;
         qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue;
+        qhp->wq.sq.flush_cidx = -1;
         qhp->wq.rq.size = rqsize;
         qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue;
 
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index ebcb03bd1b72..3a6a289b9d3e 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -36,9 +36,9 @@
 #include "t4_msg.h"
 #include "t4fw_ri_api.h"
 
-#define T4_MAX_NUM_QP (1<<16)
-#define T4_MAX_NUM_CQ (1<<15)
-#define T4_MAX_NUM_PD (1<<15)
+#define T4_MAX_NUM_QP 65536
+#define T4_MAX_NUM_CQ 65536
+#define T4_MAX_NUM_PD 65536
 #define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1)
 #define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES)
 #define T4_MAX_IQ_SIZE (65520 - 1)
@@ -269,6 +269,7 @@ struct t4_swsqe {
         int complete;
         int signaled;
         u16 idx;
+        int flushed;
 };
 
 static inline pgprot_t t4_pgprot_wc(pgprot_t prot)
@@ -300,6 +301,7 @@ struct t4_sq {
         u16 pidx;
         u16 wq_pidx;
         u16 flags;
+        short flush_cidx;
 };
 
 struct t4_swrqe {
@@ -330,6 +332,7 @@ struct t4_wq {
         void __iomem *db;
         void __iomem *gts;
         struct c4iw_rdev *rdev;
+        int flushed;
 };
 
 static inline int t4_rqes_posted(struct t4_wq *wq)
@@ -412,6 +415,9 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
 
 static inline void t4_sq_consume(struct t4_wq *wq)
 {
+        BUG_ON(wq->sq.in_use < 1);
+        if (wq->sq.cidx == wq->sq.flush_cidx)
+                wq->sq.flush_cidx = -1;
         wq->sq.in_use--;
         if (++wq->sq.cidx == wq->sq.size)
                 wq->sq.cidx = 0;
@@ -505,12 +511,18 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se)
 static inline void t4_swcq_produce(struct t4_cq *cq)
 {
         cq->sw_in_use++;
+        if (cq->sw_in_use == cq->size) {
+                PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+                cq->error = 1;
+                BUG_ON(1);
+        }
         if (++cq->sw_pidx == cq->size)
                 cq->sw_pidx = 0;
 }
 
 static inline void t4_swcq_consume(struct t4_cq *cq)
 {
+        BUG_ON(cq->sw_in_use < 1);
         cq->sw_in_use--;
         if (++cq->sw_cidx == cq->size)
                 cq->sw_cidx = 0;
@@ -552,6 +564,7 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
                 ret = -EOVERFLOW;
                 cq->error = 1;
                 printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+                BUG_ON(1);
         } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
                 *cqe = &cq->queue[cq->cidx];
                 ret = 0;
@@ -562,6 +575,12 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
 
 static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
 {
+        if (cq->sw_in_use == cq->size) {
+                PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+                cq->error = 1;
+                BUG_ON(1);
+                return NULL;
+        }
         if (cq->sw_in_use)
                 return &cq->sw_queue[cq->sw_cidx];
         return NULL;