aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ralph Campbell <ralph.campbell@qlogic.com> 2008-05-13 14:41:29 -0400
committer Roland Dreier <rolandd@cisco.com> 2008-05-13 14:41:29 -0400
commit e509be898d8937634437caa474b57ac12795e5bc (patch)
tree f99b0e3965ee0eaf89acccdba382689719881a29
parent 53dc1ca194c062aa9771e194047f27ec1ca592df (diff)
IB/ipath: Fix many locking issues when switching to error state
The send DMA hardware queue voided a number of prior assumptions about when a send is complete, which led to completions being generated out of order. There were also a number of locking issues when switching the QP to the error or reset states, and we implement the IB_QPS_SQD state.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_qp.c 183
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_rc.c 151
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ruc.c 168
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_uc.c 57
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ud.c 66
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_user_sdma.h 2
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.c 174
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.h 57
8 files changed, 554 insertions, 304 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 6f98632877eb..4715911101e4 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
242{ 242{
243 struct ipath_qp *q, **qpp; 243 struct ipath_qp *q, **qpp;
244 unsigned long flags; 244 unsigned long flags;
245 int fnd = 0;
246 245
247 spin_lock_irqsave(&qpt->lock, flags); 246 spin_lock_irqsave(&qpt->lock, flags);
248 247
@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
253 *qpp = qp->next; 252 *qpp = qp->next;
254 qp->next = NULL; 253 qp->next = NULL;
255 atomic_dec(&qp->refcount); 254 atomic_dec(&qp->refcount);
256 fnd = 1;
257 break; 255 break;
258 } 256 }
259 } 257 }
260 258
261 spin_unlock_irqrestore(&qpt->lock, flags); 259 spin_unlock_irqrestore(&qpt->lock, flags);
262
263 if (!fnd)
264 return;
265
266 free_qpn(qpt, qp->ibqp.qp_num);
267
268 wait_event(qp->wait, !atomic_read(&qp->refcount));
269} 260}
270 261
271/** 262/**
272 * ipath_free_all_qps - remove all QPs from the table 263 * ipath_free_all_qps - check for QPs still in use
273 * @qpt: the QP table to empty 264 * @qpt: the QP table to empty
265 *
266 * There should not be any QPs still in use.
267 * Free memory for table.
274 */ 268 */
275void ipath_free_all_qps(struct ipath_qp_table *qpt) 269unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
276{ 270{
277 unsigned long flags; 271 unsigned long flags;
278 struct ipath_qp *qp, *nqp; 272 struct ipath_qp *qp;
279 u32 n; 273 u32 n, qp_inuse = 0;
280 274
275 spin_lock_irqsave(&qpt->lock, flags);
281 for (n = 0; n < qpt->max; n++) { 276 for (n = 0; n < qpt->max; n++) {
282 spin_lock_irqsave(&qpt->lock, flags);
283 qp = qpt->table[n]; 277 qp = qpt->table[n];
284 qpt->table[n] = NULL; 278 qpt->table[n] = NULL;
285 spin_unlock_irqrestore(&qpt->lock, flags); 279
286 280 for (; qp; qp = qp->next)
287 while (qp) { 281 qp_inuse++;
288 nqp = qp->next;
289 free_qpn(qpt, qp->ibqp.qp_num);
290 if (!atomic_dec_and_test(&qp->refcount) ||
291 !ipath_destroy_qp(&qp->ibqp))
292 ipath_dbg("QP memory leak!\n");
293 qp = nqp;
294 }
295 } 282 }
283 spin_unlock_irqrestore(&qpt->lock, flags);
296 284
297 for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { 285 for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
298 if (qpt->map[n].page) 286 if (qpt->map[n].page)
299 free_page((unsigned long)qpt->map[n].page); 287 free_page((unsigned long) qpt->map[n].page);
300 } 288 return qp_inuse;
301} 289}
302 290
303/** 291/**
@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
336 qp->remote_qpn = 0; 324 qp->remote_qpn = 0;
337 qp->qkey = 0; 325 qp->qkey = 0;
338 qp->qp_access_flags = 0; 326 qp->qp_access_flags = 0;
339 qp->s_busy = 0; 327 atomic_set(&qp->s_dma_busy, 0);
340 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; 328 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
341 qp->s_hdrwords = 0; 329 qp->s_hdrwords = 0;
342 qp->s_wqe = NULL; 330 qp->s_wqe = NULL;
343 qp->s_pkt_delay = 0; 331 qp->s_pkt_delay = 0;
332 qp->s_draining = 0;
344 qp->s_psn = 0; 333 qp->s_psn = 0;
345 qp->r_psn = 0; 334 qp->r_psn = 0;
346 qp->r_msn = 0; 335 qp->r_msn = 0;
@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
353 } 342 }
354 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 343 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
355 qp->r_nak_state = 0; 344 qp->r_nak_state = 0;
356 qp->r_wrid_valid = 0; 345 qp->r_aflags = 0;
346 qp->r_flags = 0;
357 qp->s_rnr_timeout = 0; 347 qp->s_rnr_timeout = 0;
358 qp->s_head = 0; 348 qp->s_head = 0;
359 qp->s_tail = 0; 349 qp->s_tail = 0;
@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
361 qp->s_last = 0; 351 qp->s_last = 0;
362 qp->s_ssn = 1; 352 qp->s_ssn = 1;
363 qp->s_lsn = 0; 353 qp->s_lsn = 0;
364 qp->s_wait_credit = 0;
365 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); 354 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
366 qp->r_head_ack_queue = 0; 355 qp->r_head_ack_queue = 0;
367 qp->s_tail_ack_queue = 0; 356 qp->s_tail_ack_queue = 0;
@@ -370,7 +359,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
370 qp->r_rq.wq->head = 0; 359 qp->r_rq.wq->head = 0;
371 qp->r_rq.wq->tail = 0; 360 qp->r_rq.wq->tail = 0;
372 } 361 }
373 qp->r_reuse_sge = 0;
374} 362}
375 363
376/** 364/**
@@ -402,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
402 list_del_init(&qp->piowait); 390 list_del_init(&qp->piowait);
403 spin_unlock(&dev->pending_lock); 391 spin_unlock(&dev->pending_lock);
404 392
405 wc.vendor_err = 0; 393 /* Schedule the sending tasklet to drain the send work queue. */
406 wc.byte_len = 0; 394 if (qp->s_last != qp->s_head)
407 wc.imm_data = 0; 395 ipath_schedule_send(qp);
396
397 memset(&wc, 0, sizeof(wc));
408 wc.qp = &qp->ibqp; 398 wc.qp = &qp->ibqp;
409 wc.src_qp = 0; 399 wc.opcode = IB_WC_RECV;
410 wc.wc_flags = 0; 400
411 wc.pkey_index = 0; 401 if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
412 wc.slid = 0;
413 wc.sl = 0;
414 wc.dlid_path_bits = 0;
415 wc.port_num = 0;
416 if (qp->r_wrid_valid) {
417 qp->r_wrid_valid = 0;
418 wc.wr_id = qp->r_wr_id; 402 wc.wr_id = qp->r_wr_id;
419 wc.opcode = IB_WC_RECV;
420 wc.status = err; 403 wc.status = err;
421 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); 404 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
422 } 405 }
423 wc.status = IB_WC_WR_FLUSH_ERR; 406 wc.status = IB_WC_WR_FLUSH_ERR;
424 407
425 while (qp->s_last != qp->s_head) {
426 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
427
428 wc.wr_id = wqe->wr.wr_id;
429 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
430 if (++qp->s_last >= qp->s_size)
431 qp->s_last = 0;
432 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
433 }
434 qp->s_cur = qp->s_tail = qp->s_head;
435 qp->s_hdrwords = 0;
436 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
437
438 if (qp->r_rq.wq) { 408 if (qp->r_rq.wq) {
439 struct ipath_rwq *wq; 409 struct ipath_rwq *wq;
440 u32 head; 410 u32 head;
@@ -450,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
450 tail = wq->tail; 420 tail = wq->tail;
451 if (tail >= qp->r_rq.size) 421 if (tail >= qp->r_rq.size)
452 tail = 0; 422 tail = 0;
453 wc.opcode = IB_WC_RECV;
454 while (tail != head) { 423 while (tail != head) {
455 wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; 424 wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
456 if (++tail >= qp->r_rq.size) 425 if (++tail >= qp->r_rq.size)
@@ -482,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
482 struct ipath_ibdev *dev = to_idev(ibqp->device); 451 struct ipath_ibdev *dev = to_idev(ibqp->device);
483 struct ipath_qp *qp = to_iqp(ibqp); 452 struct ipath_qp *qp = to_iqp(ibqp);
484 enum ib_qp_state cur_state, new_state; 453 enum ib_qp_state cur_state, new_state;
485 unsigned long flags;
486 int lastwqe = 0; 454 int lastwqe = 0;
487 int ret; 455 int ret;
488 456
489 spin_lock_irqsave(&qp->s_lock, flags); 457 spin_lock_irq(&qp->s_lock);
490 458
491 cur_state = attr_mask & IB_QP_CUR_STATE ? 459 cur_state = attr_mask & IB_QP_CUR_STATE ?
492 attr->cur_qp_state : qp->state; 460 attr->cur_qp_state : qp->state;
@@ -539,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
539 507
540 switch (new_state) { 508 switch (new_state) {
541 case IB_QPS_RESET: 509 case IB_QPS_RESET:
510 if (qp->state != IB_QPS_RESET) {
511 qp->state = IB_QPS_RESET;
512 spin_lock(&dev->pending_lock);
513 if (!list_empty(&qp->timerwait))
514 list_del_init(&qp->timerwait);
515 if (!list_empty(&qp->piowait))
516 list_del_init(&qp->piowait);
517 spin_unlock(&dev->pending_lock);
518 qp->s_flags &= ~IPATH_S_ANY_WAIT;
519 spin_unlock_irq(&qp->s_lock);
520 /* Stop the sending tasklet */
521 tasklet_kill(&qp->s_task);
522 wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
523 spin_lock_irq(&qp->s_lock);
524 }
542 ipath_reset_qp(qp, ibqp->qp_type); 525 ipath_reset_qp(qp, ibqp->qp_type);
543 break; 526 break;
544 527
528 case IB_QPS_SQD:
529 qp->s_draining = qp->s_last != qp->s_cur;
530 qp->state = new_state;
531 break;
532
533 case IB_QPS_SQE:
534 if (qp->ibqp.qp_type == IB_QPT_RC)
535 goto inval;
536 qp->state = new_state;
537 break;
538
545 case IB_QPS_ERR: 539 case IB_QPS_ERR:
546 lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); 540 lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
547 break; 541 break;
548 542
549 default: 543 default:
544 qp->state = new_state;
550 break; 545 break;
551
552 } 546 }
553 547
554 if (attr_mask & IB_QP_PKEY_INDEX) 548 if (attr_mask & IB_QP_PKEY_INDEX)
@@ -601,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
601 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) 595 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
602 qp->s_max_rd_atomic = attr->max_rd_atomic; 596 qp->s_max_rd_atomic = attr->max_rd_atomic;
603 597
604 qp->state = new_state; 598 spin_unlock_irq(&qp->s_lock);
605 spin_unlock_irqrestore(&qp->s_lock, flags);
606 599
607 if (lastwqe) { 600 if (lastwqe) {
608 struct ib_event ev; 601 struct ib_event ev;
@@ -616,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
616 goto bail; 609 goto bail;
617 610
618inval: 611inval:
619 spin_unlock_irqrestore(&qp->s_lock, flags); 612 spin_unlock_irq(&qp->s_lock);
620 ret = -EINVAL; 613 ret = -EINVAL;
621 614
622bail: 615bail:
@@ -647,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
647 attr->pkey_index = qp->s_pkey_index; 640 attr->pkey_index = qp->s_pkey_index;
648 attr->alt_pkey_index = 0; 641 attr->alt_pkey_index = 0;
649 attr->en_sqd_async_notify = 0; 642 attr->en_sqd_async_notify = 0;
650 attr->sq_draining = 0; 643 attr->sq_draining = qp->s_draining;
651 attr->max_rd_atomic = qp->s_max_rd_atomic; 644 attr->max_rd_atomic = qp->s_max_rd_atomic;
652 attr->max_dest_rd_atomic = qp->r_max_rd_atomic; 645 attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
653 attr->min_rnr_timer = qp->r_min_rnr_timer; 646 attr->min_rnr_timer = qp->r_min_rnr_timer;
@@ -837,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
837 spin_lock_init(&qp->r_rq.lock); 830 spin_lock_init(&qp->r_rq.lock);
838 atomic_set(&qp->refcount, 0); 831 atomic_set(&qp->refcount, 0);
839 init_waitqueue_head(&qp->wait); 832 init_waitqueue_head(&qp->wait);
833 init_waitqueue_head(&qp->wait_dma);
840 tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); 834 tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
841 INIT_LIST_HEAD(&qp->piowait); 835 INIT_LIST_HEAD(&qp->piowait);
842 INIT_LIST_HEAD(&qp->timerwait); 836 INIT_LIST_HEAD(&qp->timerwait);
@@ -930,6 +924,7 @@ bail_ip:
930 else 924 else
931 vfree(qp->r_rq.wq); 925 vfree(qp->r_rq.wq);
932 ipath_free_qp(&dev->qp_table, qp); 926 ipath_free_qp(&dev->qp_table, qp);
927 free_qpn(&dev->qp_table, qp->ibqp.qp_num);
933bail_qp: 928bail_qp:
934 kfree(qp); 929 kfree(qp);
935bail_swq: 930bail_swq:
@@ -951,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
951{ 946{
952 struct ipath_qp *qp = to_iqp(ibqp); 947 struct ipath_qp *qp = to_iqp(ibqp);
953 struct ipath_ibdev *dev = to_idev(ibqp->device); 948 struct ipath_ibdev *dev = to_idev(ibqp->device);
954 unsigned long flags;
955 949
956 spin_lock_irqsave(&qp->s_lock, flags); 950 /* Make sure HW and driver activity is stopped. */
957 qp->state = IB_QPS_ERR; 951 spin_lock_irq(&qp->s_lock);
958 spin_unlock_irqrestore(&qp->s_lock, flags); 952 if (qp->state != IB_QPS_RESET) {
959 spin_lock(&dev->n_qps_lock); 953 qp->state = IB_QPS_RESET;
960 dev->n_qps_allocated--; 954 spin_lock(&dev->pending_lock);
961 spin_unlock(&dev->n_qps_lock); 955 if (!list_empty(&qp->timerwait))
956 list_del_init(&qp->timerwait);
957 if (!list_empty(&qp->piowait))
958 list_del_init(&qp->piowait);
959 spin_unlock(&dev->pending_lock);
960 qp->s_flags &= ~IPATH_S_ANY_WAIT;
961 spin_unlock_irq(&qp->s_lock);
962 /* Stop the sending tasklet */
963 tasklet_kill(&qp->s_task);
964 wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
965 } else
966 spin_unlock_irq(&qp->s_lock);
962 967
963 /* Stop the sending tasklet. */ 968 ipath_free_qp(&dev->qp_table, qp);
964 tasklet_kill(&qp->s_task);
965 969
966 if (qp->s_tx) { 970 if (qp->s_tx) {
967 atomic_dec(&qp->refcount); 971 atomic_dec(&qp->refcount);
968 if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) 972 if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
969 kfree(qp->s_tx->txreq.map_addr); 973 kfree(qp->s_tx->txreq.map_addr);
974 spin_lock_irq(&dev->pending_lock);
975 list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
976 spin_unlock_irq(&dev->pending_lock);
977 qp->s_tx = NULL;
970 } 978 }
971 979
972 /* Make sure the QP isn't on the timeout list. */ 980 wait_event(qp->wait, !atomic_read(&qp->refcount));
973 spin_lock_irqsave(&dev->pending_lock, flags);
974 if (!list_empty(&qp->timerwait))
975 list_del_init(&qp->timerwait);
976 if (!list_empty(&qp->piowait))
977 list_del_init(&qp->piowait);
978 if (qp->s_tx)
979 list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
980 spin_unlock_irqrestore(&dev->pending_lock, flags);
981 981
982 /* 982 /* all user's cleaned up, mark it available */
983 * Make sure that the QP is not in the QPN table so receive 983 free_qpn(&dev->qp_table, qp->ibqp.qp_num);
984 * interrupts will discard packets for this QP. XXX Also remove QP 984 spin_lock(&dev->n_qps_lock);
985 * from multicast table. 985 dev->n_qps_allocated--;
986 */ 986 spin_unlock(&dev->n_qps_lock);
987 if (atomic_read(&qp->refcount) != 0)
988 ipath_free_qp(&dev->qp_table, qp);
989 987
990 if (qp->ip) 988 if (qp->ip)
991 kref_put(&qp->ip->ref, ipath_release_mmap_info); 989 kref_put(&qp->ip->ref, ipath_release_mmap_info);
@@ -1055,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
1055 } 1053 }
1056 1054
1057 /* Restart sending if it was blocked due to lack of credits. */ 1055 /* Restart sending if it was blocked due to lack of credits. */
1058 if (qp->s_cur != qp->s_head && 1056 if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
1057 qp->s_cur != qp->s_head &&
1059 (qp->s_lsn == (u32) -1 || 1058 (qp->s_lsn == (u32) -1 ||
1060 ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, 1059 ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
1061 qp->s_lsn + 1) <= 0)) 1060 qp->s_lsn + 1) <= 0))
1062 tasklet_hi_schedule(&qp->s_task); 1061 ipath_schedule_send(qp);
1063} 1062}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index b4b26c3aa613..5b5276a270bc 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
92 u32 bth0; 92 u32 bth0;
93 u32 bth2; 93 u32 bth2;
94 94
95 /* Don't send an ACK if we aren't supposed to. */
96 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
97 goto bail;
98
95 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 99 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
96 hwords = 5; 100 hwords = 5;
97 101
@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp)
238 ipath_make_rc_ack(dev, qp, ohdr, pmtu)) 242 ipath_make_rc_ack(dev, qp, ohdr, pmtu))
239 goto done; 243 goto done;
240 244
241 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || 245 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
242 qp->s_rnr_timeout || qp->s_wait_credit) 246 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
243 goto bail; 247 goto bail;
248 /* We are in the error state, flush the work request. */
249 if (qp->s_last == qp->s_head)
250 goto bail;
251 /* If DMAs are in progress, we can't flush immediately. */
252 if (atomic_read(&qp->s_dma_busy)) {
253 qp->s_flags |= IPATH_S_WAIT_DMA;
254 goto bail;
255 }
256 wqe = get_swqe_ptr(qp, qp->s_last);
257 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
258 goto done;
259 }
244 260
245 /* Limit the number of packets sent without an ACK. */ 261 /* Leave BUSY set until RNR timeout. */
246 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { 262 if (qp->s_rnr_timeout) {
247 qp->s_wait_credit = 1; 263 qp->s_flags |= IPATH_S_WAITING;
248 dev->n_rc_stalls++;
249 goto bail; 264 goto bail;
250 } 265 }
251 266
@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
257 wqe = get_swqe_ptr(qp, qp->s_cur); 272 wqe = get_swqe_ptr(qp, qp->s_cur);
258 switch (qp->s_state) { 273 switch (qp->s_state) {
259 default: 274 default:
275 if (!(ib_ipath_state_ops[qp->state] &
276 IPATH_PROCESS_NEXT_SEND_OK))
277 goto bail;
260 /* 278 /*
261 * Resend an old request or start a new one. 279 * Resend an old request or start a new one.
262 * 280 *
@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
294 case IB_WR_SEND_WITH_IMM: 312 case IB_WR_SEND_WITH_IMM:
295 /* If no credit, return. */ 313 /* If no credit, return. */
296 if (qp->s_lsn != (u32) -1 && 314 if (qp->s_lsn != (u32) -1 &&
297 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 315 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
316 qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
298 goto bail; 317 goto bail;
318 }
299 wqe->lpsn = wqe->psn; 319 wqe->lpsn = wqe->psn;
300 if (len > pmtu) { 320 if (len > pmtu) {
301 wqe->lpsn += (len - 1) / pmtu; 321 wqe->lpsn += (len - 1) / pmtu;
@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
325 case IB_WR_RDMA_WRITE_WITH_IMM: 345 case IB_WR_RDMA_WRITE_WITH_IMM:
326 /* If no credit, return. */ 346 /* If no credit, return. */
327 if (qp->s_lsn != (u32) -1 && 347 if (qp->s_lsn != (u32) -1 &&
328 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 348 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
349 qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
329 goto bail; 350 goto bail;
351 }
330 ohdr->u.rc.reth.vaddr = 352 ohdr->u.rc.reth.vaddr =
331 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 353 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
332 ohdr->u.rc.reth.rkey = 354 ohdr->u.rc.reth.rkey =
@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp)
570 ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); 592 ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
571done: 593done:
572 ret = 1; 594 ret = 1;
595 goto unlock;
596
573bail: 597bail:
598 qp->s_flags &= ~IPATH_S_BUSY;
599unlock:
574 spin_unlock_irqrestore(&qp->s_lock, flags); 600 spin_unlock_irqrestore(&qp->s_lock, flags);
575 return ret; 601 return ret;
576} 602}
@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp)
606 632
607 spin_unlock_irqrestore(&qp->s_lock, flags); 633 spin_unlock_irqrestore(&qp->s_lock, flags);
608 634
635 /* Don't try to send ACKs if the link isn't ACTIVE */
609 dd = dev->dd; 636 dd = dev->dd;
637 if (!(dd->ipath_flags & IPATH_LINKACTIVE))
638 goto done;
639
610 piobuf = ipath_getpiobuf(dd, 0, NULL); 640 piobuf = ipath_getpiobuf(dd, 0, NULL);
611 if (!piobuf) { 641 if (!piobuf) {
612 /* 642 /*
@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp)
668 goto done; 698 goto done;
669 699
670queue_ack: 700queue_ack:
671 dev->n_rc_qacks++; 701 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
672 qp->s_flags |= IPATH_S_ACK_PENDING; 702 dev->n_rc_qacks++;
673 qp->s_nak_state = qp->r_nak_state; 703 qp->s_flags |= IPATH_S_ACK_PENDING;
674 qp->s_ack_psn = qp->r_ack_psn; 704 qp->s_nak_state = qp->r_nak_state;
705 qp->s_ack_psn = qp->r_ack_psn;
706
707 /* Schedule the send tasklet. */
708 ipath_schedule_send(qp);
709 }
675 spin_unlock_irqrestore(&qp->s_lock, flags); 710 spin_unlock_irqrestore(&qp->s_lock, flags);
676
677 /* Call ipath_do_rc_send() in another thread. */
678 tasklet_hi_schedule(&qp->s_task);
679
680done: 711done:
681 return; 712 return;
682} 713}
@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
735 /* 766 /*
736 * Set the state to restart in the middle of a request. 767 * Set the state to restart in the middle of a request.
737 * Don't change the s_sge, s_cur_sge, or s_cur_size. 768 * Don't change the s_sge, s_cur_sge, or s_cur_size.
738 * See ipath_do_rc_send(). 769 * See ipath_make_rc_req().
739 */ 770 */
740 switch (opcode) { 771 switch (opcode) {
741 case IB_WR_SEND: 772 case IB_WR_SEND:
@@ -801,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
801 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; 832 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
802 833
803 reset_psn(qp, psn); 834 reset_psn(qp, psn);
804 tasklet_hi_schedule(&qp->s_task); 835 ipath_schedule_send(qp);
805 836
806bail: 837bail:
807 return; 838 return;
@@ -809,13 +840,7 @@ bail:
809 840
810static inline void update_last_psn(struct ipath_qp *qp, u32 psn) 841static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
811{ 842{
812 if (qp->s_last_psn != psn) { 843 qp->s_last_psn = psn;
813 qp->s_last_psn = psn;
814 if (qp->s_wait_credit) {
815 qp->s_wait_credit = 0;
816 tasklet_hi_schedule(&qp->s_task);
817 }
818 }
819} 844}
820 845
821/** 846/**
@@ -915,14 +940,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
915 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { 940 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
916 qp->s_num_rd_atomic--; 941 qp->s_num_rd_atomic--;
917 /* Restart sending task if fence is complete */ 942 /* Restart sending task if fence is complete */
918 if ((qp->s_flags & IPATH_S_FENCE_PENDING) && 943 if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
919 !qp->s_num_rd_atomic) { 944 !qp->s_num_rd_atomic) ||
920 qp->s_flags &= ~IPATH_S_FENCE_PENDING; 945 qp->s_flags & IPATH_S_RDMAR_PENDING)
921 tasklet_hi_schedule(&qp->s_task); 946 ipath_schedule_send(qp);
922 } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
923 qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
924 tasklet_hi_schedule(&qp->s_task);
925 }
926 } 947 }
927 /* Post a send completion queue entry if requested. */ 948 /* Post a send completion queue entry if requested. */
928 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || 949 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -956,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
956 } else { 977 } else {
957 if (++qp->s_last >= qp->s_size) 978 if (++qp->s_last >= qp->s_size)
958 qp->s_last = 0; 979 qp->s_last = 0;
980 if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
981 qp->s_draining = 0;
959 if (qp->s_last == qp->s_tail) 982 if (qp->s_last == qp->s_tail)
960 break; 983 break;
961 wqe = get_swqe_ptr(qp, qp->s_last); 984 wqe = get_swqe_ptr(qp, qp->s_last);
@@ -979,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
979 */ 1002 */
980 if (ipath_cmp24(qp->s_psn, psn) <= 0) { 1003 if (ipath_cmp24(qp->s_psn, psn) <= 0) {
981 reset_psn(qp, psn + 1); 1004 reset_psn(qp, psn + 1);
982 tasklet_hi_schedule(&qp->s_task); 1005 ipath_schedule_send(qp);
983 } 1006 }
984 } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { 1007 } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
985 qp->s_state = OP(SEND_LAST); 1008 qp->s_state = OP(SEND_LAST);
@@ -1018,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
1018 ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & 1041 ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
1019 IPATH_AETH_CREDIT_MASK]; 1042 IPATH_AETH_CREDIT_MASK];
1020 ipath_insert_rnr_queue(qp); 1043 ipath_insert_rnr_queue(qp);
1044 ipath_schedule_send(qp);
1021 goto bail; 1045 goto bail;
1022 1046
1023 case 3: /* NAK */ 1047 case 3: /* NAK */
@@ -1108,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1108 1132
1109 spin_lock_irqsave(&qp->s_lock, flags); 1133 spin_lock_irqsave(&qp->s_lock, flags);
1110 1134
1135 /* Double check we can process this now that we hold the s_lock. */
1136 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1137 goto ack_done;
1138
1111 /* Ignore invalid responses. */ 1139 /* Ignore invalid responses. */
1112 if (ipath_cmp24(psn, qp->s_next_psn) >= 0) 1140 if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
1113 goto ack_done; 1141 goto ack_done;
@@ -1343,7 +1371,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1343 psn &= IPATH_PSN_MASK; 1371 psn &= IPATH_PSN_MASK;
1344 e = NULL; 1372 e = NULL;
1345 old_req = 1; 1373 old_req = 1;
1374
1346 spin_lock_irqsave(&qp->s_lock, flags); 1375 spin_lock_irqsave(&qp->s_lock, flags);
1376 /* Double check we can process this now that we hold the s_lock. */
1377 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1378 goto unlock_done;
1379
1347 for (i = qp->r_head_ack_queue; ; i = prev) { 1380 for (i = qp->r_head_ack_queue; ; i = prev) {
1348 if (i == qp->s_tail_ack_queue) 1381 if (i == qp->s_tail_ack_queue)
1349 old_req = 0; 1382 old_req = 0;
@@ -1471,7 +1504,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1471 break; 1504 break;
1472 } 1505 }
1473 qp->r_nak_state = 0; 1506 qp->r_nak_state = 0;
1474 tasklet_hi_schedule(&qp->s_task); 1507 ipath_schedule_send(qp);
1475 1508
1476unlock_done: 1509unlock_done:
1477 spin_unlock_irqrestore(&qp->s_lock, flags); 1510 spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1503,18 +1536,15 @@ void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
1503 1536
1504static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) 1537static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
1505{ 1538{
1506 unsigned long flags;
1507 unsigned next; 1539 unsigned next;
1508 1540
1509 next = n + 1; 1541 next = n + 1;
1510 if (next > IPATH_MAX_RDMA_ATOMIC) 1542 if (next > IPATH_MAX_RDMA_ATOMIC)
1511 next = 0; 1543 next = 0;
1512 spin_lock_irqsave(&qp->s_lock, flags);
1513 if (n == qp->s_tail_ack_queue) { 1544 if (n == qp->s_tail_ack_queue) {
1514 qp->s_tail_ack_queue = next; 1545 qp->s_tail_ack_queue = next;
1515 qp->s_ack_state = OP(ACKNOWLEDGE); 1546 qp->s_ack_state = OP(ACKNOWLEDGE);
1516 } 1547 }
1517 spin_unlock_irqrestore(&qp->s_lock, flags);
1518} 1548}
1519 1549
1520/** 1550/**
@@ -1543,6 +1573,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1543 int diff; 1573 int diff;
1544 struct ib_reth *reth; 1574 struct ib_reth *reth;
1545 int header_in_data; 1575 int header_in_data;
1576 unsigned long flags;
1546 1577
1547 /* Validate the SLID. See Ch. 9.6.1.5 */ 1578 /* Validate the SLID. See Ch. 9.6.1.5 */
1548 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) 1579 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
@@ -1690,9 +1721,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1690 goto nack_inv; 1721 goto nack_inv;
1691 ipath_copy_sge(&qp->r_sge, data, tlen); 1722 ipath_copy_sge(&qp->r_sge, data, tlen);
1692 qp->r_msn++; 1723 qp->r_msn++;
1693 if (!qp->r_wrid_valid) 1724 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
1694 break; 1725 break;
1695 qp->r_wrid_valid = 0;
1696 wc.wr_id = qp->r_wr_id; 1726 wc.wr_id = qp->r_wr_id;
1697 wc.status = IB_WC_SUCCESS; 1727 wc.status = IB_WC_SUCCESS;
1698 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || 1728 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
@@ -1764,9 +1794,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1764 next = qp->r_head_ack_queue + 1; 1794 next = qp->r_head_ack_queue + 1;
1765 if (next > IPATH_MAX_RDMA_ATOMIC) 1795 if (next > IPATH_MAX_RDMA_ATOMIC)
1766 next = 0; 1796 next = 0;
1797 spin_lock_irqsave(&qp->s_lock, flags);
1798 /* Double check we can process this while holding the s_lock. */
1799 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1800 goto unlock;
1767 if (unlikely(next == qp->s_tail_ack_queue)) { 1801 if (unlikely(next == qp->s_tail_ack_queue)) {
1768 if (!qp->s_ack_queue[next].sent) 1802 if (!qp->s_ack_queue[next].sent)
1769 goto nack_inv; 1803 goto nack_inv_unlck;
1770 ipath_update_ack_queue(qp, next); 1804 ipath_update_ack_queue(qp, next);
1771 } 1805 }
1772 e = &qp->s_ack_queue[qp->r_head_ack_queue]; 1806 e = &qp->s_ack_queue[qp->r_head_ack_queue];
@@ -1787,7 +1821,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1787 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, 1821 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
1788 rkey, IB_ACCESS_REMOTE_READ); 1822 rkey, IB_ACCESS_REMOTE_READ);
1789 if (unlikely(!ok)) 1823 if (unlikely(!ok))
1790 goto nack_acc; 1824 goto nack_acc_unlck;
1791 /* 1825 /*
1792 * Update the next expected PSN. We add 1 later 1826 * Update the next expected PSN. We add 1 later
1793 * below, so only add the remainder here. 1827 * below, so only add the remainder here.
@@ -1814,13 +1848,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1814 qp->r_psn++; 1848 qp->r_psn++;
1815 qp->r_state = opcode; 1849 qp->r_state = opcode;
1816 qp->r_nak_state = 0; 1850 qp->r_nak_state = 0;
1817 barrier();
1818 qp->r_head_ack_queue = next; 1851 qp->r_head_ack_queue = next;
1819 1852
1820 /* Call ipath_do_rc_send() in another thread. */ 1853 /* Schedule the send tasklet. */
1821 tasklet_hi_schedule(&qp->s_task); 1854 ipath_schedule_send(qp);
1822 1855
1823 goto done; 1856 goto unlock;
1824 } 1857 }
1825 1858
1826 case OP(COMPARE_SWAP): 1859 case OP(COMPARE_SWAP):
@@ -1839,9 +1872,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1839 next = qp->r_head_ack_queue + 1; 1872 next = qp->r_head_ack_queue + 1;
1840 if (next > IPATH_MAX_RDMA_ATOMIC) 1873 if (next > IPATH_MAX_RDMA_ATOMIC)
1841 next = 0; 1874 next = 0;
1875 spin_lock_irqsave(&qp->s_lock, flags);
1876 /* Double check we can process this while holding the s_lock. */
1877 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1878 goto unlock;
1842 if (unlikely(next == qp->s_tail_ack_queue)) { 1879 if (unlikely(next == qp->s_tail_ack_queue)) {
1843 if (!qp->s_ack_queue[next].sent) 1880 if (!qp->s_ack_queue[next].sent)
1844 goto nack_inv; 1881 goto nack_inv_unlck;
1845 ipath_update_ack_queue(qp, next); 1882 ipath_update_ack_queue(qp, next);
1846 } 1883 }
1847 if (!header_in_data) 1884 if (!header_in_data)
@@ -1851,13 +1888,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1851 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | 1888 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
1852 be32_to_cpu(ateth->vaddr[1]); 1889 be32_to_cpu(ateth->vaddr[1]);
1853 if (unlikely(vaddr & (sizeof(u64) - 1))) 1890 if (unlikely(vaddr & (sizeof(u64) - 1)))
1854 goto nack_inv; 1891 goto nack_inv_unlck;
1855 rkey = be32_to_cpu(ateth->rkey); 1892 rkey = be32_to_cpu(ateth->rkey);
1856 /* Check rkey & NAK */ 1893 /* Check rkey & NAK */
1857 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, 1894 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
1858 sizeof(u64), vaddr, rkey, 1895 sizeof(u64), vaddr, rkey,
1859 IB_ACCESS_REMOTE_ATOMIC))) 1896 IB_ACCESS_REMOTE_ATOMIC)))
1860 goto nack_acc; 1897 goto nack_acc_unlck;
1861 /* Perform atomic OP and save result. */ 1898 /* Perform atomic OP and save result. */
1862 maddr = (atomic64_t *) qp->r_sge.sge.vaddr; 1899 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
1863 sdata = be64_to_cpu(ateth->swap_data); 1900 sdata = be64_to_cpu(ateth->swap_data);
@@ -1874,13 +1911,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1874 qp->r_psn++; 1911 qp->r_psn++;
1875 qp->r_state = opcode; 1912 qp->r_state = opcode;
1876 qp->r_nak_state = 0; 1913 qp->r_nak_state = 0;
1877 barrier();
1878 qp->r_head_ack_queue = next; 1914 qp->r_head_ack_queue = next;
1879 1915
1880 /* Call ipath_do_rc_send() in another thread. */ 1916 /* Schedule the send tasklet. */
1881 tasklet_hi_schedule(&qp->s_task); 1917 ipath_schedule_send(qp);
1882 1918
1883 goto done; 1919 goto unlock;
1884 } 1920 }
1885 1921
1886 default: 1922 default:
@@ -1901,19 +1937,26 @@ rnr_nak:
1901 qp->r_ack_psn = qp->r_psn; 1937 qp->r_ack_psn = qp->r_psn;
1902 goto send_ack; 1938 goto send_ack;
1903 1939
1940nack_inv_unlck:
1941 spin_unlock_irqrestore(&qp->s_lock, flags);
1904nack_inv: 1942nack_inv:
1905 ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); 1943 ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
1906 qp->r_nak_state = IB_NAK_INVALID_REQUEST; 1944 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1907 qp->r_ack_psn = qp->r_psn; 1945 qp->r_ack_psn = qp->r_psn;
1908 goto send_ack; 1946 goto send_ack;
1909 1947
1948nack_acc_unlck:
1949 spin_unlock_irqrestore(&qp->s_lock, flags);
1910nack_acc: 1950nack_acc:
1911 ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); 1951 ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
1912 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; 1952 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1913 qp->r_ack_psn = qp->r_psn; 1953 qp->r_ack_psn = qp->r_psn;
1914send_ack: 1954send_ack:
1915 send_rc_ack(qp); 1955 send_rc_ack(qp);
1956 goto done;
1916 1957
1958unlock:
1959 spin_unlock_irqrestore(&qp->s_lock, flags);
1917done: 1960done:
1918 return; 1961 return;
1919} 1962}
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index c716a03dd399..a4b5521567fe 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
78 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device 78 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
79 * @qp: the QP 79 * @qp: the QP
80 * 80 *
81 * Called with the QP s_lock held and interrupts disabled.
81 * XXX Use a simple list for now. We might need a priority 82 * XXX Use a simple list for now. We might need a priority
82 * queue if we have lots of QPs waiting for RNR timeouts 83 * queue if we have lots of QPs waiting for RNR timeouts
83 * but that should be rare. 84 * but that should be rare.
@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
85void ipath_insert_rnr_queue(struct ipath_qp *qp) 86void ipath_insert_rnr_queue(struct ipath_qp *qp)
86{ 87{
87 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 88 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
88 unsigned long flags;
89 89
90 spin_lock_irqsave(&dev->pending_lock, flags); 90 /* We already did a spin_lock_irqsave(), so just use spin_lock */
91 spin_lock(&dev->pending_lock);
91 if (list_empty(&dev->rnrwait)) 92 if (list_empty(&dev->rnrwait))
92 list_add(&qp->timerwait, &dev->rnrwait); 93 list_add(&qp->timerwait, &dev->rnrwait);
93 else { 94 else {
@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
109 nqp->s_rnr_timeout -= qp->s_rnr_timeout; 110 nqp->s_rnr_timeout -= qp->s_rnr_timeout;
110 list_add(&qp->timerwait, l); 111 list_add(&qp->timerwait, l);
111 } 112 }
112 spin_unlock_irqrestore(&dev->pending_lock, flags); 113 spin_unlock(&dev->pending_lock);
113} 114}
114 115
115/** 116/**
@@ -185,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
185 } 186 }
186 187
187 spin_lock_irqsave(&rq->lock, flags); 188 spin_lock_irqsave(&rq->lock, flags);
189 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
190 ret = 0;
191 goto unlock;
192 }
193
188 wq = rq->wq; 194 wq = rq->wq;
189 tail = wq->tail; 195 tail = wq->tail;
190 /* Validate tail before using it since it is user writable. */ 196 /* Validate tail before using it since it is user writable. */
@@ -192,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
192 tail = 0; 198 tail = 0;
193 do { 199 do {
194 if (unlikely(tail == wq->head)) { 200 if (unlikely(tail == wq->head)) {
195 spin_unlock_irqrestore(&rq->lock, flags);
196 ret = 0; 201 ret = 0;
197 goto bail; 202 goto unlock;
198 } 203 }
199 /* Make sure entry is read after head index is read. */ 204 /* Make sure entry is read after head index is read. */
200 smp_rmb(); 205 smp_rmb();
@@ -207,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
207 wq->tail = tail; 212 wq->tail = tail;
208 213
209 ret = 1; 214 ret = 1;
210 qp->r_wrid_valid = 1; 215 set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
211 if (handler) { 216 if (handler) {
212 u32 n; 217 u32 n;
213 218
@@ -234,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
234 goto bail; 239 goto bail;
235 } 240 }
236 } 241 }
242unlock:
237 spin_unlock_irqrestore(&rq->lock, flags); 243 spin_unlock_irqrestore(&rq->lock, flags);
238
239bail: 244bail:
240 return ret; 245 return ret;
241} 246}
@@ -263,35 +268,59 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
263 atomic64_t *maddr; 268 atomic64_t *maddr;
264 enum ib_wc_status send_status; 269 enum ib_wc_status send_status;
265 270
271 /*
272 * Note that we check the responder QP state after
273 * checking the requester's state.
274 */
266 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); 275 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
267 if (!qp) {
268 dev->n_pkt_drops++;
269 return;
270 }
271 276
272again:
273 spin_lock_irqsave(&sqp->s_lock, flags); 277 spin_lock_irqsave(&sqp->s_lock, flags);
274 278
275 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || 279 /* Return if we are already busy processing a work request. */
276 sqp->s_rnr_timeout) { 280 if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
277 spin_unlock_irqrestore(&sqp->s_lock, flags); 281 !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
278 goto done; 282 goto unlock;
279 }
280 283
281 /* Get the next send request. */ 284 sqp->s_flags |= IPATH_S_BUSY;
282 if (sqp->s_last == sqp->s_head) { 285
283 /* Send work queue is empty. */ 286again:
284 spin_unlock_irqrestore(&sqp->s_lock, flags); 287 if (sqp->s_last == sqp->s_head)
285 goto done; 288 goto clr_busy;
289 wqe = get_swqe_ptr(sqp, sqp->s_last);
290
291 /* Return if it is not OK to start a new work request. */
292 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
293 if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
294 goto clr_busy;
295 /* We are in the error state, flush the work request. */
296 send_status = IB_WC_WR_FLUSH_ERR;
297 goto flush_send;
286 } 298 }
287 299
288 /* 300 /*
289 * We can rely on the entry not changing without the s_lock 301 * We can rely on the entry not changing without the s_lock
290 * being held until we update s_last. 302 * being held until we update s_last.
303 * We increment s_cur to indicate s_last is in progress.
291 */ 304 */
292 wqe = get_swqe_ptr(sqp, sqp->s_last); 305 if (sqp->s_last == sqp->s_cur) {
306 if (++sqp->s_cur >= sqp->s_size)
307 sqp->s_cur = 0;
308 }
293 spin_unlock_irqrestore(&sqp->s_lock, flags); 309 spin_unlock_irqrestore(&sqp->s_lock, flags);
294 310
311 if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
312 dev->n_pkt_drops++;
313 /*
314 * For RC, the requester would timeout and retry so
315 * shortcut the timeouts and just signal too many retries.
316 */
317 if (sqp->ibqp.qp_type == IB_QPT_RC)
318 send_status = IB_WC_RETRY_EXC_ERR;
319 else
320 send_status = IB_WC_SUCCESS;
321 goto serr;
322 }
323
295 memset(&wc, 0, sizeof wc); 324 memset(&wc, 0, sizeof wc);
296 send_status = IB_WC_SUCCESS; 325 send_status = IB_WC_SUCCESS;
297 326
@@ -396,8 +425,7 @@ again:
396 sqp->s_len -= len; 425 sqp->s_len -= len;
397 } 426 }
398 427
399 if (wqe->wr.opcode == IB_WR_RDMA_WRITE || 428 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
400 wqe->wr.opcode == IB_WR_RDMA_READ)
401 goto send_comp; 429 goto send_comp;
402 430
403 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) 431 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -417,6 +445,8 @@ again:
417 wqe->wr.send_flags & IB_SEND_SOLICITED); 445 wqe->wr.send_flags & IB_SEND_SOLICITED);
418 446
419send_comp: 447send_comp:
448 spin_lock_irqsave(&sqp->s_lock, flags);
449flush_send:
420 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 450 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
421 ipath_send_complete(sqp, wqe, send_status); 451 ipath_send_complete(sqp, wqe, send_status);
422 goto again; 452 goto again;
@@ -437,11 +467,12 @@ rnr_nak:
437 sqp->s_rnr_retry--; 467 sqp->s_rnr_retry--;
438 spin_lock_irqsave(&sqp->s_lock, flags); 468 spin_lock_irqsave(&sqp->s_lock, flags);
439 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) 469 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
440 goto unlock; 470 goto clr_busy;
471 sqp->s_flags |= IPATH_S_WAITING;
441 dev->n_rnr_naks++; 472 dev->n_rnr_naks++;
442 sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; 473 sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
443 ipath_insert_rnr_queue(sqp); 474 ipath_insert_rnr_queue(sqp);
444 goto unlock; 475 goto clr_busy;
445 476
446inv_err: 477inv_err:
447 send_status = IB_WC_REM_INV_REQ_ERR; 478 send_status = IB_WC_REM_INV_REQ_ERR;
@@ -473,17 +504,19 @@ serr:
473 } 504 }
474 goto done; 505 goto done;
475 } 506 }
507clr_busy:
508 sqp->s_flags &= ~IPATH_S_BUSY;
476unlock: 509unlock:
477 spin_unlock_irqrestore(&sqp->s_lock, flags); 510 spin_unlock_irqrestore(&sqp->s_lock, flags);
478done: 511done:
479 if (atomic_dec_and_test(&qp->refcount)) 512 if (qp && atomic_dec_and_test(&qp->refcount))
480 wake_up(&qp->wait); 513 wake_up(&qp->wait);
481} 514}
482 515
483static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) 516static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
484{ 517{
485 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || 518 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
486 qp->ibqp.qp_type == IB_QPT_SMI) { 519 qp->ibqp.qp_type == IB_QPT_SMI) {
487 unsigned long flags; 520 unsigned long flags;
488 521
489 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 522 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
501 * @dev: the device we ran out of buffers on 534 * @dev: the device we ran out of buffers on
502 * 535 *
503 * Called when we run out of PIO buffers. 536 * Called when we run out of PIO buffers.
537 * If we are now in the error state, return zero to flush the
538 * send work request.
504 */ 539 */
505static void ipath_no_bufs_available(struct ipath_qp *qp, 540static int ipath_no_bufs_available(struct ipath_qp *qp,
506 struct ipath_ibdev *dev) 541 struct ipath_ibdev *dev)
507{ 542{
508 unsigned long flags; 543 unsigned long flags;
544 int ret = 1;
509 545
510 /* 546 /*
511 * Note that as soon as want_buffer() is called and 547 * Note that as soon as want_buffer() is called and
512 * possibly before it returns, ipath_ib_piobufavail() 548 * possibly before it returns, ipath_ib_piobufavail()
513 * could be called. If we are still in the tasklet function, 549 * could be called. Therefore, put QP on the piowait list before
514 * tasklet_hi_schedule() will not call us until the next time 550 * enabling the PIO avail interrupt.
515 * tasklet_hi_schedule() is called.
516 * We leave the busy flag set so that another post send doesn't
517 * try to put the same QP on the piowait list again.
518 */ 551 */
519 spin_lock_irqsave(&dev->pending_lock, flags); 552 spin_lock_irqsave(&qp->s_lock, flags);
520 list_add_tail(&qp->piowait, &dev->piowait); 553 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
521 spin_unlock_irqrestore(&dev->pending_lock, flags); 554 dev->n_piowait++;
522 want_buffer(dev->dd, qp); 555 qp->s_flags |= IPATH_S_WAITING;
523 dev->n_piowait++; 556 qp->s_flags &= ~IPATH_S_BUSY;
557 spin_lock(&dev->pending_lock);
558 if (list_empty(&qp->piowait))
559 list_add_tail(&qp->piowait, &dev->piowait);
560 spin_unlock(&dev->pending_lock);
561 } else
562 ret = 0;
563 spin_unlock_irqrestore(&qp->s_lock, flags);
564 if (ret)
565 want_buffer(dev->dd, qp);
566 return ret;
524} 567}
525 568
526/** 569/**
@@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data)
596 struct ipath_qp *qp = (struct ipath_qp *)data; 639 struct ipath_qp *qp = (struct ipath_qp *)data;
597 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 640 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
598 int (*make_req)(struct ipath_qp *qp); 641 int (*make_req)(struct ipath_qp *qp);
599 642 unsigned long flags;
600 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
601 goto bail;
602 643
603 if ((qp->ibqp.qp_type == IB_QPT_RC || 644 if ((qp->ibqp.qp_type == IB_QPT_RC ||
604 qp->ibqp.qp_type == IB_QPT_UC) && 645 qp->ibqp.qp_type == IB_QPT_UC) &&
605 qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { 646 qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
606 ipath_ruc_loopback(qp); 647 ipath_ruc_loopback(qp);
607 goto clear; 648 goto bail;
608 } 649 }
609 650
610 if (qp->ibqp.qp_type == IB_QPT_RC) 651 if (qp->ibqp.qp_type == IB_QPT_RC)
@@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data)
614 else 655 else
615 make_req = ipath_make_ud_req; 656 make_req = ipath_make_ud_req;
616 657
658 spin_lock_irqsave(&qp->s_lock, flags);
659
660 /* Return if we are already busy processing a work request. */
661 if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
662 !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
663 spin_unlock_irqrestore(&qp->s_lock, flags);
664 goto bail;
665 }
666
667 qp->s_flags |= IPATH_S_BUSY;
668
669 spin_unlock_irqrestore(&qp->s_lock, flags);
670
617again: 671again:
618 /* Check for a constructed packet to be sent. */ 672 /* Check for a constructed packet to be sent. */
619 if (qp->s_hdrwords != 0) { 673 if (qp->s_hdrwords != 0) {
@@ -623,8 +677,8 @@ again:
623 */ 677 */
624 if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, 678 if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
625 qp->s_cur_sge, qp->s_cur_size)) { 679 qp->s_cur_sge, qp->s_cur_size)) {
626 ipath_no_bufs_available(qp, dev); 680 if (ipath_no_bufs_available(qp, dev))
627 goto bail; 681 goto bail;
628 } 682 }
629 dev->n_unicast_xmit++; 683 dev->n_unicast_xmit++;
630 /* Record that we sent the packet and s_hdr is empty. */ 684 /* Record that we sent the packet and s_hdr is empty. */
@@ -633,16 +687,20 @@ again:
633 687
634 if (make_req(qp)) 688 if (make_req(qp))
635 goto again; 689 goto again;
636clear: 690
637 clear_bit(IPATH_S_BUSY, &qp->s_busy);
638bail:; 691bail:;
639} 692}
640 693
694/*
695 * This should be called with s_lock held.
696 */
641void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, 697void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
642 enum ib_wc_status status) 698 enum ib_wc_status status)
643{ 699{
644 unsigned long flags; 700 u32 old_last, last;
645 u32 last; 701
702 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
703 return;
646 704
647 /* See ch. 11.2.4.1 and 10.7.3.1 */ 705 /* See ch. 11.2.4.1 and 10.7.3.1 */
648 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || 706 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -661,10 +719,14 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
661 status != IB_WC_SUCCESS); 719 status != IB_WC_SUCCESS);
662 } 720 }
663 721
664 spin_lock_irqsave(&qp->s_lock, flags); 722 old_last = last = qp->s_last;
665 last = qp->s_last;
666 if (++last >= qp->s_size) 723 if (++last >= qp->s_size)
667 last = 0; 724 last = 0;
668 qp->s_last = last; 725 qp->s_last = last;
669 spin_unlock_irqrestore(&qp->s_lock, flags); 726 if (qp->s_cur == old_last)
727 qp->s_cur = last;
728 if (qp->s_tail == old_last)
729 qp->s_tail = last;
730 if (qp->state == IB_QPS_SQD && last == qp->s_cur)
731 qp->s_draining = 0;
670} 732}
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index bfe8926b5514..7fd18e833907 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp)
47{ 47{
48 struct ipath_other_headers *ohdr; 48 struct ipath_other_headers *ohdr;
49 struct ipath_swqe *wqe; 49 struct ipath_swqe *wqe;
50 unsigned long flags;
50 u32 hwords; 51 u32 hwords;
51 u32 bth0; 52 u32 bth0;
52 u32 len; 53 u32 len;
53 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 54 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
54 int ret = 0; 55 int ret = 0;
55 56
56 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) 57 spin_lock_irqsave(&qp->s_lock, flags);
58
59 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
60 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
61 goto bail;
62 /* We are in the error state, flush the work request. */
63 if (qp->s_last == qp->s_head)
64 goto bail;
65 /* If DMAs are in progress, we can't flush immediately. */
66 if (atomic_read(&qp->s_dma_busy)) {
67 qp->s_flags |= IPATH_S_WAIT_DMA;
68 goto bail;
69 }
70 wqe = get_swqe_ptr(qp, qp->s_last);
71 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
57 goto done; 72 goto done;
73 }
58 74
59 ohdr = &qp->s_hdr.u.oth; 75 ohdr = &qp->s_hdr.u.oth;
60 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) 76 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
@@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp)
69 qp->s_wqe = NULL; 85 qp->s_wqe = NULL;
70 switch (qp->s_state) { 86 switch (qp->s_state) {
71 default: 87 default:
88 if (!(ib_ipath_state_ops[qp->state] &
89 IPATH_PROCESS_NEXT_SEND_OK))
90 goto bail;
72 /* Check if send work queue is empty. */ 91 /* Check if send work queue is empty. */
73 if (qp->s_cur == qp->s_head) 92 if (qp->s_cur == qp->s_head)
74 goto done; 93 goto bail;
75 /* 94 /*
76 * Start a new request. 95 * Start a new request.
77 */ 96 */
@@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
134 break; 153 break;
135 154
136 default: 155 default:
137 goto done; 156 goto bail;
138 } 157 }
139 break; 158 break;
140 159
@@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp)
194 ipath_make_ruc_header(to_idev(qp->ibqp.device), 213 ipath_make_ruc_header(to_idev(qp->ibqp.device),
195 qp, ohdr, bth0 | (qp->s_state << 24), 214 qp, ohdr, bth0 | (qp->s_state << 24),
196 qp->s_next_psn++ & IPATH_PSN_MASK); 215 qp->s_next_psn++ & IPATH_PSN_MASK);
216done:
197 ret = 1; 217 ret = 1;
218 goto unlock;
198 219
199done: 220bail:
221 qp->s_flags &= ~IPATH_S_BUSY;
222unlock:
223 spin_unlock_irqrestore(&qp->s_lock, flags);
200 return ret; 224 return ret;
201} 225}
202 226
@@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
258 */ 282 */
259 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 283 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
260 284
261 wc.imm_data = 0; 285 memset(&wc, 0, sizeof wc);
262 wc.wc_flags = 0;
263 286
264 /* Compare the PSN versus the expected PSN. */ 287 /* Compare the PSN versus the expected PSN. */
265 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { 288 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
@@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
322 case OP(SEND_ONLY): 345 case OP(SEND_ONLY):
323 case OP(SEND_ONLY_WITH_IMMEDIATE): 346 case OP(SEND_ONLY_WITH_IMMEDIATE):
324 send_first: 347 send_first:
325 if (qp->r_reuse_sge) { 348 if (qp->r_flags & IPATH_R_REUSE_SGE) {
326 qp->r_reuse_sge = 0; 349 qp->r_flags &= ~IPATH_R_REUSE_SGE;
327 qp->r_sge = qp->s_rdma_read_sge; 350 qp->r_sge = qp->s_rdma_read_sge;
328 } else if (!ipath_get_rwqe(qp, 0)) { 351 } else if (!ipath_get_rwqe(qp, 0)) {
329 dev->n_pkt_drops++; 352 dev->n_pkt_drops++;
@@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
340 case OP(SEND_MIDDLE): 363 case OP(SEND_MIDDLE):
341 /* Check for invalid length PMTU or posted rwqe len. */ 364 /* Check for invalid length PMTU or posted rwqe len. */
342 if (unlikely(tlen != (hdrsize + pmtu + 4))) { 365 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
343 qp->r_reuse_sge = 1; 366 qp->r_flags |= IPATH_R_REUSE_SGE;
344 dev->n_pkt_drops++; 367 dev->n_pkt_drops++;
345 goto done; 368 goto done;
346 } 369 }
347 qp->r_rcv_len += pmtu; 370 qp->r_rcv_len += pmtu;
348 if (unlikely(qp->r_rcv_len > qp->r_len)) { 371 if (unlikely(qp->r_rcv_len > qp->r_len)) {
349 qp->r_reuse_sge = 1; 372 qp->r_flags |= IPATH_R_REUSE_SGE;
350 dev->n_pkt_drops++; 373 dev->n_pkt_drops++;
351 goto done; 374 goto done;
352 } 375 }
@@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
372 /* Check for invalid length. */ 395 /* Check for invalid length. */
373 /* XXX LAST len should be >= 1 */ 396 /* XXX LAST len should be >= 1 */
374 if (unlikely(tlen < (hdrsize + pad + 4))) { 397 if (unlikely(tlen < (hdrsize + pad + 4))) {
375 qp->r_reuse_sge = 1; 398 qp->r_flags |= IPATH_R_REUSE_SGE;
376 dev->n_pkt_drops++; 399 dev->n_pkt_drops++;
377 goto done; 400 goto done;
378 } 401 }
@@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
380 tlen -= (hdrsize + pad + 4); 403 tlen -= (hdrsize + pad + 4);
381 wc.byte_len = tlen + qp->r_rcv_len; 404 wc.byte_len = tlen + qp->r_rcv_len;
382 if (unlikely(wc.byte_len > qp->r_len)) { 405 if (unlikely(wc.byte_len > qp->r_len)) {
383 qp->r_reuse_sge = 1; 406 qp->r_flags |= IPATH_R_REUSE_SGE;
384 dev->n_pkt_drops++; 407 dev->n_pkt_drops++;
385 goto done; 408 goto done;
386 } 409 }
@@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
390 wc.wr_id = qp->r_wr_id; 413 wc.wr_id = qp->r_wr_id;
391 wc.status = IB_WC_SUCCESS; 414 wc.status = IB_WC_SUCCESS;
392 wc.opcode = IB_WC_RECV; 415 wc.opcode = IB_WC_RECV;
393 wc.vendor_err = 0;
394 wc.qp = &qp->ibqp; 416 wc.qp = &qp->ibqp;
395 wc.src_qp = qp->remote_qpn; 417 wc.src_qp = qp->remote_qpn;
396 wc.pkey_index = 0;
397 wc.slid = qp->remote_ah_attr.dlid; 418 wc.slid = qp->remote_ah_attr.dlid;
398 wc.sl = qp->remote_ah_attr.sl; 419 wc.sl = qp->remote_ah_attr.sl;
399 wc.dlid_path_bits = 0;
400 wc.port_num = 0;
401 /* Signal completion event if the solicited bit is set. */ 420 /* Signal completion event if the solicited bit is set. */
402 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 421 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
403 (ohdr->bth[0] & 422 (ohdr->bth[0] &
@@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
488 dev->n_pkt_drops++; 507 dev->n_pkt_drops++;
489 goto done; 508 goto done;
490 } 509 }
491 if (qp->r_reuse_sge) 510 if (qp->r_flags & IPATH_R_REUSE_SGE)
492 qp->r_reuse_sge = 0; 511 qp->r_flags &= ~IPATH_R_REUSE_SGE;
493 else if (!ipath_get_rwqe(qp, 1)) { 512 else if (!ipath_get_rwqe(qp, 1)) {
494 dev->n_pkt_drops++; 513 dev->n_pkt_drops++;
495 goto done; 514 goto done;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 8b6a261c89e3..77ca8ca74e78 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
65 u32 length; 65 u32 length;
66 66
67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); 67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
68 if (!qp) { 68 if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
69 dev->n_pkt_drops++; 69 dev->n_pkt_drops++;
70 goto send_comp; 70 goto done;
71 } 71 }
72 72
73 rsge.sg_list = NULL; 73 rsge.sg_list = NULL;
@@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
91 * present on the wire. 91 * present on the wire.
92 */ 92 */
93 length = swqe->length; 93 length = swqe->length;
94 memset(&wc, 0, sizeof wc);
94 wc.byte_len = length + sizeof(struct ib_grh); 95 wc.byte_len = length + sizeof(struct ib_grh);
95 96
96 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 97 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
97 wc.wc_flags = IB_WC_WITH_IMM; 98 wc.wc_flags = IB_WC_WITH_IMM;
98 wc.imm_data = swqe->wr.ex.imm_data; 99 wc.imm_data = swqe->wr.ex.imm_data;
99 } else {
100 wc.wc_flags = 0;
101 wc.imm_data = 0;
102 } 100 }
103 101
104 /* 102 /*
@@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
229 } 227 }
230 wc.status = IB_WC_SUCCESS; 228 wc.status = IB_WC_SUCCESS;
231 wc.opcode = IB_WC_RECV; 229 wc.opcode = IB_WC_RECV;
232 wc.vendor_err = 0;
233 wc.qp = &qp->ibqp; 230 wc.qp = &qp->ibqp;
234 wc.src_qp = sqp->ibqp.qp_num; 231 wc.src_qp = sqp->ibqp.qp_num;
235 /* XXX do we know which pkey matched? Only needed for GSI. */ 232 /* XXX do we know which pkey matched? Only needed for GSI. */
@@ -248,8 +245,7 @@ drop:
248 kfree(rsge.sg_list); 245 kfree(rsge.sg_list);
249 if (atomic_dec_and_test(&qp->refcount)) 246 if (atomic_dec_and_test(&qp->refcount))
250 wake_up(&qp->wait); 247 wake_up(&qp->wait);
251send_comp: 248done:;
252 ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
253} 249}
254 250
255/** 251/**
@@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
264 struct ipath_other_headers *ohdr; 260 struct ipath_other_headers *ohdr;
265 struct ib_ah_attr *ah_attr; 261 struct ib_ah_attr *ah_attr;
266 struct ipath_swqe *wqe; 262 struct ipath_swqe *wqe;
263 unsigned long flags;
267 u32 nwords; 264 u32 nwords;
268 u32 extra_bytes; 265 u32 extra_bytes;
269 u32 bth0; 266 u32 bth0;
@@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp)
271 u16 lid; 268 u16 lid;
272 int ret = 0; 269 int ret = 0;
273 270
274 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) 271 spin_lock_irqsave(&qp->s_lock, flags);
275 goto bail; 272
273 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
274 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
275 goto bail;
276 /* We are in the error state, flush the work request. */
277 if (qp->s_last == qp->s_head)
278 goto bail;
279 /* If DMAs are in progress, we can't flush immediately. */
280 if (atomic_read(&qp->s_dma_busy)) {
281 qp->s_flags |= IPATH_S_WAIT_DMA;
282 goto bail;
283 }
284 wqe = get_swqe_ptr(qp, qp->s_last);
285 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
286 goto done;
287 }
276 288
277 if (qp->s_cur == qp->s_head) 289 if (qp->s_cur == qp->s_head)
278 goto bail; 290 goto bail;
279 291
280 wqe = get_swqe_ptr(qp, qp->s_cur); 292 wqe = get_swqe_ptr(qp, qp->s_cur);
293 if (++qp->s_cur >= qp->s_size)
294 qp->s_cur = 0;
281 295
282 /* Construct the header. */ 296 /* Construct the header. */
283 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; 297 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
@@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp)
288 dev->n_unicast_xmit++; 302 dev->n_unicast_xmit++;
289 } else { 303 } else {
290 dev->n_unicast_xmit++; 304 dev->n_unicast_xmit++;
291 lid = ah_attr->dlid & 305 lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
292 ~((1 << dev->dd->ipath_lmc) - 1);
293 if (unlikely(lid == dev->dd->ipath_lid)) { 306 if (unlikely(lid == dev->dd->ipath_lid)) {
307 /*
308 * If DMAs are in progress, we can't generate
309 * a completion for the loopback packet since
310 * it would be out of order.
311 * XXX Instead of waiting, we could queue a
312 * zero length descriptor so we get a callback.
313 */
314 if (atomic_read(&qp->s_dma_busy)) {
315 qp->s_flags |= IPATH_S_WAIT_DMA;
316 goto bail;
317 }
318 spin_unlock_irqrestore(&qp->s_lock, flags);
294 ipath_ud_loopback(qp, wqe); 319 ipath_ud_loopback(qp, wqe);
320 spin_lock_irqsave(&qp->s_lock, flags);
321 ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
295 goto done; 322 goto done;
296 } 323 }
297 } 324 }
@@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp)
368 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); 395 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
369 396
370done: 397done:
371 if (++qp->s_cur >= qp->s_size)
372 qp->s_cur = 0;
373 ret = 1; 398 ret = 1;
399 goto unlock;
374 400
375bail: 401bail:
402 qp->s_flags &= ~IPATH_S_BUSY;
403unlock:
404 spin_unlock_irqrestore(&qp->s_lock, flags);
376 return ret; 405 return ret;
377} 406}
378 407
@@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
506 /* 535 /*
507 * Get the next work request entry to find where to put the data. 536 * Get the next work request entry to find where to put the data.
508 */ 537 */
509 if (qp->r_reuse_sge) 538 if (qp->r_flags & IPATH_R_REUSE_SGE)
510 qp->r_reuse_sge = 0; 539 qp->r_flags &= ~IPATH_R_REUSE_SGE;
511 else if (!ipath_get_rwqe(qp, 0)) { 540 else if (!ipath_get_rwqe(qp, 0)) {
512 /* 541 /*
513 * Count VL15 packets dropped due to no receive buffer. 542 * Count VL15 packets dropped due to no receive buffer.
@@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
523 } 552 }
524 /* Silently drop packets which are too big. */ 553 /* Silently drop packets which are too big. */
525 if (wc.byte_len > qp->r_len) { 554 if (wc.byte_len > qp->r_len) {
526 qp->r_reuse_sge = 1; 555 qp->r_flags |= IPATH_R_REUSE_SGE;
527 dev->n_pkt_drops++; 556 dev->n_pkt_drops++;
528 goto bail; 557 goto bail;
529 } 558 }
@@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
535 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); 564 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
536 ipath_copy_sge(&qp->r_sge, data, 565 ipath_copy_sge(&qp->r_sge, data,
537 wc.byte_len - sizeof(struct ib_grh)); 566 wc.byte_len - sizeof(struct ib_grh));
538 qp->r_wrid_valid = 0; 567 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
568 goto bail;
539 wc.wr_id = qp->r_wr_id; 569 wc.wr_id = qp->r_wr_id;
540 wc.status = IB_WC_SUCCESS; 570 wc.status = IB_WC_SUCCESS;
541 wc.opcode = IB_WC_RECV; 571 wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
index e70946c1428c..fc76316c4a58 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
@@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
45int ipath_user_sdma_make_progress(struct ipath_devdata *dd, 45int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
46 struct ipath_user_sdma_queue *pq); 46 struct ipath_user_sdma_queue *pq);
47 47
48int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq,
49 u32 counter);
50void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, 48void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
51 struct ipath_user_sdma_queue *pq); 49 struct ipath_user_sdma_queue *pq);
52 50
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 22bb42dc8f73..e0ec540042bf 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma;
111module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); 111module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
112MODULE_PARM_DESC(disable_sma, "Disable the SMA"); 112MODULE_PARM_DESC(disable_sma, "Disable the SMA");
113 113
114/*
115 * Note that it is OK to post send work requests in the SQE and ERR
116 * states; ipath_do_send() will process them and generate error
117 * completions as per IB 1.2 C10-96.
118 */
114const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { 119const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
115 [IB_QPS_RESET] = 0, 120 [IB_QPS_RESET] = 0,
116 [IB_QPS_INIT] = IPATH_POST_RECV_OK, 121 [IB_QPS_INIT] = IPATH_POST_RECV_OK,
117 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 122 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
118 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 123 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
119 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, 124 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
125 IPATH_PROCESS_NEXT_SEND_OK,
120 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 126 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
121 IPATH_POST_SEND_OK, 127 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
122 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 128 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
123 [IB_QPS_ERR] = 0, 129 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
130 [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
131 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
124}; 132};
125 133
126struct ipath_ucontext { 134struct ipath_ucontext {
@@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
230 } 238 }
231} 239}
232 240
233static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
234{
235 struct ib_wc wc;
236
237 memset(&wc, 0, sizeof(wc));
238 wc.wr_id = wr->wr_id;
239 wc.status = IB_WC_WR_FLUSH_ERR;
240 wc.opcode = ib_ipath_wc_opcode[wr->opcode];
241 wc.qp = &qp->ibqp;
242 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
243}
244
245/* 241/*
246 * Count the number of DMA descriptors needed to send length bytes of data. 242 * Count the number of DMA descriptors needed to send length bytes of data.
247 * Don't modify the ipath_sge_state to get the count. 243 * Don't modify the ipath_sge_state to get the count.
@@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
347 spin_lock_irqsave(&qp->s_lock, flags); 343 spin_lock_irqsave(&qp->s_lock, flags);
348 344
349 /* Check that state is OK to post send. */ 345 /* Check that state is OK to post send. */
350 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { 346 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
351 if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) 347 goto bail_inval;
352 goto bail_inval;
353 /* C10-96 says generate a flushed completion entry. */
354 ipath_flush_wqe(qp, wr);
355 ret = 0;
356 goto bail;
357 }
358 348
359 /* IB spec says that num_sge == 0 is OK. */ 349 /* IB spec says that num_sge == 0 is OK. */
360 if (wr->num_sge > qp->s_max_sge) 350 if (wr->num_sge > qp->s_max_sge)
@@ -677,6 +667,7 @@ bail:;
677static void ipath_ib_timer(struct ipath_ibdev *dev) 667static void ipath_ib_timer(struct ipath_ibdev *dev)
678{ 668{
679 struct ipath_qp *resend = NULL; 669 struct ipath_qp *resend = NULL;
670 struct ipath_qp *rnr = NULL;
680 struct list_head *last; 671 struct list_head *last;
681 struct ipath_qp *qp; 672 struct ipath_qp *qp;
682 unsigned long flags; 673 unsigned long flags;
@@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
703 if (--qp->s_rnr_timeout == 0) { 694 if (--qp->s_rnr_timeout == 0) {
704 do { 695 do {
705 list_del_init(&qp->timerwait); 696 list_del_init(&qp->timerwait);
706 tasklet_hi_schedule(&qp->s_task); 697 qp->timer_next = rnr;
698 rnr = qp;
699 atomic_inc(&qp->refcount);
707 if (list_empty(last)) 700 if (list_empty(last))
708 break; 701 break;
709 qp = list_entry(last->next, struct ipath_qp, 702 qp = list_entry(last->next, struct ipath_qp,
@@ -743,9 +736,13 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
743 spin_unlock_irqrestore(&dev->pending_lock, flags); 736 spin_unlock_irqrestore(&dev->pending_lock, flags);
744 737
745 /* XXX What if timer fires again while this is running? */ 738 /* XXX What if timer fires again while this is running? */
746 for (qp = resend; qp != NULL; qp = qp->timer_next) { 739 while (resend != NULL) {
740 qp = resend;
741 resend = qp->timer_next;
742
747 spin_lock_irqsave(&qp->s_lock, flags); 743 spin_lock_irqsave(&qp->s_lock, flags);
748 if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { 744 if (qp->s_last != qp->s_tail &&
745 ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
749 dev->n_timeouts++; 746 dev->n_timeouts++;
750 ipath_restart_rc(qp, qp->s_last_psn + 1); 747 ipath_restart_rc(qp, qp->s_last_psn + 1);
751 } 748 }
@@ -755,6 +752,19 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
755 if (atomic_dec_and_test(&qp->refcount)) 752 if (atomic_dec_and_test(&qp->refcount))
756 wake_up(&qp->wait); 753 wake_up(&qp->wait);
757 } 754 }
755 while (rnr != NULL) {
756 qp = rnr;
757 rnr = qp->timer_next;
758
759 spin_lock_irqsave(&qp->s_lock, flags);
760 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
761 ipath_schedule_send(qp);
762 spin_unlock_irqrestore(&qp->s_lock, flags);
763
764 /* Notify ipath_destroy_qp() if it is waiting. */
765 if (atomic_dec_and_test(&qp->refcount))
766 wake_up(&qp->wait);
767 }
758} 768}
759 769
760static void update_sge(struct ipath_sge_state *ss, u32 length) 770static void update_sge(struct ipath_sge_state *ss, u32 length)
@@ -1010,13 +1020,24 @@ static void sdma_complete(void *cookie, int status)
1010 struct ipath_verbs_txreq *tx = cookie; 1020 struct ipath_verbs_txreq *tx = cookie;
1011 struct ipath_qp *qp = tx->qp; 1021 struct ipath_qp *qp = tx->qp;
1012 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 1022 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1023 unsigned int flags;
1024 enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1025 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1013 1026
1014 /* Generate a completion queue entry if needed */ 1027 if (atomic_dec_and_test(&qp->s_dma_busy)) {
1015 if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { 1028 spin_lock_irqsave(&qp->s_lock, flags);
1016 enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? 1029 if (tx->wqe)
1017 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; 1030 ipath_send_complete(qp, tx->wqe, ibs);
1018 1031 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1032 qp->s_last != qp->s_head) ||
1033 (qp->s_flags & IPATH_S_WAIT_DMA))
1034 ipath_schedule_send(qp);
1035 spin_unlock_irqrestore(&qp->s_lock, flags);
1036 wake_up(&qp->wait_dma);
1037 } else if (tx->wqe) {
1038 spin_lock_irqsave(&qp->s_lock, flags);
1019 ipath_send_complete(qp, tx->wqe, ibs); 1039 ipath_send_complete(qp, tx->wqe, ibs);
1040 spin_unlock_irqrestore(&qp->s_lock, flags);
1020 } 1041 }
1021 1042
1022 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) 1043 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
@@ -1027,6 +1048,21 @@ static void sdma_complete(void *cookie, int status)
1027 wake_up(&qp->wait); 1048 wake_up(&qp->wait);
1028} 1049}
1029 1050
1051static void decrement_dma_busy(struct ipath_qp *qp)
1052{
1053 unsigned int flags;
1054
1055 if (atomic_dec_and_test(&qp->s_dma_busy)) {
1056 spin_lock_irqsave(&qp->s_lock, flags);
1057 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1058 qp->s_last != qp->s_head) ||
1059 (qp->s_flags & IPATH_S_WAIT_DMA))
1060 ipath_schedule_send(qp);
1061 spin_unlock_irqrestore(&qp->s_lock, flags);
1062 wake_up(&qp->wait_dma);
1063 }
1064}
1065
1030/* 1066/*
1031 * Compute the number of clock cycles of delay before sending the next packet. 1067 * Compute the number of clock cycles of delay before sending the next packet.
1032 * The multipliers reflect the number of clocks for the fastest rate so 1068 * The multipliers reflect the number of clocks for the fastest rate so
@@ -1065,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
1065 if (tx) { 1101 if (tx) {
1066 qp->s_tx = NULL; 1102 qp->s_tx = NULL;
1067 /* resend previously constructed packet */ 1103 /* resend previously constructed packet */
1104 atomic_inc(&qp->s_dma_busy);
1068 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); 1105 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1069 if (ret) 1106 if (ret) {
1070 qp->s_tx = tx; 1107 qp->s_tx = tx;
1108 decrement_dma_busy(qp);
1109 }
1071 goto bail; 1110 goto bail;
1072 } 1111 }
1073 1112
@@ -1118,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
1118 tx->txreq.sg_count = ndesc; 1157 tx->txreq.sg_count = ndesc;
1119 tx->map_len = (hdrwords + 2) << 2; 1158 tx->map_len = (hdrwords + 2) << 2;
1120 tx->txreq.map_addr = &tx->hdr; 1159 tx->txreq.map_addr = &tx->hdr;
1160 atomic_inc(&qp->s_dma_busy);
1121 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); 1161 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1122 if (ret) { 1162 if (ret) {
1123 /* save ss and length in dwords */ 1163 /* save ss and length in dwords */
1124 tx->ss = ss; 1164 tx->ss = ss;
1125 tx->len = dwords; 1165 tx->len = dwords;
1126 qp->s_tx = tx; 1166 qp->s_tx = tx;
1167 decrement_dma_busy(qp);
1127 } 1168 }
1128 goto bail; 1169 goto bail;
1129 } 1170 }
@@ -1144,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
1144 memcpy(piobuf, hdr, hdrwords << 2); 1185 memcpy(piobuf, hdr, hdrwords << 2);
1145 ipath_copy_from_sge(piobuf + hdrwords, ss, len); 1186 ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1146 1187
1188 atomic_inc(&qp->s_dma_busy);
1147 ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); 1189 ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1148 /* 1190 /*
1149 * If we couldn't queue the DMA request, save the info 1191 * If we couldn't queue the DMA request, save the info
@@ -1154,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
1154 tx->ss = NULL; 1196 tx->ss = NULL;
1155 tx->len = 0; 1197 tx->len = 0;
1156 qp->s_tx = tx; 1198 qp->s_tx = tx;
1199 decrement_dma_busy(qp);
1157 } 1200 }
1158 dev->n_unaligned++; 1201 dev->n_unaligned++;
1159 goto bail; 1202 goto bail;
@@ -1177,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
1177 unsigned flush_wc; 1220 unsigned flush_wc;
1178 u32 control; 1221 u32 control;
1179 int ret; 1222 int ret;
1223 unsigned int flags;
1180 1224
1181 piobuf = ipath_getpiobuf(dd, plen, NULL); 1225 piobuf = ipath_getpiobuf(dd, plen, NULL);
1182 if (unlikely(piobuf == NULL)) { 1226 if (unlikely(piobuf == NULL)) {
@@ -1247,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
1247 } 1291 }
1248 copy_io(piobuf, ss, len, flush_wc); 1292 copy_io(piobuf, ss, len, flush_wc);
1249done: 1293done:
1250 if (qp->s_wqe) 1294 if (qp->s_wqe) {
1295 spin_lock_irqsave(&qp->s_lock, flags);
1251 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1296 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1297 spin_unlock_irqrestore(&qp->s_lock, flags);
1298 }
1252 ret = 0; 1299 ret = 0;
1253bail: 1300bail:
1254 return ret; 1301 return ret;
@@ -1281,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1281 * can defer SDMA restart until link goes ACTIVE without 1328 * can defer SDMA restart until link goes ACTIVE without
1282 * worrying about just how we got there. 1329 * worrying about just how we got there.
1283 */ 1330 */
1284 if (qp->ibqp.qp_type == IB_QPT_SMI) 1331 if (qp->ibqp.qp_type == IB_QPT_SMI ||
1332 !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1285 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, 1333 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1286 plen, dwords); 1334 plen, dwords);
1287 /* All non-VL15 packets are dropped if link is not ACTIVE */
1288 else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) {
1289 if (qp->s_wqe)
1290 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1291 ret = 0;
1292 } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
1293 ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1294 plen, dwords);
1295 else 1335 else
1296 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, 1336 ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1297 plen, dwords); 1337 plen, dwords);
1298 1338
1299 return ret; 1339 return ret;
@@ -1401,27 +1441,46 @@ bail:
1401 * This is called from ipath_intr() at interrupt level when a PIO buffer is 1441 * This is called from ipath_intr() at interrupt level when a PIO buffer is
1402 * available after ipath_verbs_send() returned an error that no buffers were 1442 * available after ipath_verbs_send() returned an error that no buffers were
1403 * available. Return 1 if we consumed all the PIO buffers and we still have 1443 * available. Return 1 if we consumed all the PIO buffers and we still have
1404 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and 1444 * QPs waiting for buffers (for now, just restart the send tasklet and
1405 * return zero). 1445 * return zero).
1406 */ 1446 */
1407int ipath_ib_piobufavail(struct ipath_ibdev *dev) 1447int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1408{ 1448{
1449 struct list_head *list;
1450 struct ipath_qp *qplist;
1409 struct ipath_qp *qp; 1451 struct ipath_qp *qp;
1410 unsigned long flags; 1452 unsigned long flags;
1411 1453
1412 if (dev == NULL) 1454 if (dev == NULL)
1413 goto bail; 1455 goto bail;
1414 1456
1457 list = &dev->piowait;
1458 qplist = NULL;
1459
1415 spin_lock_irqsave(&dev->pending_lock, flags); 1460 spin_lock_irqsave(&dev->pending_lock, flags);
1416 while (!list_empty(&dev->piowait)) { 1461 while (!list_empty(list)) {
1417 qp = list_entry(dev->piowait.next, struct ipath_qp, 1462 qp = list_entry(list->next, struct ipath_qp, piowait);
1418 piowait);
1419 list_del_init(&qp->piowait); 1463 list_del_init(&qp->piowait);
1420 clear_bit(IPATH_S_BUSY, &qp->s_busy); 1464 qp->pio_next = qplist;
1421 tasklet_hi_schedule(&qp->s_task); 1465 qplist = qp;
1466 atomic_inc(&qp->refcount);
1422 } 1467 }
1423 spin_unlock_irqrestore(&dev->pending_lock, flags); 1468 spin_unlock_irqrestore(&dev->pending_lock, flags);
1424 1469
1470 while (qplist != NULL) {
1471 qp = qplist;
1472 qplist = qp->pio_next;
1473
1474 spin_lock_irqsave(&qp->s_lock, flags);
1475 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1476 ipath_schedule_send(qp);
1477 spin_unlock_irqrestore(&qp->s_lock, flags);
1478
1479 /* Notify ipath_destroy_qp() if it is waiting. */
1480 if (atomic_dec_and_test(&qp->refcount))
1481 wake_up(&qp->wait);
1482 }
1483
1425bail: 1484bail:
1426 return 0; 1485 return 0;
1427} 1486}
@@ -2143,11 +2202,12 @@ bail:
2143void ipath_unregister_ib_device(struct ipath_ibdev *dev) 2202void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2144{ 2203{
2145 struct ib_device *ibdev = &dev->ibdev; 2204 struct ib_device *ibdev = &dev->ibdev;
2146 2205 u32 qps_inuse;
2147 disable_timer(dev->dd);
2148 2206
2149 ib_unregister_device(ibdev); 2207 ib_unregister_device(ibdev);
2150 2208
2209 disable_timer(dev->dd);
2210
2151 if (!list_empty(&dev->pending[0]) || 2211 if (!list_empty(&dev->pending[0]) ||
2152 !list_empty(&dev->pending[1]) || 2212 !list_empty(&dev->pending[1]) ||
2153 !list_empty(&dev->pending[2])) 2213 !list_empty(&dev->pending[2]))
@@ -2162,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2162 * Note that ipath_unregister_ib_device() can be called before all 2222 * Note that ipath_unregister_ib_device() can be called before all
2163 * the QPs are destroyed! 2223 * the QPs are destroyed!
2164 */ 2224 */
2165 ipath_free_all_qps(&dev->qp_table); 2225 qps_inuse = ipath_free_all_qps(&dev->qp_table);
2226 if (qps_inuse)
2227 ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2228 qps_inuse);
2166 kfree(dev->qp_table.table); 2229 kfree(dev->qp_table.table);
2167 kfree(dev->lk_table.table); 2230 kfree(dev->lk_table.table);
2168 kfree(dev->txreq_bufs); 2231 kfree(dev->txreq_bufs);
@@ -2213,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2213 "RC OTH NAKs %d\n" 2276 "RC OTH NAKs %d\n"
2214 "RC timeouts %d\n" 2277 "RC timeouts %d\n"
2215 "RC RDMA dup %d\n" 2278 "RC RDMA dup %d\n"
2216 "RC stalls %d\n"
2217 "piobuf wait %d\n" 2279 "piobuf wait %d\n"
2218 "no piobuf %d\n"
2219 "unaligned %d\n" 2280 "unaligned %d\n"
2220 "PKT drops %d\n" 2281 "PKT drops %d\n"
2221 "WQE errs %d\n", 2282 "WQE errs %d\n",
2222 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, 2283 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2223 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, 2284 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2224 dev->n_other_naks, dev->n_timeouts, 2285 dev->n_other_naks, dev->n_timeouts,
2225 dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, 2286 dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2226 dev->n_no_piobuf, dev->n_unaligned,
2227 dev->n_pkt_drops, dev->n_wqe_errs); 2287 dev->n_pkt_drops, dev->n_wqe_errs);
2228 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { 2288 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2229 const struct ipath_opcode_stats *si = &dev->opstats[i]; 2289 const struct ipath_opcode_stats *si = &dev->opstats[i];
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 4c7c2aa8e19d..deee02ca7ca4 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -74,6 +74,11 @@
74#define IPATH_POST_RECV_OK 0x02 74#define IPATH_POST_RECV_OK 0x02
75#define IPATH_PROCESS_RECV_OK 0x04 75#define IPATH_PROCESS_RECV_OK 0x04
76#define IPATH_PROCESS_SEND_OK 0x08 76#define IPATH_PROCESS_SEND_OK 0x08
77#define IPATH_PROCESS_NEXT_SEND_OK 0x10
78#define IPATH_FLUSH_SEND 0x20
79#define IPATH_FLUSH_RECV 0x40
80#define IPATH_PROCESS_OR_FLUSH_SEND \
81 (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
77 82
78/* IB Performance Manager status values */ 83/* IB Performance Manager status values */
79#define IB_PMA_SAMPLE_STATUS_DONE 0x00 84#define IB_PMA_SAMPLE_STATUS_DONE 0x00
@@ -353,12 +358,14 @@ struct ipath_qp {
353 struct ib_qp ibqp; 358 struct ib_qp ibqp;
354 struct ipath_qp *next; /* link list for QPN hash table */ 359 struct ipath_qp *next; /* link list for QPN hash table */
355 struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ 360 struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
361 struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
356 struct list_head piowait; /* link for wait PIO buf */ 362 struct list_head piowait; /* link for wait PIO buf */
357 struct list_head timerwait; /* link for waiting for timeouts */ 363 struct list_head timerwait; /* link for waiting for timeouts */
358 struct ib_ah_attr remote_ah_attr; 364 struct ib_ah_attr remote_ah_attr;
359 struct ipath_ib_header s_hdr; /* next packet header to send */ 365 struct ipath_ib_header s_hdr; /* next packet header to send */
360 atomic_t refcount; 366 atomic_t refcount;
361 wait_queue_head_t wait; 367 wait_queue_head_t wait;
368 wait_queue_head_t wait_dma;
362 struct tasklet_struct s_task; 369 struct tasklet_struct s_task;
363 struct ipath_mmap_info *ip; 370 struct ipath_mmap_info *ip;
364 struct ipath_sge_state *s_cur_sge; 371 struct ipath_sge_state *s_cur_sge;
@@ -369,7 +376,7 @@ struct ipath_qp {
369 struct ipath_sge_state s_rdma_read_sge; 376 struct ipath_sge_state s_rdma_read_sge;
370 struct ipath_sge_state r_sge; /* current receive data */ 377 struct ipath_sge_state r_sge; /* current receive data */
371 spinlock_t s_lock; 378 spinlock_t s_lock;
372 unsigned long s_busy; 379 atomic_t s_dma_busy;
373 u16 s_pkt_delay; 380 u16 s_pkt_delay;
374 u16 s_hdrwords; /* size of s_hdr in 32 bit words */ 381 u16 s_hdrwords; /* size of s_hdr in 32 bit words */
375 u32 s_cur_size; /* size of send packet in bytes */ 382 u32 s_cur_size; /* size of send packet in bytes */
@@ -383,6 +390,7 @@ struct ipath_qp {
383 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ 390 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
384 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ 391 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
385 u64 r_wr_id; /* ID for current receive WQE */ 392 u64 r_wr_id; /* ID for current receive WQE */
393 unsigned long r_aflags;
386 u32 r_len; /* total length of r_sge */ 394 u32 r_len; /* total length of r_sge */
387 u32 r_rcv_len; /* receive data len processed */ 395 u32 r_rcv_len; /* receive data len processed */
388 u32 r_psn; /* expected rcv packet sequence number */ 396 u32 r_psn; /* expected rcv packet sequence number */
@@ -394,8 +402,7 @@ struct ipath_qp {
394 u8 r_state; /* opcode of last packet received */ 402 u8 r_state; /* opcode of last packet received */
395 u8 r_nak_state; /* non-zero if NAK is pending */ 403 u8 r_nak_state; /* non-zero if NAK is pending */
396 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ 404 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
397 u8 r_reuse_sge; /* for UC receive errors */ 405 u8 r_flags;
398 u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
399 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ 406 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
400 u8 r_head_ack_queue; /* index into s_ack_queue[] */ 407 u8 r_head_ack_queue; /* index into s_ack_queue[] */
401 u8 qp_access_flags; 408 u8 qp_access_flags;
@@ -404,13 +411,13 @@ struct ipath_qp {
404 u8 s_rnr_retry_cnt; 411 u8 s_rnr_retry_cnt;
405 u8 s_retry; /* requester retry counter */ 412 u8 s_retry; /* requester retry counter */
406 u8 s_rnr_retry; /* requester RNR retry counter */ 413 u8 s_rnr_retry; /* requester RNR retry counter */
407 u8 s_wait_credit; /* limit number of unacked packets sent */
408 u8 s_pkey_index; /* PKEY index to use */ 414 u8 s_pkey_index; /* PKEY index to use */
409 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ 415 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
410 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ 416 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
411 u8 s_tail_ack_queue; /* index into s_ack_queue[] */ 417 u8 s_tail_ack_queue; /* index into s_ack_queue[] */
412 u8 s_flags; 418 u8 s_flags;
413 u8 s_dmult; 419 u8 s_dmult;
420 u8 s_draining;
414 u8 timeout; /* Timeout for this QP */ 421 u8 timeout; /* Timeout for this QP */
415 enum ib_mtu path_mtu; 422 enum ib_mtu path_mtu;
416 u32 remote_qpn; 423 u32 remote_qpn;
@@ -428,16 +435,39 @@ struct ipath_qp {
428 struct ipath_sge r_sg_list[0]; /* verified SGEs */ 435 struct ipath_sge r_sg_list[0]; /* verified SGEs */
429}; 436};
430 437
431/* Bit definition for s_busy. */ 438/*
432#define IPATH_S_BUSY 0 439 * Atomic bit definitions for r_aflags.
440 */
441#define IPATH_R_WRID_VALID 0
442
443/*
444 * Bit definitions for r_flags.
445 */
446#define IPATH_R_REUSE_SGE 0x01
433 447
434/* 448/*
435 * Bit definitions for s_flags. 449 * Bit definitions for s_flags.
450 *
451 * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
452 * before processing the next SWQE
453 * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
454 * before processing the next SWQE
455 * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
456 * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
457 * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
458 * next send completion entry not via send DMA.
436 */ 459 */
437#define IPATH_S_SIGNAL_REQ_WR 0x01 460#define IPATH_S_SIGNAL_REQ_WR 0x01
438#define IPATH_S_FENCE_PENDING 0x02 461#define IPATH_S_FENCE_PENDING 0x02
439#define IPATH_S_RDMAR_PENDING 0x04 462#define IPATH_S_RDMAR_PENDING 0x04
440#define IPATH_S_ACK_PENDING 0x08 463#define IPATH_S_ACK_PENDING 0x08
464#define IPATH_S_BUSY 0x10
465#define IPATH_S_WAITING 0x20
466#define IPATH_S_WAIT_SSN_CREDIT 0x40
467#define IPATH_S_WAIT_DMA 0x80
468
469#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
470 IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
441 471
442#define IPATH_PSN_CREDIT 512 472#define IPATH_PSN_CREDIT 512
443 473
@@ -573,13 +603,11 @@ struct ipath_ibdev {
573 u32 n_rnr_naks; 603 u32 n_rnr_naks;
574 u32 n_other_naks; 604 u32 n_other_naks;
575 u32 n_timeouts; 605 u32 n_timeouts;
576 u32 n_rc_stalls;
577 u32 n_pkt_drops; 606 u32 n_pkt_drops;
578 u32 n_vl15_dropped; 607 u32 n_vl15_dropped;
579 u32 n_wqe_errs; 608 u32 n_wqe_errs;
580 u32 n_rdma_dup_busy; 609 u32 n_rdma_dup_busy;
581 u32 n_piowait; 610 u32 n_piowait;
582 u32 n_no_piobuf;
583 u32 n_unaligned; 611 u32 n_unaligned;
584 u32 port_cap_flags; 612 u32 port_cap_flags;
585 u32 pma_sample_start; 613 u32 pma_sample_start;
@@ -657,6 +685,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
657 return container_of(ibdev, struct ipath_ibdev, ibdev); 685 return container_of(ibdev, struct ipath_ibdev, ibdev);
658} 686}
659 687
688/*
689 * This must be called with s_lock held.
690 */
691static inline void ipath_schedule_send(struct ipath_qp *qp)
692{
693 if (qp->s_flags & IPATH_S_ANY_WAIT)
694 qp->s_flags &= ~IPATH_S_ANY_WAIT;
695 if (!(qp->s_flags & IPATH_S_BUSY))
696 tasklet_hi_schedule(&qp->s_task);
697}
698
660int ipath_process_mad(struct ib_device *ibdev, 699int ipath_process_mad(struct ib_device *ibdev,
661 int mad_flags, 700 int mad_flags,
662 u8 port_num, 701 u8 port_num,
@@ -706,7 +745,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
706int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 745int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
707 int attr_mask, struct ib_qp_init_attr *init_attr); 746 int attr_mask, struct ib_qp_init_attr *init_attr);
708 747
709void ipath_free_all_qps(struct ipath_qp_table *qpt); 748unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
710 749
711int ipath_init_qp_table(struct ipath_ibdev *idev, int size); 750int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
712 751