aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c50
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c43
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c383
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c37
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c9
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h12
6 files changed, 265 insertions, 269 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index e9d5d7e983bb..46773c673a1a 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -121,6 +121,7 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
121 struct ib_sge *sge, int acc) 121 struct ib_sge *sge, int acc)
122{ 122{
123 struct ipath_mregion *mr; 123 struct ipath_mregion *mr;
124 unsigned n, m;
124 size_t off; 125 size_t off;
125 int ret; 126 int ret;
126 127
@@ -152,20 +153,22 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
152 } 153 }
153 154
154 off += mr->offset; 155 off += mr->offset;
155 isge->mr = mr; 156 m = 0;
156 isge->m = 0; 157 n = 0;
157 isge->n = 0; 158 while (off >= mr->map[m]->segs[n].length) {
158 while (off >= mr->map[isge->m]->segs[isge->n].length) { 159 off -= mr->map[m]->segs[n].length;
159 off -= mr->map[isge->m]->segs[isge->n].length; 160 n++;
160 isge->n++; 161 if (n >= IPATH_SEGSZ) {
161 if (isge->n >= IPATH_SEGSZ) { 162 m++;
162 isge->m++; 163 n = 0;
163 isge->n = 0;
164 } 164 }
165 } 165 }
166 isge->vaddr = mr->map[isge->m]->segs[isge->n].vaddr + off; 166 isge->mr = mr;
167 isge->length = mr->map[isge->m]->segs[isge->n].length - off; 167 isge->vaddr = mr->map[m]->segs[n].vaddr + off;
168 isge->length = mr->map[m]->segs[n].length - off;
168 isge->sge_length = sge->length; 169 isge->sge_length = sge->length;
170 isge->m = m;
171 isge->n = n;
169 172
170 ret = 1; 173 ret = 1;
171 174
@@ -190,6 +193,7 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
190 struct ipath_lkey_table *rkt = &dev->lk_table; 193 struct ipath_lkey_table *rkt = &dev->lk_table;
191 struct ipath_sge *sge = &ss->sge; 194 struct ipath_sge *sge = &ss->sge;
192 struct ipath_mregion *mr; 195 struct ipath_mregion *mr;
196 unsigned n, m;
193 size_t off; 197 size_t off;
194 int ret; 198 int ret;
195 199
@@ -207,20 +211,22 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
207 } 211 }
208 212
209 off += mr->offset; 213 off += mr->offset;
210 sge->mr = mr; 214 m = 0;
211 sge->m = 0; 215 n = 0;
212 sge->n = 0; 216 while (off >= mr->map[m]->segs[n].length) {
213 while (off >= mr->map[sge->m]->segs[sge->n].length) { 217 off -= mr->map[m]->segs[n].length;
214 off -= mr->map[sge->m]->segs[sge->n].length; 218 n++;
215 sge->n++; 219 if (n >= IPATH_SEGSZ) {
216 if (sge->n >= IPATH_SEGSZ) { 220 m++;
217 sge->m++; 221 n = 0;
218 sge->n = 0;
219 } 222 }
220 } 223 }
221 sge->vaddr = mr->map[sge->m]->segs[sge->n].vaddr + off; 224 sge->mr = mr;
222 sge->length = mr->map[sge->m]->segs[sge->n].length - off; 225 sge->vaddr = mr->map[m]->segs[n].vaddr + off;
226 sge->length = mr->map[m]->segs[n].length - off;
223 sge->sge_length = len; 227 sge->sge_length = len;
228 sge->m = m;
229 sge->n = n;
224 ss->sg_list = NULL; 230 ss->sg_list = NULL;
225 ss->num_sge = 1; 231 ss->num_sge = 1;
226 232
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 83b9a6a5e2c6..68f01513214f 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -333,10 +333,11 @@ static void ipath_reset_qp(struct ipath_qp *qp)
333 qp->remote_qpn = 0; 333 qp->remote_qpn = 0;
334 qp->qkey = 0; 334 qp->qkey = 0;
335 qp->qp_access_flags = 0; 335 qp->qp_access_flags = 0;
336 clear_bit(IPATH_S_BUSY, &qp->s_flags);
336 qp->s_hdrwords = 0; 337 qp->s_hdrwords = 0;
337 qp->s_psn = 0; 338 qp->s_psn = 0;
338 qp->r_psn = 0; 339 qp->r_psn = 0;
339 atomic_set(&qp->msn, 0); 340 qp->r_msn = 0;
340 if (qp->ibqp.qp_type == IB_QPT_RC) { 341 if (qp->ibqp.qp_type == IB_QPT_RC) {
341 qp->s_state = IB_OPCODE_RC_SEND_LAST; 342 qp->s_state = IB_OPCODE_RC_SEND_LAST;
342 qp->r_state = IB_OPCODE_RC_SEND_LAST; 343 qp->r_state = IB_OPCODE_RC_SEND_LAST;
@@ -345,7 +346,8 @@ static void ipath_reset_qp(struct ipath_qp *qp)
345 qp->r_state = IB_OPCODE_UC_SEND_LAST; 346 qp->r_state = IB_OPCODE_UC_SEND_LAST;
346 } 347 }
347 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 348 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
348 qp->s_nak_state = 0; 349 qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
350 qp->r_nak_state = 0;
349 qp->s_rnr_timeout = 0; 351 qp->s_rnr_timeout = 0;
350 qp->s_head = 0; 352 qp->s_head = 0;
351 qp->s_tail = 0; 353 qp->s_tail = 0;
@@ -363,10 +365,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
363 * @qp: the QP to put into an error state 365 * @qp: the QP to put into an error state
364 * 366 *
365 * Flushes both send and receive work queues. 367 * Flushes both send and receive work queues.
366 * QP r_rq.lock and s_lock should be held. 368 * QP s_lock should be held and interrupts disabled.
367 */ 369 */
368 370
369static void ipath_error_qp(struct ipath_qp *qp) 371void ipath_error_qp(struct ipath_qp *qp)
370{ 372{
371 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 373 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
372 struct ib_wc wc; 374 struct ib_wc wc;
@@ -409,12 +411,14 @@ static void ipath_error_qp(struct ipath_qp *qp)
409 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 411 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
410 412
411 wc.opcode = IB_WC_RECV; 413 wc.opcode = IB_WC_RECV;
414 spin_lock(&qp->r_rq.lock);
412 while (qp->r_rq.tail != qp->r_rq.head) { 415 while (qp->r_rq.tail != qp->r_rq.head) {
413 wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id; 416 wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
414 if (++qp->r_rq.tail >= qp->r_rq.size) 417 if (++qp->r_rq.tail >= qp->r_rq.size)
415 qp->r_rq.tail = 0; 418 qp->r_rq.tail = 0;
416 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); 419 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
417 } 420 }
421 spin_unlock(&qp->r_rq.lock);
418} 422}
419 423
420/** 424/**
@@ -434,8 +438,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
434 unsigned long flags; 438 unsigned long flags;
435 int ret; 439 int ret;
436 440
437 spin_lock_irqsave(&qp->r_rq.lock, flags); 441 spin_lock_irqsave(&qp->s_lock, flags);
438 spin_lock(&qp->s_lock);
439 442
440 cur_state = attr_mask & IB_QP_CUR_STATE ? 443 cur_state = attr_mask & IB_QP_CUR_STATE ?
441 attr->cur_qp_state : qp->state; 444 attr->cur_qp_state : qp->state;
@@ -506,31 +509,19 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
506 } 509 }
507 510
508 if (attr_mask & IB_QP_MIN_RNR_TIMER) 511 if (attr_mask & IB_QP_MIN_RNR_TIMER)
509 qp->s_min_rnr_timer = attr->min_rnr_timer; 512 qp->r_min_rnr_timer = attr->min_rnr_timer;
510 513
511 if (attr_mask & IB_QP_QKEY) 514 if (attr_mask & IB_QP_QKEY)
512 qp->qkey = attr->qkey; 515 qp->qkey = attr->qkey;
513 516
514 qp->state = new_state; 517 qp->state = new_state;
515 spin_unlock(&qp->s_lock); 518 spin_unlock_irqrestore(&qp->s_lock, flags);
516 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
517
518 /*
519 * If QP1 changed to the RTS state, try to move to the link to INIT
520 * even if it was ACTIVE so the SM will reinitialize the SMA's
521 * state.
522 */
523 if (qp->ibqp.qp_num == 1 && new_state == IB_QPS_RTS) {
524 struct ipath_ibdev *dev = to_idev(ibqp->device);
525 519
526 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
527 }
528 ret = 0; 520 ret = 0;
529 goto bail; 521 goto bail;
530 522
531inval: 523inval:
532 spin_unlock(&qp->s_lock); 524 spin_unlock_irqrestore(&qp->s_lock, flags);
533 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
534 ret = -EINVAL; 525 ret = -EINVAL;
535 526
536bail: 527bail:
@@ -564,7 +555,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
564 attr->sq_draining = 0; 555 attr->sq_draining = 0;
565 attr->max_rd_atomic = 1; 556 attr->max_rd_atomic = 1;
566 attr->max_dest_rd_atomic = 1; 557 attr->max_dest_rd_atomic = 1;
567 attr->min_rnr_timer = qp->s_min_rnr_timer; 558 attr->min_rnr_timer = qp->r_min_rnr_timer;
568 attr->port_num = 1; 559 attr->port_num = 1;
569 attr->timeout = 0; 560 attr->timeout = 0;
570 attr->retry_cnt = qp->s_retry_cnt; 561 attr->retry_cnt = qp->s_retry_cnt;
@@ -591,16 +582,12 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
591 * @qp: the queue pair to compute the AETH for 582 * @qp: the queue pair to compute the AETH for
592 * 583 *
593 * Returns the AETH. 584 * Returns the AETH.
594 *
595 * The QP s_lock should be held.
596 */ 585 */
597__be32 ipath_compute_aeth(struct ipath_qp *qp) 586__be32 ipath_compute_aeth(struct ipath_qp *qp)
598{ 587{
599 u32 aeth = atomic_read(&qp->msn) & IPS_MSN_MASK; 588 u32 aeth = qp->r_msn & IPS_MSN_MASK;
600 589
601 if (qp->s_nak_state) { 590 if (qp->ibqp.srq) {
602 aeth |= qp->s_nak_state << IPS_AETH_CREDIT_SHIFT;
603 } else if (qp->ibqp.srq) {
604 /* 591 /*
605 * Shared receive queues don't generate credits. 592 * Shared receive queues don't generate credits.
606 * Set the credit field to the invalid value. 593 * Set the credit field to the invalid value.
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index bd2c405c4bf0..8568dd0538cf 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -42,7 +42,7 @@
42 * @qp: the QP who's SGE we're restarting 42 * @qp: the QP who's SGE we're restarting
43 * @wqe: the work queue to initialize the QP's SGE from 43 * @wqe: the work queue to initialize the QP's SGE from
44 * 44 *
45 * The QP s_lock should be held. 45 * The QP s_lock should be held and interrupts disabled.
46 */ 46 */
47static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) 47static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
48{ 48{
@@ -77,7 +77,6 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
77 struct ipath_other_headers *ohdr, 77 struct ipath_other_headers *ohdr,
78 u32 pmtu) 78 u32 pmtu)
79{ 79{
80 struct ipath_sge_state *ss;
81 u32 hwords; 80 u32 hwords;
82 u32 len; 81 u32 len;
83 u32 bth0; 82 u32 bth0;
@@ -91,7 +90,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
91 */ 90 */
92 switch (qp->s_ack_state) { 91 switch (qp->s_ack_state) {
93 case OP(RDMA_READ_REQUEST): 92 case OP(RDMA_READ_REQUEST):
94 ss = &qp->s_rdma_sge; 93 qp->s_cur_sge = &qp->s_rdma_sge;
95 len = qp->s_rdma_len; 94 len = qp->s_rdma_len;
96 if (len > pmtu) { 95 if (len > pmtu) {
97 len = pmtu; 96 len = pmtu;
@@ -108,7 +107,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
108 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); 107 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
109 /* FALLTHROUGH */ 108 /* FALLTHROUGH */
110 case OP(RDMA_READ_RESPONSE_MIDDLE): 109 case OP(RDMA_READ_RESPONSE_MIDDLE):
111 ss = &qp->s_rdma_sge; 110 qp->s_cur_sge = &qp->s_rdma_sge;
112 len = qp->s_rdma_len; 111 len = qp->s_rdma_len;
113 if (len > pmtu) 112 if (len > pmtu)
114 len = pmtu; 113 len = pmtu;
@@ -127,41 +126,50 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
127 * We have to prevent new requests from changing 126 * We have to prevent new requests from changing
128 * the r_sge state while a ipath_verbs_send() 127 * the r_sge state while a ipath_verbs_send()
129 * is in progress. 128 * is in progress.
130 * Changing r_state allows the receiver
131 * to continue processing new packets.
132 * We do it here now instead of above so
133 * that we are sure the packet was sent before
134 * changing the state.
135 */ 129 */
136 qp->r_state = OP(RDMA_READ_RESPONSE_LAST);
137 qp->s_ack_state = OP(ACKNOWLEDGE); 130 qp->s_ack_state = OP(ACKNOWLEDGE);
138 return 0; 131 bth0 = 0;
132 goto bail;
139 133
140 case OP(COMPARE_SWAP): 134 case OP(COMPARE_SWAP):
141 case OP(FETCH_ADD): 135 case OP(FETCH_ADD):
142 ss = NULL; 136 qp->s_cur_sge = NULL;
143 len = 0; 137 len = 0;
144 qp->r_state = OP(SEND_LAST); 138 /*
145 qp->s_ack_state = OP(ACKNOWLEDGE); 139 * Set the s_ack_state so the receive interrupt handler
146 bth0 = IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; 140 * won't try to send an ACK (out of order) until this one
141 * is actually sent.
142 */
143 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
144 bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
147 ohdr->u.at.aeth = ipath_compute_aeth(qp); 145 ohdr->u.at.aeth = ipath_compute_aeth(qp);
148 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); 146 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
149 hwords += sizeof(ohdr->u.at) / 4; 147 hwords += sizeof(ohdr->u.at) / 4;
150 break; 148 break;
151 149
152 default: 150 default:
153 /* Send a regular ACK. */ 151 /* Send a regular ACK. */
154 ss = NULL; 152 qp->s_cur_sge = NULL;
155 len = 0; 153 len = 0;
156 qp->s_ack_state = OP(ACKNOWLEDGE); 154 /*
157 bth0 = qp->s_ack_state << 24; 155 * Set the s_ack_state so the receive interrupt handler
158 ohdr->u.aeth = ipath_compute_aeth(qp); 156 * won't try to send an ACK (out of order) until this one
157 * is actually sent.
158 */
159 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
160 bth0 = OP(ACKNOWLEDGE) << 24;
161 if (qp->s_nak_state)
162 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPS_MSN_MASK) |
163 (qp->s_nak_state <<
164 IPS_AETH_CREDIT_SHIFT));
165 else
166 ohdr->u.aeth = ipath_compute_aeth(qp);
159 hwords++; 167 hwords++;
160 } 168 }
161 qp->s_hdrwords = hwords; 169 qp->s_hdrwords = hwords;
162 qp->s_cur_sge = ss;
163 qp->s_cur_size = len; 170 qp->s_cur_size = len;
164 171
172bail:
165 return bth0; 173 return bth0;
166} 174}
167 175
@@ -174,7 +182,7 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp,
174 * @bth2p: pointer to the BTH PSN word 182 * @bth2p: pointer to the BTH PSN word
175 * 183 *
176 * Return 1 if constructed; otherwise, return 0. 184 * Return 1 if constructed; otherwise, return 0.
177 * Note the QP s_lock must be held. 185 * Note the QP s_lock must be held and interrupts disabled.
178 */ 186 */
179int ipath_make_rc_req(struct ipath_qp *qp, 187int ipath_make_rc_req(struct ipath_qp *qp,
180 struct ipath_other_headers *ohdr, 188 struct ipath_other_headers *ohdr,
@@ -356,6 +364,11 @@ int ipath_make_rc_req(struct ipath_qp *qp,
356 bth2 |= qp->s_psn++ & IPS_PSN_MASK; 364 bth2 |= qp->s_psn++ & IPS_PSN_MASK;
357 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 365 if ((int)(qp->s_psn - qp->s_next_psn) > 0)
358 qp->s_next_psn = qp->s_psn; 366 qp->s_next_psn = qp->s_psn;
367 /*
368 * Put the QP on the pending list so lost ACKs will cause
369 * a retry. More than one request can be pending so the
370 * QP may already be on the dev->pending list.
371 */
359 spin_lock(&dev->pending_lock); 372 spin_lock(&dev->pending_lock);
360 if (list_empty(&qp->timerwait)) 373 if (list_empty(&qp->timerwait))
361 list_add_tail(&qp->timerwait, 374 list_add_tail(&qp->timerwait,
@@ -365,8 +378,8 @@ int ipath_make_rc_req(struct ipath_qp *qp,
365 378
366 case OP(RDMA_READ_RESPONSE_FIRST): 379 case OP(RDMA_READ_RESPONSE_FIRST):
367 /* 380 /*
368 * This case can only happen if a send is restarted. See 381 * This case can only happen if a send is restarted.
369 * ipath_restart_rc(). 382 * See ipath_restart_rc().
370 */ 383 */
371 ipath_init_restart(qp, wqe); 384 ipath_init_restart(qp, wqe);
372 /* FALLTHROUGH */ 385 /* FALLTHROUGH */
@@ -526,11 +539,17 @@ static void send_rc_ack(struct ipath_qp *qp)
526 ohdr = &hdr.u.l.oth; 539 ohdr = &hdr.u.l.oth;
527 lrh0 = IPS_LRH_GRH; 540 lrh0 = IPS_LRH_GRH;
528 } 541 }
542 /* read pkey_index w/o lock (its atomic) */
529 bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); 543 bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
530 ohdr->u.aeth = ipath_compute_aeth(qp); 544 if (qp->r_nak_state)
531 if (qp->s_ack_state >= OP(COMPARE_SWAP)) { 545 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPS_MSN_MASK) |
532 bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; 546 (qp->r_nak_state <<
533 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); 547 IPS_AETH_CREDIT_SHIFT));
548 else
549 ohdr->u.aeth = ipath_compute_aeth(qp);
550 if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
551 bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
552 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
534 hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; 553 hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
535 } else 554 } else
536 bth0 |= OP(ACKNOWLEDGE) << 24; 555 bth0 |= OP(ACKNOWLEDGE) << 24;
@@ -541,15 +560,36 @@ static void send_rc_ack(struct ipath_qp *qp)
541 hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); 560 hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
542 ohdr->bth[0] = cpu_to_be32(bth0); 561 ohdr->bth[0] = cpu_to_be32(bth0);
543 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); 562 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
544 ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK); 563 ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPS_PSN_MASK);
545 564
546 /* 565 /*
547 * If we can send the ACK, clear the ACK state. 566 * If we can send the ACK, clear the ACK state.
548 */ 567 */
549 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { 568 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
550 qp->s_ack_state = OP(ACKNOWLEDGE); 569 qp->r_ack_state = OP(ACKNOWLEDGE);
551 dev->n_rc_qacks++;
552 dev->n_unicast_xmit++; 570 dev->n_unicast_xmit++;
571 } else {
572 /*
573 * We are out of PIO buffers at the moment.
574 * Pass responsibility for sending the ACK to the
575 * send tasklet so that when a PIO buffer becomes
576 * available, the ACK is sent ahead of other outgoing
577 * packets.
578 */
579 dev->n_rc_qacks++;
580 spin_lock_irq(&qp->s_lock);
581 /* Don't coalesce if a RDMA read or atomic is pending. */
582 if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
583 qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
584 qp->s_ack_state = qp->r_ack_state;
585 qp->s_nak_state = qp->r_nak_state;
586 qp->s_ack_psn = qp->r_ack_psn;
587 qp->r_ack_state = OP(ACKNOWLEDGE);
588 }
589 spin_unlock_irq(&qp->s_lock);
590
591 /* Call ipath_do_rc_send() in another thread. */
592 tasklet_hi_schedule(&qp->s_task);
553 } 593 }
554} 594}
555 595
@@ -641,7 +681,7 @@ done:
641 * @psn: packet sequence number for the request 681 * @psn: packet sequence number for the request
642 * @wc: the work completion request 682 * @wc: the work completion request
643 * 683 *
644 * The QP s_lock should be held. 684 * The QP s_lock should be held and interrupts disabled.
645 */ 685 */
646void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) 686void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
647{ 687{
@@ -705,7 +745,7 @@ bail:
705 * 745 *
706 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK 746 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
707 * for the given QP. 747 * for the given QP.
708 * Called at interrupt level with the QP s_lock held. 748 * Called at interrupt level with the QP s_lock held and interrupts disabled.
709 * Returns 1 if OK, 0 if current operation should be aborted (NAK). 749 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
710 */ 750 */
711static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) 751static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
@@ -1126,18 +1166,16 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1126 * Don't queue the NAK if a RDMA read, atomic, or 1166 * Don't queue the NAK if a RDMA read, atomic, or
1127 * NAK is pending though. 1167 * NAK is pending though.
1128 */ 1168 */
1129 spin_lock(&qp->s_lock); 1169 if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
1130 if ((qp->s_ack_state >= OP(RDMA_READ_REQUEST) && 1170 qp->r_nak_state != 0)
1131 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) ||
1132 qp->s_nak_state != 0) {
1133 spin_unlock(&qp->s_lock);
1134 goto done; 1171 goto done;
1172 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1173 qp->r_ack_state = OP(SEND_ONLY);
1174 qp->r_nak_state = IB_NAK_PSN_ERROR;
1175 /* Use the expected PSN. */
1176 qp->r_ack_psn = qp->r_psn;
1135 } 1177 }
1136 qp->s_ack_state = OP(SEND_ONLY); 1178 goto send_ack;
1137 qp->s_nak_state = IB_NAK_PSN_ERROR;
1138 /* Use the expected PSN. */
1139 qp->s_ack_psn = qp->r_psn;
1140 goto resched;
1141 } 1179 }
1142 1180
1143 /* 1181 /*
@@ -1151,27 +1189,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1151 * send the earliest so that RDMA reads can be restarted at 1189 * send the earliest so that RDMA reads can be restarted at
1152 * the requester's expected PSN. 1190 * the requester's expected PSN.
1153 */ 1191 */
1154 spin_lock(&qp->s_lock); 1192 if (opcode == OP(RDMA_READ_REQUEST)) {
1155 if (qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE &&
1156 ipath_cmp24(psn, qp->s_ack_psn) >= 0) {
1157 if (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST)
1158 qp->s_ack_psn = psn;
1159 spin_unlock(&qp->s_lock);
1160 goto done;
1161 }
1162 switch (opcode) {
1163 case OP(RDMA_READ_REQUEST):
1164 /*
1165 * We have to be careful to not change s_rdma_sge
1166 * while ipath_do_rc_send() is using it and not
1167 * holding the s_lock.
1168 */
1169 if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
1170 qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
1171 spin_unlock(&qp->s_lock);
1172 dev->n_rdma_dup_busy++;
1173 goto done;
1174 }
1175 /* RETH comes after BTH */ 1193 /* RETH comes after BTH */
1176 if (!header_in_data) 1194 if (!header_in_data)
1177 reth = &ohdr->u.rc.reth; 1195 reth = &ohdr->u.rc.reth;
@@ -1179,6 +1197,22 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1179 reth = (struct ib_reth *)data; 1197 reth = (struct ib_reth *)data;
1180 data += sizeof(*reth); 1198 data += sizeof(*reth);
1181 } 1199 }
1200 /*
1201 * If we receive a duplicate RDMA request, it means the
1202 * requester saw a sequence error and needs to restart
1203 * from an earlier point. We can abort the current
1204 * RDMA read send in that case.
1205 */
1206 spin_lock_irq(&qp->s_lock);
1207 if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
1208 (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
1209 /*
1210 * We are already sending earlier requested data.
1211 * Don't abort it to send later out of sequence data.
1212 */
1213 spin_unlock_irq(&qp->s_lock);
1214 goto done;
1215 }
1182 qp->s_rdma_len = be32_to_cpu(reth->length); 1216 qp->s_rdma_len = be32_to_cpu(reth->length);
1183 if (qp->s_rdma_len != 0) { 1217 if (qp->s_rdma_len != 0) {
1184 u32 rkey = be32_to_cpu(reth->rkey); 1218 u32 rkey = be32_to_cpu(reth->rkey);
@@ -1192,8 +1226,10 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1192 ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, 1226 ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
1193 qp->s_rdma_len, vaddr, rkey, 1227 qp->s_rdma_len, vaddr, rkey,
1194 IB_ACCESS_REMOTE_READ); 1228 IB_ACCESS_REMOTE_READ);
1195 if (unlikely(!ok)) 1229 if (unlikely(!ok)) {
1230 spin_unlock_irq(&qp->s_lock);
1196 goto done; 1231 goto done;
1232 }
1197 } else { 1233 } else {
1198 qp->s_rdma_sge.sg_list = NULL; 1234 qp->s_rdma_sge.sg_list = NULL;
1199 qp->s_rdma_sge.num_sge = 0; 1235 qp->s_rdma_sge.num_sge = 0;
@@ -1202,25 +1238,44 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1202 qp->s_rdma_sge.sge.length = 0; 1238 qp->s_rdma_sge.sge.length = 0;
1203 qp->s_rdma_sge.sge.sge_length = 0; 1239 qp->s_rdma_sge.sge.sge_length = 0;
1204 } 1240 }
1205 break; 1241 qp->s_ack_state = opcode;
1242 qp->s_ack_psn = psn;
1243 spin_unlock_irq(&qp->s_lock);
1244 tasklet_hi_schedule(&qp->s_task);
1245 goto send_ack;
1246 }
1247
1248 /*
1249 * A pending RDMA read will ACK anything before it so
1250 * ignore earlier duplicate requests.
1251 */
1252 if (qp->s_ack_state != OP(ACKNOWLEDGE))
1253 goto done;
1206 1254
1255 /*
1256 * If an ACK is pending, don't replace the pending ACK
1257 * with an earlier one since the later one will ACK the earlier.
1258 * Also, if we already have a pending atomic, send it.
1259 */
1260 if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
1261 (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
1262 qp->r_ack_state >= OP(COMPARE_SWAP)))
1263 goto send_ack;
1264 switch (opcode) {
1207 case OP(COMPARE_SWAP): 1265 case OP(COMPARE_SWAP):
1208 case OP(FETCH_ADD): 1266 case OP(FETCH_ADD):
1209 /* 1267 /*
1210 * Check for the PSN of the last atomic operation 1268 * Check for the PSN of the last atomic operation
1211 * performed and resend the result if found. 1269 * performed and resend the result if found.
1212 */ 1270 */
1213 if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) { 1271 if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn)
1214 spin_unlock(&qp->s_lock);
1215 goto done; 1272 goto done;
1216 }
1217 qp->s_ack_atomic = qp->r_atomic_data;
1218 break; 1273 break;
1219 } 1274 }
1220 qp->s_ack_state = opcode; 1275 qp->r_ack_state = opcode;
1221 qp->s_nak_state = 0; 1276 qp->r_nak_state = 0;
1222 qp->s_ack_psn = psn; 1277 qp->r_ack_psn = psn;
1223resched: 1278send_ack:
1224 return 0; 1279 return 0;
1225 1280
1226done: 1281done:
@@ -1248,7 +1303,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1248 u32 hdrsize; 1303 u32 hdrsize;
1249 u32 psn; 1304 u32 psn;
1250 u32 pad; 1305 u32 pad;
1251 unsigned long flags;
1252 struct ib_wc wc; 1306 struct ib_wc wc;
1253 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 1307 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
1254 int diff; 1308 int diff;
@@ -1289,18 +1343,16 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1289 opcode <= OP(ATOMIC_ACKNOWLEDGE)) { 1343 opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1290 ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, 1344 ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
1291 hdrsize, pmtu, header_in_data); 1345 hdrsize, pmtu, header_in_data);
1292 goto bail; 1346 goto done;
1293 } 1347 }
1294 1348
1295 spin_lock_irqsave(&qp->r_rq.lock, flags);
1296
1297 /* Compute 24 bits worth of difference. */ 1349 /* Compute 24 bits worth of difference. */
1298 diff = ipath_cmp24(psn, qp->r_psn); 1350 diff = ipath_cmp24(psn, qp->r_psn);
1299 if (unlikely(diff)) { 1351 if (unlikely(diff)) {
1300 if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, 1352 if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
1301 psn, diff, header_in_data)) 1353 psn, diff, header_in_data))
1302 goto done; 1354 goto done;
1303 goto resched; 1355 goto send_ack;
1304 } 1356 }
1305 1357
1306 /* Check for opcode sequence errors. */ 1358 /* Check for opcode sequence errors. */
@@ -1312,22 +1364,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1312 opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 1364 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1313 break; 1365 break;
1314 nack_inv: 1366 nack_inv:
1315 /* 1367 /*
1316 * A NAK will ACK earlier sends and RDMA writes. Don't queue the 1368 * A NAK will ACK earlier sends and RDMA writes.
1317 * NAK if a RDMA read, atomic, or NAK is pending though. 1369 * Don't queue the NAK if a RDMA read, atomic, or NAK
1318 */ 1370 * is pending though.
1319 spin_lock(&qp->s_lock); 1371 */
1320 if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) && 1372 if (qp->r_ack_state >= OP(COMPARE_SWAP))
1321 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { 1373 goto send_ack;
1322 spin_unlock(&qp->s_lock); 1374 /* XXX Flush WQEs */
1323 goto done; 1375 qp->state = IB_QPS_ERR;
1324 } 1376 qp->r_ack_state = OP(SEND_ONLY);
1325 /* XXX Flush WQEs */ 1377 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1326 qp->state = IB_QPS_ERR; 1378 qp->r_ack_psn = qp->r_psn;
1327 qp->s_ack_state = OP(SEND_ONLY); 1379 goto send_ack;
1328 qp->s_nak_state = IB_NAK_INVALID_REQUEST;
1329 qp->s_ack_psn = qp->r_psn;
1330 goto resched;
1331 1380
1332 case OP(RDMA_WRITE_FIRST): 1381 case OP(RDMA_WRITE_FIRST):
1333 case OP(RDMA_WRITE_MIDDLE): 1382 case OP(RDMA_WRITE_MIDDLE):
@@ -1337,20 +1386,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1337 break; 1386 break;
1338 goto nack_inv; 1387 goto nack_inv;
1339 1388
1340 case OP(RDMA_READ_REQUEST):
1341 case OP(COMPARE_SWAP):
1342 case OP(FETCH_ADD):
1343 /*
1344 * Drop all new requests until a response has been sent. A
1345 * new request then ACKs the RDMA response we sent. Relaxed
1346 * ordering would allow new requests to be processed but we
1347 * would need to keep a queue of rwqe's for all that are in
1348 * progress. Note that we can't RNR NAK this request since
1349 * the RDMA READ or atomic response is already queued to be
1350 * sent (unless we implement a response send queue).
1351 */
1352 goto done;
1353
1354 default: 1389 default:
1355 if (opcode == OP(SEND_MIDDLE) || 1390 if (opcode == OP(SEND_MIDDLE) ||
1356 opcode == OP(SEND_LAST) || 1391 opcode == OP(SEND_LAST) ||
@@ -1359,6 +1394,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1359 opcode == OP(RDMA_WRITE_LAST) || 1394 opcode == OP(RDMA_WRITE_LAST) ||
1360 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) 1395 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1361 goto nack_inv; 1396 goto nack_inv;
1397 /*
1398 * Note that it is up to the requester to not send a new
1399 * RDMA read or atomic operation before receiving an ACK
1400 * for the previous operation.
1401 */
1362 break; 1402 break;
1363 } 1403 }
1364 1404
@@ -1375,17 +1415,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1375 * Don't queue the NAK if a RDMA read or atomic 1415 * Don't queue the NAK if a RDMA read or atomic
1376 * is pending though. 1416 * is pending though.
1377 */ 1417 */
1378 spin_lock(&qp->s_lock); 1418 if (qp->r_ack_state >= OP(COMPARE_SWAP))
1379 if (qp->s_ack_state >= 1419 goto send_ack;
1380 OP(RDMA_READ_REQUEST) && 1420 qp->r_ack_state = OP(SEND_ONLY);
1381 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { 1421 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1382 spin_unlock(&qp->s_lock); 1422 qp->r_ack_psn = qp->r_psn;
1383 goto done; 1423 goto send_ack;
1384 }
1385 qp->s_ack_state = OP(SEND_ONLY);
1386 qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer;
1387 qp->s_ack_psn = qp->r_psn;
1388 goto resched;
1389 } 1424 }
1390 qp->r_rcv_len = 0; 1425 qp->r_rcv_len = 0;
1391 /* FALLTHROUGH */ 1426 /* FALLTHROUGH */
@@ -1442,7 +1477,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1442 if (unlikely(wc.byte_len > qp->r_len)) 1477 if (unlikely(wc.byte_len > qp->r_len))
1443 goto nack_inv; 1478 goto nack_inv;
1444 ipath_copy_sge(&qp->r_sge, data, tlen); 1479 ipath_copy_sge(&qp->r_sge, data, tlen);
1445 atomic_inc(&qp->msn); 1480 qp->r_msn++;
1446 if (opcode == OP(RDMA_WRITE_LAST) || 1481 if (opcode == OP(RDMA_WRITE_LAST) ||
1447 opcode == OP(RDMA_WRITE_ONLY)) 1482 opcode == OP(RDMA_WRITE_ONLY))
1448 break; 1483 break;
@@ -1486,29 +1521,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1486 ok = ipath_rkey_ok(dev, &qp->r_sge, 1521 ok = ipath_rkey_ok(dev, &qp->r_sge,
1487 qp->r_len, vaddr, rkey, 1522 qp->r_len, vaddr, rkey,
1488 IB_ACCESS_REMOTE_WRITE); 1523 IB_ACCESS_REMOTE_WRITE);
1489 if (unlikely(!ok)) { 1524 if (unlikely(!ok))
1490 nack_acc: 1525 goto nack_acc;
1491 /*
1492 * A NAK will ACK earlier sends and RDMA
1493 * writes. Don't queue the NAK if a RDMA
1494 * read, atomic, or NAK is pending though.
1495 */
1496 spin_lock(&qp->s_lock);
1497 if (qp->s_ack_state >=
1498 OP(RDMA_READ_REQUEST) &&
1499 qp->s_ack_state !=
1500 IB_OPCODE_ACKNOWLEDGE) {
1501 spin_unlock(&qp->s_lock);
1502 goto done;
1503 }
1504 /* XXX Flush WQEs */
1505 qp->state = IB_QPS_ERR;
1506 qp->s_ack_state = OP(RDMA_WRITE_ONLY);
1507 qp->s_nak_state =
1508 IB_NAK_REMOTE_ACCESS_ERROR;
1509 qp->s_ack_psn = qp->r_psn;
1510 goto resched;
1511 }
1512 } else { 1526 } else {
1513 qp->r_sge.sg_list = NULL; 1527 qp->r_sge.sg_list = NULL;
1514 qp->r_sge.sge.mr = NULL; 1528 qp->r_sge.sge.mr = NULL;
@@ -1535,12 +1549,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1535 reth = (struct ib_reth *)data; 1549 reth = (struct ib_reth *)data;
1536 data += sizeof(*reth); 1550 data += sizeof(*reth);
1537 } 1551 }
1538 spin_lock(&qp->s_lock); 1552 if (unlikely(!(qp->qp_access_flags &
1539 if (qp->s_ack_state != OP(ACKNOWLEDGE) && 1553 IB_ACCESS_REMOTE_READ)))
1540 qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) { 1554 goto nack_acc;
1541 spin_unlock(&qp->s_lock); 1555 spin_lock_irq(&qp->s_lock);
1542 goto done;
1543 }
1544 qp->s_rdma_len = be32_to_cpu(reth->length); 1556 qp->s_rdma_len = be32_to_cpu(reth->length);
1545 if (qp->s_rdma_len != 0) { 1557 if (qp->s_rdma_len != 0) {
1546 u32 rkey = be32_to_cpu(reth->rkey); 1558 u32 rkey = be32_to_cpu(reth->rkey);
@@ -1552,7 +1564,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1552 qp->s_rdma_len, vaddr, rkey, 1564 qp->s_rdma_len, vaddr, rkey,
1553 IB_ACCESS_REMOTE_READ); 1565 IB_ACCESS_REMOTE_READ);
1554 if (unlikely(!ok)) { 1566 if (unlikely(!ok)) {
1555 spin_unlock(&qp->s_lock); 1567 spin_unlock_irq(&qp->s_lock);
1556 goto nack_acc; 1568 goto nack_acc;
1557 } 1569 }
1558 /* 1570 /*
@@ -1569,21 +1581,25 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1569 qp->s_rdma_sge.sge.length = 0; 1581 qp->s_rdma_sge.sge.length = 0;
1570 qp->s_rdma_sge.sge.sge_length = 0; 1582 qp->s_rdma_sge.sge.sge_length = 0;
1571 } 1583 }
1572 if (unlikely(!(qp->qp_access_flags &
1573 IB_ACCESS_REMOTE_READ)))
1574 goto nack_acc;
1575 /* 1584 /*
1576 * We need to increment the MSN here instead of when we 1585 * We need to increment the MSN here instead of when we
1577 * finish sending the result since a duplicate request would 1586 * finish sending the result since a duplicate request would
1578 * increment it more than once. 1587 * increment it more than once.
1579 */ 1588 */
1580 atomic_inc(&qp->msn); 1589 qp->r_msn++;
1590
1581 qp->s_ack_state = opcode; 1591 qp->s_ack_state = opcode;
1582 qp->s_nak_state = 0;
1583 qp->s_ack_psn = psn; 1592 qp->s_ack_psn = psn;
1593 spin_unlock_irq(&qp->s_lock);
1594
1584 qp->r_psn++; 1595 qp->r_psn++;
1585 qp->r_state = opcode; 1596 qp->r_state = opcode;
1586 goto rdmadone; 1597 qp->r_nak_state = 0;
1598
1599 /* Call ipath_do_rc_send() in another thread. */
1600 tasklet_hi_schedule(&qp->s_task);
1601
1602 goto done;
1587 1603
1588 case OP(COMPARE_SWAP): 1604 case OP(COMPARE_SWAP):
1589 case OP(FETCH_ADD): { 1605 case OP(FETCH_ADD): {
@@ -1612,7 +1628,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1612 goto nack_acc; 1628 goto nack_acc;
1613 /* Perform atomic OP and save result. */ 1629 /* Perform atomic OP and save result. */
1614 sdata = be64_to_cpu(ateth->swap_data); 1630 sdata = be64_to_cpu(ateth->swap_data);
1615 spin_lock(&dev->pending_lock); 1631 spin_lock_irq(&dev->pending_lock);
1616 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; 1632 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
1617 if (opcode == OP(FETCH_ADD)) 1633 if (opcode == OP(FETCH_ADD))
1618 *(u64 *) qp->r_sge.sge.vaddr = 1634 *(u64 *) qp->r_sge.sge.vaddr =
@@ -1620,8 +1636,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1620 else if (qp->r_atomic_data == 1636 else if (qp->r_atomic_data ==
1621 be64_to_cpu(ateth->compare_data)) 1637 be64_to_cpu(ateth->compare_data))
1622 *(u64 *) qp->r_sge.sge.vaddr = sdata; 1638 *(u64 *) qp->r_sge.sge.vaddr = sdata;
1623 spin_unlock(&dev->pending_lock); 1639 spin_unlock_irq(&dev->pending_lock);
1624 atomic_inc(&qp->msn); 1640 qp->r_msn++;
1625 qp->r_atomic_psn = psn & IPS_PSN_MASK; 1641 qp->r_atomic_psn = psn & IPS_PSN_MASK;
1626 psn |= 1 << 31; 1642 psn |= 1 << 31;
1627 break; 1643 break;
@@ -1633,44 +1649,39 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1633 } 1649 }
1634 qp->r_psn++; 1650 qp->r_psn++;
1635 qp->r_state = opcode; 1651 qp->r_state = opcode;
1652 qp->r_nak_state = 0;
1636 /* Send an ACK if requested or required. */ 1653 /* Send an ACK if requested or required. */
1637 if (psn & (1 << 31)) { 1654 if (psn & (1 << 31)) {
1638 /* 1655 /*
1639 * Coalesce ACKs unless there is a RDMA READ or 1656 * Coalesce ACKs unless there is a RDMA READ or
1640 * ATOMIC pending. 1657 * ATOMIC pending.
1641 */ 1658 */
1642 spin_lock(&qp->s_lock); 1659 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1643 if (qp->s_ack_state == OP(ACKNOWLEDGE) || 1660 qp->r_ack_state = opcode;
1644 qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) { 1661 qp->r_ack_psn = psn;
1645 qp->s_ack_state = opcode;
1646 qp->s_nak_state = 0;
1647 qp->s_ack_psn = psn;
1648 qp->s_ack_atomic = qp->r_atomic_data;
1649 goto resched;
1650 } 1662 }
1651 spin_unlock(&qp->s_lock); 1663 goto send_ack;
1652 } 1664 }
1653done: 1665 goto done;
1654 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1655 goto bail;
1656 1666
1657resched: 1667nack_acc:
1658 /* 1668 /*
1659 * Try to send ACK right away but not if ipath_do_rc_send() is 1669 * A NAK will ACK earlier sends and RDMA writes.
1660 * active. 1670 * Don't queue the NAK if a RDMA read, atomic, or NAK
1671 * is pending though.
1661 */ 1672 */
1662 if (qp->s_hdrwords == 0 && 1673 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1663 (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST || 1674 /* XXX Flush WQEs */
1664 qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP)) 1675 qp->state = IB_QPS_ERR;
1676 qp->r_ack_state = OP(RDMA_WRITE_ONLY);
1677 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1678 qp->r_ack_psn = qp->r_psn;
1679 }
1680send_ack:
1681 /* Send ACK right away unless the send tasklet has a pending ACK. */
1682 if (qp->s_ack_state == OP(ACKNOWLEDGE))
1665 send_rc_ack(qp); 1683 send_rc_ack(qp);
1666 1684
1667rdmadone: 1685done:
1668 spin_unlock(&qp->s_lock);
1669 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1670
1671 /* Call ipath_do_rc_send() in another thread. */
1672 tasklet_hi_schedule(&qp->s_task);
1673
1674bail:
1675 return; 1686 return;
1676} 1687}
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 9a456a7ce352..99c0652d49dc 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -113,20 +113,23 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
113 * 113 *
114 * Return 0 if no RWQE is available, otherwise return 1. 114 * Return 0 if no RWQE is available, otherwise return 1.
115 * 115 *
116 * Called at interrupt level with the QP r_rq.lock held. 116 * Can be called from interrupt level.
117 */ 117 */
118int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) 118int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
119{ 119{
120 unsigned long flags;
120 struct ipath_rq *rq; 121 struct ipath_rq *rq;
121 struct ipath_srq *srq; 122 struct ipath_srq *srq;
122 struct ipath_rwqe *wqe; 123 struct ipath_rwqe *wqe;
123 int ret; 124 int ret = 1;
124 125
125 if (!qp->ibqp.srq) { 126 if (!qp->ibqp.srq) {
126 rq = &qp->r_rq; 127 rq = &qp->r_rq;
128 spin_lock_irqsave(&rq->lock, flags);
129
127 if (unlikely(rq->tail == rq->head)) { 130 if (unlikely(rq->tail == rq->head)) {
128 ret = 0; 131 ret = 0;
129 goto bail; 132 goto done;
130 } 133 }
131 wqe = get_rwqe_ptr(rq, rq->tail); 134 wqe = get_rwqe_ptr(rq, rq->tail);
132 qp->r_wr_id = wqe->wr_id; 135 qp->r_wr_id = wqe->wr_id;
@@ -138,17 +141,16 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
138 } 141 }
139 if (++rq->tail >= rq->size) 142 if (++rq->tail >= rq->size)
140 rq->tail = 0; 143 rq->tail = 0;
141 ret = 1; 144 goto done;
142 goto bail;
143 } 145 }
144 146
145 srq = to_isrq(qp->ibqp.srq); 147 srq = to_isrq(qp->ibqp.srq);
146 rq = &srq->rq; 148 rq = &srq->rq;
147 spin_lock(&rq->lock); 149 spin_lock_irqsave(&rq->lock, flags);
150
148 if (unlikely(rq->tail == rq->head)) { 151 if (unlikely(rq->tail == rq->head)) {
149 spin_unlock(&rq->lock);
150 ret = 0; 152 ret = 0;
151 goto bail; 153 goto done;
152 } 154 }
153 wqe = get_rwqe_ptr(rq, rq->tail); 155 wqe = get_rwqe_ptr(rq, rq->tail);
154 qp->r_wr_id = wqe->wr_id; 156 qp->r_wr_id = wqe->wr_id;
@@ -170,18 +172,18 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
170 n = rq->head - rq->tail; 172 n = rq->head - rq->tail;
171 if (n < srq->limit) { 173 if (n < srq->limit) {
172 srq->limit = 0; 174 srq->limit = 0;
173 spin_unlock(&rq->lock); 175 spin_unlock_irqrestore(&rq->lock, flags);
174 ev.device = qp->ibqp.device; 176 ev.device = qp->ibqp.device;
175 ev.element.srq = qp->ibqp.srq; 177 ev.element.srq = qp->ibqp.srq;
176 ev.event = IB_EVENT_SRQ_LIMIT_REACHED; 178 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
177 srq->ibsrq.event_handler(&ev, 179 srq->ibsrq.event_handler(&ev,
178 srq->ibsrq.srq_context); 180 srq->ibsrq.srq_context);
179 } else 181 goto bail;
180 spin_unlock(&rq->lock); 182 }
181 } else 183 }
182 spin_unlock(&rq->lock);
183 ret = 1;
184 184
185done:
186 spin_unlock_irqrestore(&rq->lock, flags);
185bail: 187bail:
186 return ret; 188 return ret;
187} 189}
@@ -248,10 +250,8 @@ again:
248 wc.imm_data = wqe->wr.imm_data; 250 wc.imm_data = wqe->wr.imm_data;
249 /* FALLTHROUGH */ 251 /* FALLTHROUGH */
250 case IB_WR_SEND: 252 case IB_WR_SEND:
251 spin_lock_irqsave(&qp->r_rq.lock, flags);
252 if (!ipath_get_rwqe(qp, 0)) { 253 if (!ipath_get_rwqe(qp, 0)) {
253 rnr_nak: 254 rnr_nak:
254 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
255 /* Handle RNR NAK */ 255 /* Handle RNR NAK */
256 if (qp->ibqp.qp_type == IB_QPT_UC) 256 if (qp->ibqp.qp_type == IB_QPT_UC)
257 goto send_comp; 257 goto send_comp;
@@ -263,20 +263,17 @@ again:
263 sqp->s_rnr_retry--; 263 sqp->s_rnr_retry--;
264 dev->n_rnr_naks++; 264 dev->n_rnr_naks++;
265 sqp->s_rnr_timeout = 265 sqp->s_rnr_timeout =
266 ib_ipath_rnr_table[sqp->s_min_rnr_timer]; 266 ib_ipath_rnr_table[sqp->r_min_rnr_timer];
267 ipath_insert_rnr_queue(sqp); 267 ipath_insert_rnr_queue(sqp);
268 goto done; 268 goto done;
269 } 269 }
270 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
271 break; 270 break;
272 271
273 case IB_WR_RDMA_WRITE_WITH_IMM: 272 case IB_WR_RDMA_WRITE_WITH_IMM:
274 wc.wc_flags = IB_WC_WITH_IMM; 273 wc.wc_flags = IB_WC_WITH_IMM;
275 wc.imm_data = wqe->wr.imm_data; 274 wc.imm_data = wqe->wr.imm_data;
276 spin_lock_irqsave(&qp->r_rq.lock, flags);
277 if (!ipath_get_rwqe(qp, 1)) 275 if (!ipath_get_rwqe(qp, 1))
278 goto rnr_nak; 276 goto rnr_nak;
279 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
280 /* FALLTHROUGH */ 277 /* FALLTHROUGH */
281 case IB_WR_RDMA_WRITE: 278 case IB_WR_RDMA_WRITE:
282 if (wqe->length == 0) 279 if (wqe->length == 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 89b3e1a5e3e3..10516842bb82 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -241,7 +241,6 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
241 u32 hdrsize; 241 u32 hdrsize;
242 u32 psn; 242 u32 psn;
243 u32 pad; 243 u32 pad;
244 unsigned long flags;
245 struct ib_wc wc; 244 struct ib_wc wc;
246 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 245 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
247 struct ib_reth *reth; 246 struct ib_reth *reth;
@@ -279,8 +278,6 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
279 wc.imm_data = 0; 278 wc.imm_data = 0;
280 wc.wc_flags = 0; 279 wc.wc_flags = 0;
281 280
282 spin_lock_irqsave(&qp->r_rq.lock, flags);
283
284 /* Compare the PSN verses the expected PSN. */ 281 /* Compare the PSN verses the expected PSN. */
285 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { 282 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
286 /* 283 /*
@@ -537,15 +534,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
537 534
538 default: 535 default:
539 /* Drop packet for unknown opcodes. */ 536 /* Drop packet for unknown opcodes. */
540 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
541 dev->n_pkt_drops++; 537 dev->n_pkt_drops++;
542 goto bail; 538 goto done;
543 } 539 }
544 qp->r_psn++; 540 qp->r_psn++;
545 qp->r_state = opcode; 541 qp->r_state = opcode;
546done: 542done:
547 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
548
549bail:
550 return; 543 return;
551} 544}
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 1cb797086679..2df684727dc1 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -307,32 +307,34 @@ struct ipath_qp {
307 u32 s_next_psn; /* PSN for next request */ 307 u32 s_next_psn; /* PSN for next request */
308 u32 s_last_psn; /* last response PSN processed */ 308 u32 s_last_psn; /* last response PSN processed */
309 u32 s_psn; /* current packet sequence number */ 309 u32 s_psn; /* current packet sequence number */
310 u32 s_ack_psn; /* PSN for RDMA_READ */
310 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ 311 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
311 u32 s_ack_psn; /* PSN for next ACK or RDMA_READ */ 312 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
312 u64 s_ack_atomic; /* data for atomic ACK */
313 u64 r_wr_id; /* ID for current receive WQE */ 313 u64 r_wr_id; /* ID for current receive WQE */
314 u64 r_atomic_data; /* data for last atomic op */ 314 u64 r_atomic_data; /* data for last atomic op */
315 u32 r_atomic_psn; /* PSN of last atomic op */ 315 u32 r_atomic_psn; /* PSN of last atomic op */
316 u32 r_len; /* total length of r_sge */ 316 u32 r_len; /* total length of r_sge */
317 u32 r_rcv_len; /* receive data len processed */ 317 u32 r_rcv_len; /* receive data len processed */
318 u32 r_psn; /* expected rcv packet sequence number */ 318 u32 r_psn; /* expected rcv packet sequence number */
319 u32 r_msn; /* message sequence number */
319 u8 state; /* QP state */ 320 u8 state; /* QP state */
320 u8 s_state; /* opcode of last packet sent */ 321 u8 s_state; /* opcode of last packet sent */
321 u8 s_ack_state; /* opcode of packet to ACK */ 322 u8 s_ack_state; /* opcode of packet to ACK */
322 u8 s_nak_state; /* non-zero if NAK is pending */ 323 u8 s_nak_state; /* non-zero if NAK is pending */
323 u8 r_state; /* opcode of last packet received */ 324 u8 r_state; /* opcode of last packet received */
325 u8 r_ack_state; /* opcode of packet to ACK */
326 u8 r_nak_state; /* non-zero if NAK is pending */
327 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
324 u8 r_reuse_sge; /* for UC receive errors */ 328 u8 r_reuse_sge; /* for UC receive errors */
325 u8 r_sge_inx; /* current index into sg_list */ 329 u8 r_sge_inx; /* current index into sg_list */
326 u8 s_max_sge; /* size of s_wq->sg_list */
327 u8 qp_access_flags; 330 u8 qp_access_flags;
331 u8 s_max_sge; /* size of s_wq->sg_list */
328 u8 s_retry_cnt; /* number of times to retry */ 332 u8 s_retry_cnt; /* number of times to retry */
329 u8 s_rnr_retry_cnt; 333 u8 s_rnr_retry_cnt;
330 u8 s_min_rnr_timer;
331 u8 s_retry; /* requester retry counter */ 334 u8 s_retry; /* requester retry counter */
332 u8 s_rnr_retry; /* requester RNR retry counter */ 335 u8 s_rnr_retry; /* requester RNR retry counter */
333 u8 s_pkey_index; /* PKEY index to use */ 336 u8 s_pkey_index; /* PKEY index to use */
334 enum ib_mtu path_mtu; 337 enum ib_mtu path_mtu;
335 atomic_t msn; /* message sequence number */
336 u32 remote_qpn; 338 u32 remote_qpn;
337 u32 qkey; /* QKEY for this QP (for UD or RD) */ 339 u32 qkey; /* QKEY for this QP (for UD or RD) */
338 u32 s_size; /* send work queue size */ 340 u32 s_size; /* send work queue size */