author	Mike Marciniszyn <mike.marciniszyn@qlogic.com>	2011-01-10 20:42:22 -0500
committer	Roland Dreier <rolandd@cisco.com>	2011-01-10 20:42:22 -0500
commit	994bcd28a36af1413381dfe0aac065e2cbc2af40
tree	26f4685a39d94dbfbab346862061ef6f577653f2 /drivers/infiniband
parent	2a600f14d25fda341b5633c75cc50a7574fc1007
IB/qib: Issue pre-emptive NAKs on eager buffer overflow
Under congestion resulting in eager buffer overflow, attempt to send
pre-emptive NAKs if header queue entries with TID errors are generated
and a valid header is present. This prevents long timeouts and flow
restarts if a trailing set of packets is dropped due to eager
overflows. Pre-emptive NAKs are currently only supported for RDMA
writes.

Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
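In short, the patch turns an eager-overflow TID error into an early
IB_NAK_PSN_ERROR instead of waiting for the sender's retry timer to
expire. The following is a minimal standalone sketch of that decision,
not driver code: struct rc_qp_sketch and schedule_preemptive_nak() are
invented names for illustration, cmp24() mirrors the driver's
qib_cmp24(), and the NAK code value is assumed to match the driver's
IB_NAK_PSN_ERROR.

/* Standalone sketch; not the driver code. */
#include <stdio.h>
#include <stdint.h>

#define IB_NAK_PSN_ERROR 0x60	/* assumed NAK code for a PSN sequence error */

/* 24-bit circular PSN compare, mirroring the driver's qib_cmp24(). */
static int cmp24(uint32_t a, uint32_t b)
{
	return (((int) a) - ((int) b)) << 8 >> 8;
}

/* Invented stand-in for the RC QP receive-side state. */
struct rc_qp_sketch {
	uint32_t r_psn;		/* next PSN expected on receive */
	uint32_t r_ack_psn;	/* PSN the NAK will carry */
	uint8_t r_nak_state;	/* nonzero once a NAK is pending */
};

/* Returns 1 if a pre-emptive NAK was scheduled, else 0. */
static int schedule_preemptive_nak(struct rc_qp_sketch *qp, uint32_t psn)
{
	int diff = cmp24(psn, qp->r_psn);

	/* NAK only once, and only for packets at or ahead of r_psn. */
	if (qp->r_nak_state || diff < 0)
		return 0;
	qp->r_nak_state = IB_NAK_PSN_ERROR;
	qp->r_ack_psn = qp->r_psn;	/* NAK carries the expected PSN */
	return 1;
}

int main(void)
{
	struct rc_qp_sketch qp1 = { .r_psn = 100 };
	struct rc_qp_sketch qp2 = { .r_psn = 100 };

	/* Trailing drop: PSN 102 arrives as a TID error -> NAK with PSN 100. */
	printf("nak=%d ack_psn=%u\n",
	       schedule_preemptive_nak(&qp1, 102), qp1.r_ack_psn);
	/* Stale duplicate: PSN 99 is behind the window -> no NAK. */
	printf("nak=%d\n", schedule_preemptive_nak(&qp2, 99));
	return 0;
}

The one-shot check on r_nak_state and the diff >= 0 guard are what keep
duplicate or stale packets from generating spurious NAKs.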
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--	drivers/infiniband/hw/qib/qib_driver.c	141
1 file changed, 137 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 816a6bdc0b1c..23e584f4c36c 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -289,14 +289,147 @@ static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
  * Returns 1 if error was a CRC, else 0.
  * Needed for some chip's synthesized error counters.
  */
-static u32 qib_rcv_hdrerr(struct qib_pportdata *ppd, u32 ctxt,
-			  u32 eflags, u32 l, u32 etail, __le32 *rhf_addr,
-			  struct qib_message_header *hdr)
+static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
+			  u32 ctxt, u32 eflags, u32 l, u32 etail,
+			  __le32 *rhf_addr, struct qib_message_header *rhdr)
 {
 	u32 ret = 0;
 
 	if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
 		ret = 1;
+	else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
+		/* For TIDERR and RC QPs preemptively schedule a NAK */
+		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
+		struct qib_other_headers *ohdr = NULL;
+		struct qib_ibport *ibp = &ppd->ibport_data;
+		struct qib_qp *qp = NULL;
+		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
+		u16 lid = be16_to_cpu(hdr->lrh[1]);
+		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+		u32 qp_num;
+		u32 opcode;
+		u32 psn;
+		int diff;
+		unsigned long flags;
+
+		/* Sanity check packet */
+		if (tlen < 24)
+			goto drop;
+
+		if (lid < QIB_MULTICAST_LID_BASE) {
+			lid &= ~((1 << ppd->lmc) - 1);
+			if (unlikely(lid != ppd->lid))
+				goto drop;
+		}
+
+		/* Check for GRH */
+		if (lnh == QIB_LRH_BTH)
+			ohdr = &hdr->u.oth;
+		else if (lnh == QIB_LRH_GRH) {
+			u32 vtf;
+
+			ohdr = &hdr->u.l.oth;
+			if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+				goto drop;
+			vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+				goto drop;
+		} else
+			goto drop;
+
+		/* Get opcode and PSN from packet */
+		opcode = be32_to_cpu(ohdr->bth[0]);
+		opcode >>= 24;
+		psn = be32_to_cpu(ohdr->bth[2]);
+
+		/* Get the destination QP number. */
+		qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+		if (qp_num != QIB_MULTICAST_QPN) {
+			int ruc_res;
+			qp = qib_lookup_qpn(ibp, qp_num);
+			if (!qp)
+				goto drop;
+
+			/*
+			 * Handle only RC QPs - for other QP types drop error
+			 * packet.
+			 */
+			spin_lock(&qp->r_lock);
+
+			/* Check for valid receive state. */
+			if (!(ib_qib_state_ops[qp->state] &
+			      QIB_PROCESS_RECV_OK)) {
+				ibp->n_pkt_drops++;
+				goto unlock;
+			}
+
+			switch (qp->ibqp.qp_type) {
+			case IB_QPT_RC:
+				spin_lock_irqsave(&qp->s_lock, flags);
+				ruc_res =
+					qib_ruc_check_hdr(
+						ibp, hdr,
+						lnh == QIB_LRH_GRH,
+						qp,
+						be32_to_cpu(ohdr->bth[0]));
+				if (ruc_res) {
+					spin_unlock_irqrestore(&qp->s_lock,
+							flags);
+					goto unlock;
+				}
+				spin_unlock_irqrestore(&qp->s_lock, flags);
+
+				/* Only deal with RDMA Writes for now */
+				if (opcode <
+				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
+					diff = qib_cmp24(psn, qp->r_psn);
+					if (!qp->r_nak_state && diff >= 0) {
+						ibp->n_rc_seqnak++;
+						qp->r_nak_state =
+							IB_NAK_PSN_ERROR;
+						/* Use the expected PSN. */
+						qp->r_ack_psn = qp->r_psn;
+						/*
+						 * Wait to send the sequence
+						 * NAK until all packets
+						 * in the receive queue have
+						 * been processed.
+						 * Otherwise, we end up
+						 * propagating congestion.
+						 */
+						if (list_empty(&qp->rspwait)) {
+							qp->r_flags |=
+								QIB_R_RSP_NAK;
+							atomic_inc(
+								&qp->refcount);
+							list_add_tail(
+							 &qp->rspwait,
+							 &rcd->qp_wait_list);
+						}
+					} /* Out of sequence NAK */
+				} /* QP Request NAKs */
+				break;
+			case IB_QPT_SMI:
+			case IB_QPT_GSI:
+			case IB_QPT_UD:
+			case IB_QPT_UC:
+			default:
+				/* For now don't handle any other QP types */
+				break;
+			}
+
+unlock:
+			spin_unlock(&qp->r_lock);
+			/*
+			 * Notify qib_destroy_qp() if it is waiting
+			 * for us to finish.
+			 */
+			if (atomic_dec_and_test(&qp->refcount))
+				wake_up(&qp->wait);
+		} /* Unicast QP */
+	} /* Valid packet with TIDErr */
+
+drop:
 	return ret;
 }
 
@@ -376,7 +509,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
 	 * packets; only qibhdrerr should be set.
 	 */
 	if (unlikely(eflags))
-		crcs += qib_rcv_hdrerr(ppd, rcd->ctxt, eflags, l,
+		crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
 				       etail, rhf_addr, hdr);
 	else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
 		qib_ib_rcv(rcd, hdr, ebuf, tlen);
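
Two details in the hunks above are worth noting. The call-site change
exists because qib_rcv_hdrerr() now needs rcd to reach
rcd->qp_wait_list. And the NAK is deliberately queued on that list
rather than sent from the error path, so it goes out only after the
receive queue has drained; per the in-line comment, responding
immediately would propagate congestion. Below is a standalone sketch of
that queue-once-then-drain pattern, with invented names standing in for
qp->rspwait, QIB_R_RSP_NAK, and the atomic QP refcount.

/* Standalone sketch; not the driver code. */
#include <stdio.h>

#define MAX_WAIT 8

/* Invented stand-in for a QP that may owe a deferred response. */
struct qp_sketch {
	int num;
	int nak_pending;	/* a NAK was recorded by the error path */
	int queued;		/* stands in for !list_empty(&qp->rspwait) */
	int refcount;		/* stands in for the atomic QP refcount */
};

static struct qp_sketch *wait_list[MAX_WAIT];
static int wait_len;

/* Error path: put the QP on the wait list at most once. */
static void queue_response(struct qp_sketch *qp)
{
	if (qp->queued || wait_len == MAX_WAIT)
		return;
	qp->queued = 1;
	qp->refcount++;		/* hold the QP while it sits on the list */
	wait_list[wait_len++] = qp;
}

/* After the receive loop: send deferred NAKs and drop references. */
static void drain_wait_list(void)
{
	for (int i = 0; i < wait_len; i++) {
		struct qp_sketch *qp = wait_list[i];

		qp->queued = 0;
		if (qp->nak_pending)
			printf("QP %d: sending deferred NAK\n", qp->num);
		qp->nak_pending = 0;
		qp->refcount--;
	}
	wait_len = 0;
}

int main(void)
{
	struct qp_sketch qp = { .num = 7, .nak_pending = 1 };

	/* Two TID errors in one interrupt queue the QP only once. */
	queue_response(&qp);
	queue_response(&qp);
	drain_wait_list();	/* prints exactly one deferred NAK */
	return 0;
}

The queued flag plays the role of the list_empty(&qp->rspwait) test: a
QP that takes several TID errors in one pass still lands on the wait
list, and sends its NAK, exactly once.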