aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ehca/ehca_reqs.c
diff options
context:
space:
mode:
authorJoachim Fenkes <fenkes@de.ibm.com>2008-01-17 09:07:24 -0500
committerRoland Dreier <rolandd@cisco.com>2008-01-25 17:15:44 -0500
commit2ec8e662416cc9a171cdfe3d75e1ff00ba757859 (patch)
treedbc881205c3177db9a7816385b4fdda762bdb04c /drivers/infiniband/hw/ehca/ehca_reqs.c
parentbbdd267ef2796e96b461b8447b2026ce06e6ec4b (diff)
IB/ehca: Prevent RDMA-related connection failures on some eHCA2 hardware
Some HW revisions of eHCA2 may cause an RC connection to break if they received RDMA Reads over that connection before. This can be prevented by assuring that, after the first RDMA Read, the QP receives a new RDMA Read every few million link packets. Include code into the driver that inserts an empty (size 0) RDMA Read into the message stream every now and then if the consumer doesn't post them frequently enough. Signed-off-by: Joachim Fenkes <fenkes@de.ibm.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ehca/ehca_reqs.c')
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c112
1 files changed, 80 insertions, 32 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index ea91360835d..3aacc8cf1e4 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -50,6 +50,9 @@
50#include "hcp_if.h" 50#include "hcp_if.h"
51#include "hipz_fns.h" 51#include "hipz_fns.h"
52 52
53/* in RC traffic, insert an empty RDMA READ every this many packets */
54#define ACK_CIRC_THRESHOLD 2000000
55
53static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, 56static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
54 struct ehca_wqe *wqe_p, 57 struct ehca_wqe *wqe_p,
55 struct ib_recv_wr *recv_wr) 58 struct ib_recv_wr *recv_wr)
@@ -81,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
81 if (ehca_debug_level) { 84 if (ehca_debug_level) {
82 ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", 85 ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
83 ipz_rqueue); 86 ipz_rqueue);
84 ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); 87 ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
85 } 88 }
86 89
87 return 0; 90 return 0;
@@ -135,7 +138,8 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
135 138
136static inline int ehca_write_swqe(struct ehca_qp *qp, 139static inline int ehca_write_swqe(struct ehca_qp *qp,
137 struct ehca_wqe *wqe_p, 140 struct ehca_wqe *wqe_p,
138 const struct ib_send_wr *send_wr) 141 const struct ib_send_wr *send_wr,
142 int hidden)
139{ 143{
140 u32 idx; 144 u32 idx;
141 u64 dma_length; 145 u64 dma_length;
@@ -176,7 +180,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
176 180
177 wqe_p->wr_flag = 0; 181 wqe_p->wr_flag = 0;
178 182
179 if (send_wr->send_flags & IB_SEND_SIGNALED) 183 if ((send_wr->send_flags & IB_SEND_SIGNALED ||
184 qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
185 && !hidden)
180 wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; 186 wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
181 187
182 if (send_wr->opcode == IB_WR_SEND_WITH_IMM || 188 if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
@@ -199,7 +205,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
199 205
200 wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; 206 wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
201 wqe_p->local_ee_context_qkey = remote_qkey; 207 wqe_p->local_ee_context_qkey = remote_qkey;
202 if (!send_wr->wr.ud.ah) { 208 if (unlikely(!send_wr->wr.ud.ah)) {
203 ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); 209 ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
204 return -EINVAL; 210 return -EINVAL;
205 } 211 }
@@ -255,6 +261,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
255 } /* eof idx */ 261 } /* eof idx */
256 wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; 262 wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
257 263
264 /* unsolicited ack circumvention */
265 if (send_wr->opcode == IB_WR_RDMA_READ) {
266 /* on RDMA read, switch on and reset counters */
267 qp->message_count = qp->packet_count = 0;
268 qp->unsol_ack_circ = 1;
269 } else
270 /* else estimate #packets */
271 qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
272
258 break; 273 break;
259 274
260 default: 275 default:
@@ -355,13 +370,49 @@ static inline void map_ib_wc_status(u32 cqe_status,
355 *wc_status = IB_WC_SUCCESS; 370 *wc_status = IB_WC_SUCCESS;
356} 371}
357 372
373static inline int post_one_send(struct ehca_qp *my_qp,
374 struct ib_send_wr *cur_send_wr,
375 struct ib_send_wr **bad_send_wr,
376 int hidden)
377{
378 struct ehca_wqe *wqe_p;
379 int ret;
380 u64 start_offset = my_qp->ipz_squeue.current_q_offset;
381
382 /* get pointer next to free WQE */
383 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
384 if (unlikely(!wqe_p)) {
385 /* too many posted work requests: queue overflow */
386 if (bad_send_wr)
387 *bad_send_wr = cur_send_wr;
388 ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
389 "qp_num=%x", my_qp->ib_qp.qp_num);
390 return -ENOMEM;
391 }
392 /* write a SEND WQE into the QUEUE */
393 ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden);
394 /*
395 * if something failed,
396 * reset the free entry pointer to the start value
397 */
398 if (unlikely(ret)) {
399 my_qp->ipz_squeue.current_q_offset = start_offset;
400 if (bad_send_wr)
401 *bad_send_wr = cur_send_wr;
402 ehca_err(my_qp->ib_qp.device, "Could not write WQE "
403 "qp_num=%x", my_qp->ib_qp.qp_num);
404 return -EINVAL;
405 }
406
407 return 0;
408}
409
358int ehca_post_send(struct ib_qp *qp, 410int ehca_post_send(struct ib_qp *qp,
359 struct ib_send_wr *send_wr, 411 struct ib_send_wr *send_wr,
360 struct ib_send_wr **bad_send_wr) 412 struct ib_send_wr **bad_send_wr)
361{ 413{
362 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); 414 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
363 struct ib_send_wr *cur_send_wr; 415 struct ib_send_wr *cur_send_wr;
364 struct ehca_wqe *wqe_p;
365 int wqe_cnt = 0; 416 int wqe_cnt = 0;
366 int ret = 0; 417 int ret = 0;
367 unsigned long flags; 418 unsigned long flags;
@@ -369,37 +420,33 @@ int ehca_post_send(struct ib_qp *qp,
369 /* LOCK the QUEUE */ 420 /* LOCK the QUEUE */
370 spin_lock_irqsave(&my_qp->spinlock_s, flags); 421 spin_lock_irqsave(&my_qp->spinlock_s, flags);
371 422
423 /* Send an empty extra RDMA read if:
424 * 1) there has been an RDMA read on this connection before
425 * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
426 * 3) we can be sure that any previous extra RDMA read has been
427 * processed so we don't overflow the SQ
428 */
429 if (unlikely(my_qp->unsol_ack_circ &&
430 my_qp->packet_count > ACK_CIRC_THRESHOLD &&
431 my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
432 /* insert an empty RDMA READ to fix up the remote QP state */
433 struct ib_send_wr circ_wr;
434 memset(&circ_wr, 0, sizeof(circ_wr));
435 circ_wr.opcode = IB_WR_RDMA_READ;
436 post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
437 wqe_cnt++;
438 ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
439 my_qp->message_count = my_qp->packet_count = 0;
440 }
441
372 /* loop processes list of send reqs */ 442 /* loop processes list of send reqs */
373 for (cur_send_wr = send_wr; cur_send_wr != NULL; 443 for (cur_send_wr = send_wr; cur_send_wr != NULL;
374 cur_send_wr = cur_send_wr->next) { 444 cur_send_wr = cur_send_wr->next) {
375 u64 start_offset = my_qp->ipz_squeue.current_q_offset; 445 ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
376 /* get pointer next to free WQE */
377 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
378 if (unlikely(!wqe_p)) {
379 /* too many posted work requests: queue overflow */
380 if (bad_send_wr)
381 *bad_send_wr = cur_send_wr;
382 if (wqe_cnt == 0) {
383 ret = -ENOMEM;
384 ehca_err(qp->device, "Too many posted WQEs "
385 "qp_num=%x", qp->qp_num);
386 }
387 goto post_send_exit0;
388 }
389 /* write a SEND WQE into the QUEUE */
390 ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
391 /*
392 * if something failed,
393 * reset the free entry pointer to the start value
394 */
395 if (unlikely(ret)) { 446 if (unlikely(ret)) {
396 my_qp->ipz_squeue.current_q_offset = start_offset; 447 /* if one or more WQEs were successful, don't fail */
397 *bad_send_wr = cur_send_wr; 448 if (wqe_cnt)
398 if (wqe_cnt == 0) { 449 ret = 0;
399 ret = -EINVAL;
400 ehca_err(qp->device, "Could not write WQE "
401 "qp_num=%x", qp->qp_num);
402 }
403 goto post_send_exit0; 450 goto post_send_exit0;
404 } 451 }
405 wqe_cnt++; 452 wqe_cnt++;
@@ -410,6 +457,7 @@ int ehca_post_send(struct ib_qp *qp,
410post_send_exit0: 457post_send_exit0:
411 iosync(); /* serialize GAL register access */ 458 iosync(); /* serialize GAL register access */
412 hipz_update_sqa(my_qp, wqe_cnt); 459 hipz_update_sqa(my_qp, wqe_cnt);
460 my_qp->message_count += wqe_cnt;
413 spin_unlock_irqrestore(&my_qp->spinlock_s, flags); 461 spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
414 return ret; 462 return ret;
415} 463}