aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorSelvin Xavier <selvin.xavier@broadcom.com>2018-03-06 00:49:28 -0500
committerJason Gunthorpe <jgg@mellanox.com>2018-03-06 22:08:39 -0500
commit942c9b6ca8de5b7ad675e9b2e0e964449c10c18a (patch)
treecc90b8e8e843551f5ffc3c35c2e99142b0a4c761 /drivers
parentd3b9e8ad425cfd5b9116732e057f1b48e4d3bcb8 (diff)
RDMA/bnxt_re: Avoid Hard lockup during error CQE processing
Hitting the following hardlockup due to a race condition in error CQE processing. [26146.879798] bnxt_en 0000:04:00.0: QPLIB: FP: CQ Processed Req [26146.886346] bnxt_en 0000:04:00.0: QPLIB: wr_id[1251] = 0x0 with status 0xa [26156.350935] NMI watchdog: Watchdog detected hard LOCKUP on cpu 4 [26156.357470] Modules linked in: nfsd auth_rpcgss nfs_acl lockd grace [26156.447957] CPU: 4 PID: 3413 Comm: kworker/4:1H Kdump: loaded [26156.457994] Hardware name: Dell Inc. PowerEdge R430/0CN7X8, [26156.466390] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] [26156.472639] Call Trace: [26156.475379] <NMI> [<ffffffff98d0d722>] dump_stack+0x19/0x1b [26156.481833] [<ffffffff9873f775>] watchdog_overflow_callback+0x135/0x140 [26156.489341] [<ffffffff9877f237>] __perf_event_overflow+0x57/0x100 [26156.496256] [<ffffffff98787c24>] perf_event_overflow+0x14/0x20 [26156.502887] [<ffffffff9860a580>] intel_pmu_handle_irq+0x220/0x510 [26156.509813] [<ffffffff98d16031>] perf_event_nmi_handler+0x31/0x50 [26156.516738] [<ffffffff98d1790c>] nmi_handle.isra.0+0x8c/0x150 [26156.523273] [<ffffffff98d17be8>] do_nmi+0x218/0x460 [26156.528834] [<ffffffff98d16d79>] end_repeat_nmi+0x1e/0x7e [26156.534980] [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200 [26156.543268] [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200 [26156.551556] [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200 [26156.559842] <EOE> [<ffffffff98d083e4>] queued_spin_lock_slowpath+0xb/0xf [26156.567555] [<ffffffff98d15690>] _raw_spin_lock+0x20/0x30 [26156.573696] [<ffffffffc08381a1>] bnxt_qplib_lock_buddy_cq+0x31/0x40 [bnxt_re] [26156.581789] [<ffffffffc083bbaa>] bnxt_qplib_poll_cq+0x43a/0xf10 [bnxt_re] [26156.589493] [<ffffffffc083239b>] bnxt_re_poll_cq+0x9b/0x760 [bnxt_re] The issue happens if RQ poll_cq or SQ poll_cq or Async error event tries to put the error QP in flush list. Since SQ and RQ of each error qp are added to two different flush list, we need to protect it using locks of corresponding CQs. Difference in order of acquiring the lock in SQ poll_cq and RQ poll_cq can cause a hard lockup. Revisits the locking strategy and removes the usage of qplib_cq.hwq.lock. Instead of this lock, introduces qplib_cq.flush_lock to handle addition/deletion of QPs in flush list. Also, always invoke the flush_lock in order (SQ CQ lock first and then RQ CQ lock) to avoid any potential deadlock. Other than the poll_cq context, the movement of QP to/from flush list can be done in modify_qp context or from an async error event from HW. Synchronize these operations using the bnxt_re verbs layer CQ locks. To achieve this, adds a call back to the HW abstraction layer(qplib) to bnxt_re ib_verbs layer in case of async error event. Also, removes the buddy cq functions as it is no longer required. Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com> Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c7
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c109
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.h12
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c3
6 files changed, 55 insertions, 90 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 755f1ccd82bb..0dd75f449872 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -785,7 +785,7 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
785 return 0; 785 return 0;
786} 786}
787 787
788static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp) 788unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
789 __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock) 789 __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
790{ 790{
791 unsigned long flags; 791 unsigned long flags;
@@ -799,8 +799,8 @@ static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
799 return flags; 799 return flags;
800} 800}
801 801
802static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, 802void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
803 unsigned long flags) 803 unsigned long flags)
804 __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock) 804 __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
805{ 805{
806 if (qp->rcq != qp->scq) 806 if (qp->rcq != qp->scq)
@@ -1606,6 +1606,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
1606 int status; 1606 int status;
1607 union ib_gid sgid; 1607 union ib_gid sgid;
1608 struct ib_gid_attr sgid_attr; 1608 struct ib_gid_attr sgid_attr;
1609 unsigned int flags;
1609 u8 nw_type; 1610 u8 nw_type;
1610 1611
1611 qp->qplib_qp.modify_flags = 0; 1612 qp->qplib_qp.modify_flags = 0;
@@ -1634,14 +1635,18 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
1634 dev_dbg(rdev_to_dev(rdev), 1635 dev_dbg(rdev_to_dev(rdev),
1635 "Move QP = %p to flush list\n", 1636 "Move QP = %p to flush list\n",
1636 qp); 1637 qp);
1638 flags = bnxt_re_lock_cqs(qp);
1637 bnxt_qplib_add_flush_qp(&qp->qplib_qp); 1639 bnxt_qplib_add_flush_qp(&qp->qplib_qp);
1640 bnxt_re_unlock_cqs(qp, flags);
1638 } 1641 }
1639 if (!qp->sumem && 1642 if (!qp->sumem &&
1640 qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { 1643 qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
1641 dev_dbg(rdev_to_dev(rdev), 1644 dev_dbg(rdev_to_dev(rdev),
1642 "Move QP = %p out of flush list\n", 1645 "Move QP = %p out of flush list\n",
1643 qp); 1646 qp);
1647 flags = bnxt_re_lock_cqs(qp);
1644 bnxt_qplib_clean_qp(&qp->qplib_qp); 1648 bnxt_qplib_clean_qp(&qp->qplib_qp);
1649 bnxt_re_unlock_cqs(qp, flags);
1645 } 1650 }
1646 } 1651 }
1647 if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { 1652 if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index b88a48d43a9d..e62b7c2c7da6 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -222,4 +222,7 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
222 struct ib_udata *udata); 222 struct ib_udata *udata);
223int bnxt_re_dealloc_ucontext(struct ib_ucontext *context); 223int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
224int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); 224int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
225
226unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
227void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
225#endif /* __BNXT_RE_IB_VERBS_H__ */ 228#endif /* __BNXT_RE_IB_VERBS_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 604c805ceaa7..f6e361750466 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -730,6 +730,13 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
730 struct bnxt_re_qp *qp) 730 struct bnxt_re_qp *qp)
731{ 731{
732 struct ib_event event; 732 struct ib_event event;
733 unsigned int flags;
734
735 if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
736 flags = bnxt_re_lock_cqs(qp);
737 bnxt_qplib_add_flush_qp(&qp->qplib_qp);
738 bnxt_re_unlock_cqs(qp, flags);
739 }
733 740
734 memset(&event, 0, sizeof(event)); 741 memset(&event, 0, sizeof(event));
735 if (qp->qplib_qp.srq) { 742 if (qp->qplib_qp.srq) {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 3ea5b9624f6b..06b42c880fd4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -88,75 +88,35 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
88 } 88 }
89} 89}
90 90
91void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp, 91static void bnxt_qplib_acquire_cq_flush_locks(struct bnxt_qplib_qp *qp,
92 unsigned long *flags) 92 unsigned long *flags)
93 __acquires(&qp->scq->hwq.lock) __acquires(&qp->rcq->hwq.lock) 93 __acquires(&qp->scq->flush_lock) __acquires(&qp->rcq->flush_lock)
94{ 94{
95 spin_lock_irqsave(&qp->scq->hwq.lock, *flags); 95 spin_lock_irqsave(&qp->scq->flush_lock, *flags);
96 if (qp->scq == qp->rcq) 96 if (qp->scq == qp->rcq)
97 __acquire(&qp->rcq->hwq.lock); 97 __acquire(&qp->rcq->flush_lock);
98 else 98 else
99 spin_lock(&qp->rcq->hwq.lock); 99 spin_lock(&qp->rcq->flush_lock);
100} 100}
101 101
102void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp, 102static void bnxt_qplib_release_cq_flush_locks(struct bnxt_qplib_qp *qp,
103 unsigned long *flags) 103 unsigned long *flags)
104 __releases(&qp->scq->hwq.lock) __releases(&qp->rcq->hwq.lock) 104 __releases(&qp->scq->flush_lock) __releases(&qp->rcq->flush_lock)
105{ 105{
106 if (qp->scq == qp->rcq) 106 if (qp->scq == qp->rcq)
107 __release(&qp->rcq->hwq.lock); 107 __release(&qp->rcq->flush_lock);
108 else 108 else
109 spin_unlock(&qp->rcq->hwq.lock); 109 spin_unlock(&qp->rcq->flush_lock);
110 spin_unlock_irqrestore(&qp->scq->hwq.lock, *flags); 110 spin_unlock_irqrestore(&qp->scq->flush_lock, *flags);
111}
112
113static struct bnxt_qplib_cq *bnxt_qplib_find_buddy_cq(struct bnxt_qplib_qp *qp,
114 struct bnxt_qplib_cq *cq)
115{
116 struct bnxt_qplib_cq *buddy_cq = NULL;
117
118 if (qp->scq == qp->rcq)
119 buddy_cq = NULL;
120 else if (qp->scq == cq)
121 buddy_cq = qp->rcq;
122 else
123 buddy_cq = qp->scq;
124 return buddy_cq;
125}
126
127static void bnxt_qplib_lock_buddy_cq(struct bnxt_qplib_qp *qp,
128 struct bnxt_qplib_cq *cq)
129 __acquires(&buddy_cq->hwq.lock)
130{
131 struct bnxt_qplib_cq *buddy_cq = NULL;
132
133 buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
134 if (!buddy_cq)
135 __acquire(&cq->hwq.lock);
136 else
137 spin_lock(&buddy_cq->hwq.lock);
138}
139
140static void bnxt_qplib_unlock_buddy_cq(struct bnxt_qplib_qp *qp,
141 struct bnxt_qplib_cq *cq)
142 __releases(&buddy_cq->hwq.lock)
143{
144 struct bnxt_qplib_cq *buddy_cq = NULL;
145
146 buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
147 if (!buddy_cq)
148 __release(&cq->hwq.lock);
149 else
150 spin_unlock(&buddy_cq->hwq.lock);
151} 111}
152 112
153void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) 113void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
154{ 114{
155 unsigned long flags; 115 unsigned long flags;
156 116
157 bnxt_qplib_acquire_cq_locks(qp, &flags); 117 bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
158 __bnxt_qplib_add_flush_qp(qp); 118 __bnxt_qplib_add_flush_qp(qp);
159 bnxt_qplib_release_cq_locks(qp, &flags); 119 bnxt_qplib_release_cq_flush_locks(qp, &flags);
160} 120}
161 121
162static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) 122static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
@@ -177,7 +137,7 @@ void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
177{ 137{
178 unsigned long flags; 138 unsigned long flags;
179 139
180 bnxt_qplib_acquire_cq_locks(qp, &flags); 140 bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
181 __clean_cq(qp->scq, (u64)(unsigned long)qp); 141 __clean_cq(qp->scq, (u64)(unsigned long)qp);
182 qp->sq.hwq.prod = 0; 142 qp->sq.hwq.prod = 0;
183 qp->sq.hwq.cons = 0; 143 qp->sq.hwq.cons = 0;
@@ -186,7 +146,7 @@ void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
186 qp->rq.hwq.cons = 0; 146 qp->rq.hwq.cons = 0;
187 147
188 __bnxt_qplib_del_flush_qp(qp); 148 __bnxt_qplib_del_flush_qp(qp);
189 bnxt_qplib_release_cq_locks(qp, &flags); 149 bnxt_qplib_release_cq_flush_locks(qp, &flags);
190} 150}
191 151
192static void bnxt_qpn_cqn_sched_task(struct work_struct *work) 152static void bnxt_qpn_cqn_sched_task(struct work_struct *work)
@@ -2107,9 +2067,6 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
2107 /* Must block new posting of SQ and RQ */ 2067 /* Must block new posting of SQ and RQ */
2108 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; 2068 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
2109 bnxt_qplib_cancel_phantom_processing(qp); 2069 bnxt_qplib_cancel_phantom_processing(qp);
2110
2111 /* Add qp to flush list of the CQ */
2112 __bnxt_qplib_add_flush_qp(qp);
2113} 2070}
2114 2071
2115/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive) 2072/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
@@ -2285,9 +2242,9 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
2285 sw_sq_cons, cqe->wr_id, cqe->status); 2242 sw_sq_cons, cqe->wr_id, cqe->status);
2286 cqe++; 2243 cqe++;
2287 (*budget)--; 2244 (*budget)--;
2288 bnxt_qplib_lock_buddy_cq(qp, cq);
2289 bnxt_qplib_mark_qp_error(qp); 2245 bnxt_qplib_mark_qp_error(qp);
2290 bnxt_qplib_unlock_buddy_cq(qp, cq); 2246 /* Add qp to flush list of the CQ */
2247 bnxt_qplib_add_flush_qp(qp);
2291 } else { 2248 } else {
2292 if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { 2249 if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
2293 /* Before we complete, do WA 9060 */ 2250 /* Before we complete, do WA 9060 */
@@ -2403,9 +2360,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2403 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { 2360 if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
2404 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; 2361 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
2405 /* Add qp to flush list of the CQ */ 2362 /* Add qp to flush list of the CQ */
2406 bnxt_qplib_lock_buddy_cq(qp, cq); 2363 bnxt_qplib_add_flush_qp(qp);
2407 __bnxt_qplib_add_flush_qp(qp);
2408 bnxt_qplib_unlock_buddy_cq(qp, cq);
2409 } 2364 }
2410 } 2365 }
2411 2366
@@ -2489,9 +2444,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
2489 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { 2444 if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
2490 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; 2445 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
2491 /* Add qp to flush list of the CQ */ 2446 /* Add qp to flush list of the CQ */
2492 bnxt_qplib_lock_buddy_cq(qp, cq); 2447 bnxt_qplib_add_flush_qp(qp);
2493 __bnxt_qplib_add_flush_qp(qp);
2494 bnxt_qplib_unlock_buddy_cq(qp, cq);
2495 } 2448 }
2496 } 2449 }
2497done: 2450done:
@@ -2501,11 +2454,9 @@ done:
2501bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq) 2454bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
2502{ 2455{
2503 struct cq_base *hw_cqe, **hw_cqe_ptr; 2456 struct cq_base *hw_cqe, **hw_cqe_ptr;
2504 unsigned long flags;
2505 u32 sw_cons, raw_cons; 2457 u32 sw_cons, raw_cons;
2506 bool rc = true; 2458 bool rc = true;
2507 2459
2508 spin_lock_irqsave(&cq->hwq.lock, flags);
2509 raw_cons = cq->hwq.cons; 2460 raw_cons = cq->hwq.cons;
2510 sw_cons = HWQ_CMP(raw_cons, &cq->hwq); 2461 sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
2511 hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; 2462 hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
@@ -2513,7 +2464,6 @@ bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
2513 2464
2514 /* Check for Valid bit. If the CQE is valid, return false */ 2465 /* Check for Valid bit. If the CQE is valid, return false */
2515 rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements); 2466 rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
2516 spin_unlock_irqrestore(&cq->hwq.lock, flags);
2517 return rc; 2467 return rc;
2518} 2468}
2519 2469
@@ -2602,9 +2552,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
2602 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { 2552 if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
2603 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; 2553 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
2604 /* Add qp to flush list of the CQ */ 2554 /* Add qp to flush list of the CQ */
2605 bnxt_qplib_lock_buddy_cq(qp, cq); 2555 bnxt_qplib_add_flush_qp(qp);
2606 __bnxt_qplib_add_flush_qp(qp);
2607 bnxt_qplib_unlock_buddy_cq(qp, cq);
2608 } 2556 }
2609 } 2557 }
2610 2558
@@ -2719,9 +2667,7 @@ do_rq:
2719 */ 2667 */
2720 2668
2721 /* Add qp to flush list of the CQ */ 2669 /* Add qp to flush list of the CQ */
2722 bnxt_qplib_lock_buddy_cq(qp, cq); 2670 bnxt_qplib_add_flush_qp(qp);
2723 __bnxt_qplib_add_flush_qp(qp);
2724 bnxt_qplib_unlock_buddy_cq(qp, cq);
2725done: 2671done:
2726 return rc; 2672 return rc;
2727} 2673}
@@ -2750,7 +2696,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
2750 u32 budget = num_cqes; 2696 u32 budget = num_cqes;
2751 unsigned long flags; 2697 unsigned long flags;
2752 2698
2753 spin_lock_irqsave(&cq->hwq.lock, flags); 2699 spin_lock_irqsave(&cq->flush_lock, flags);
2754 list_for_each_entry(qp, &cq->sqf_head, sq_flush) { 2700 list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
2755 dev_dbg(&cq->hwq.pdev->dev, 2701 dev_dbg(&cq->hwq.pdev->dev,
2756 "QPLIB: FP: Flushing SQ QP= %p", 2702 "QPLIB: FP: Flushing SQ QP= %p",
@@ -2764,7 +2710,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
2764 qp); 2710 qp);
2765 __flush_rq(&qp->rq, qp, &cqe, &budget); 2711 __flush_rq(&qp->rq, qp, &cqe, &budget);
2766 } 2712 }
2767 spin_unlock_irqrestore(&cq->hwq.lock, flags); 2713 spin_unlock_irqrestore(&cq->flush_lock, flags);
2768 2714
2769 return num_cqes - budget; 2715 return num_cqes - budget;
2770} 2716}
@@ -2773,11 +2719,9 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
2773 int num_cqes, struct bnxt_qplib_qp **lib_qp) 2719 int num_cqes, struct bnxt_qplib_qp **lib_qp)
2774{ 2720{
2775 struct cq_base *hw_cqe, **hw_cqe_ptr; 2721 struct cq_base *hw_cqe, **hw_cqe_ptr;
2776 unsigned long flags;
2777 u32 sw_cons, raw_cons; 2722 u32 sw_cons, raw_cons;
2778 int budget, rc = 0; 2723 int budget, rc = 0;
2779 2724
2780 spin_lock_irqsave(&cq->hwq.lock, flags);
2781 raw_cons = cq->hwq.cons; 2725 raw_cons = cq->hwq.cons;
2782 budget = num_cqes; 2726 budget = num_cqes;
2783 2727
@@ -2853,20 +2797,15 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
2853 bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ); 2797 bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
2854 } 2798 }
2855exit: 2799exit:
2856 spin_unlock_irqrestore(&cq->hwq.lock, flags);
2857 return num_cqes - budget; 2800 return num_cqes - budget;
2858} 2801}
2859 2802
2860void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type) 2803void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
2861{ 2804{
2862 unsigned long flags;
2863
2864 spin_lock_irqsave(&cq->hwq.lock, flags);
2865 if (arm_type) 2805 if (arm_type)
2866 bnxt_qplib_arm_cq(cq, arm_type); 2806 bnxt_qplib_arm_cq(cq, arm_type);
2867 /* Using cq->arm_state variable to track whether to issue cq handler */ 2807 /* Using cq->arm_state variable to track whether to issue cq handler */
2868 atomic_set(&cq->arm_state, 1); 2808 atomic_set(&cq->arm_state, 1);
2869 spin_unlock_irqrestore(&cq->hwq.lock, flags);
2870} 2809}
2871 2810
2872void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp) 2811void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index ca0a2ffa3509..ade9f13c0fd1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -389,6 +389,18 @@ struct bnxt_qplib_cq {
389 struct list_head sqf_head, rqf_head; 389 struct list_head sqf_head, rqf_head;
390 atomic_t arm_state; 390 atomic_t arm_state;
391 spinlock_t compl_lock; /* synch CQ handlers */ 391 spinlock_t compl_lock; /* synch CQ handlers */
392/* Locking Notes:
393 * QP can move to error state from modify_qp, async error event or error
394 * CQE as part of poll_cq. When QP is moved to error state, it gets added
395 * to two flush lists, one each for SQ and RQ.
396 * Each flush list is protected by qplib_cq->flush_lock. Both scq and rcq
397 * flush_locks should be acquired when QP is moved to error. The control path
398 * operations(modify_qp and async error events) are synchronized with poll_cq
399 * using upper level CQ locks (bnxt_re_cq->cq_lock) of both SCQ and RCQ.
400 * The qplib_cq->flush_lock is required to synchronize two instances of poll_cq
401 * of the same QP while manipulating the flush list.
402 */
403 spinlock_t flush_lock; /* QP flush management */
392}; 404};
393 405
394#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq) 406#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 14d153d4013c..80027a494730 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -305,9 +305,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
305 err_event->res_err_state_reason); 305 err_event->res_err_state_reason);
306 if (!qp) 306 if (!qp)
307 break; 307 break;
308 bnxt_qplib_acquire_cq_locks(qp, &flags);
309 bnxt_qplib_mark_qp_error(qp); 308 bnxt_qplib_mark_qp_error(qp);
310 bnxt_qplib_release_cq_locks(qp, &flags); 309 rcfw->aeq_handler(rcfw, qp_event, qp);
311 break; 310 break;
312 default: 311 default:
313 /* Command Response */ 312 /* Command Response */