aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYonatan Cohen <yonatanc@mellanox.com>2016-11-16 03:39:15 -0500
committerDoug Ledford <dledford@redhat.com>2016-11-16 20:03:44 -0500
commit002e062e13db10973adb8302f231e48b477c7ccf (patch)
treed8e35b3e448a1008581af2db04de0aa8c87a86d7
parent1454ca3a97e147bb91e98b087446c39cf6692a48 (diff)
IB/rxe: Fix handling of erroneous WR
To correctly handle a erroneous WR this fix does the following 1. Make sure the bad WQE causes a user completion event. 2. Call rxe_completer to handle the erred WQE. Before the fix, when rxe_requester found a bad WQE, it changed its status to IB_WC_LOC_PROT_ERR and exit with 0 for non RC QPs. If this was the 1st WQE then there would be no ACK to invoke the completer and this bad WQE would be stuck in the QP's send-q. On top of that the requester exiting with 0 caused rxe_do_task to endlessly invoke rxe_requester, resulting in a soft-lockup attached below. In case the WQE was not the 1st and rxe_completer did get a chance to handle the bad WQE, it did not cause a complete event since the WQE's IB_SEND_SIGNALED flag was not set. Setting WQE status to IB_SEND_SIGNALED is subject to IBA spec version 1.2.1, section 10.7.3.1 Signaled Completions. NMI watchdog: BUG: soft lockup - CPU#7 stuck for 22s! [<ffffffffa0590145>] ? rxe_pool_get_index+0x35/0xb0 [rdma_rxe] [<ffffffffa05952ec>] lookup_mem+0x3c/0xc0 [rdma_rxe] [<ffffffffa0595534>] copy_data+0x1c4/0x230 [rdma_rxe] [<ffffffffa058c180>] rxe_requester+0x9d0/0x1100 [rdma_rxe] [<ffffffff8158e98a>] ? kfree_skbmem+0x5a/0x60 [<ffffffffa05962c9>] rxe_do_task+0x89/0xf0 [rdma_rxe] [<ffffffffa05963e2>] rxe_run_task+0x12/0x30 [rdma_rxe] [<ffffffffa059110a>] rxe_post_send+0x41a/0x550 [rdma_rxe] [<ffffffff811ef922>] ? __kmalloc+0x182/0x200 [<ffffffff816ba512>] ? down_read+0x12/0x40 [<ffffffffa054bd32>] ib_uverbs_post_send+0x532/0x540 [ib_uverbs] [<ffffffff815f8722>] ? tcp_sendmsg+0x402/0xb80 [<ffffffffa05453dc>] ib_uverbs_write+0x18c/0x3f0 [ib_uverbs] [<ffffffff81623c2e>] ? inet_recvmsg+0x7e/0xb0 [<ffffffff8158764d>] ? sock_recvmsg+0x3d/0x50 [<ffffffff81215b87>] __vfs_write+0x37/0x140 [<ffffffff81216892>] vfs_write+0xb2/0x1b0 [<ffffffff81217ce5>] SyS_write+0x55/0xc0 [<ffffffff816bc672>] entry_SYSCALL_64_fastpath+0x1a/0xa Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com> Reviewed-by: Moni Shoua <monis@mellanox.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c21
1 files changed, 13 insertions, 8 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 832846b73ea0..22bd9630dcd9 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -696,7 +696,8 @@ next_wqe:
696 qp->req.wqe_index); 696 qp->req.wqe_index);
697 wqe->state = wqe_state_done; 697 wqe->state = wqe_state_done;
698 wqe->status = IB_WC_SUCCESS; 698 wqe->status = IB_WC_SUCCESS;
699 goto complete; 699 __rxe_do_task(&qp->comp.task);
700 return 0;
700 } 701 }
701 payload = mtu; 702 payload = mtu;
702 } 703 }
@@ -745,13 +746,17 @@ err:
745 wqe->status = IB_WC_LOC_PROT_ERR; 746 wqe->status = IB_WC_LOC_PROT_ERR;
746 wqe->state = wqe_state_error; 747 wqe->state = wqe_state_error;
747 748
748complete: 749 /*
749 if (qp_type(qp) != IB_QPT_RC) { 750 * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
750 while (rxe_completer(qp) == 0) 751 * ---------8<---------8<-------------
751 ; 752 * ...Note that if a completion error occurs, a Work Completion
752 } 753 * will always be generated, even if the signaling
753 754 * indicator requests an Unsignaled Completion.
754 return 0; 755 * ---------8<---------8<-------------
756 */
757 wqe->wr.send_flags |= IB_SEND_SIGNALED;
758 __rxe_do_task(&qp->comp.task);
759 return -EAGAIN;
755 760
756exit: 761exit:
757 return -EAGAIN; 762 return -EAGAIN;