Diffstat (limited to 'drivers/infiniband/ulp/srpt/ib_srpt.c')
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c  427
1 file changed, 124 insertions(+), 303 deletions(-)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2e2fe818ca9f..8068affe25b5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
 static struct ib_client srpt_client;
 static void srpt_release_channel(struct srpt_rdma_ch *ch);
 static int srpt_queue_status(struct se_cmd *cmd);
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);

 /**
  * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
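The two forward declarations added above are the completion handlers required by the ib_cqe-based CQ API this patch converts to: instead of packing an opcode and ring index into wr.wr_id, each work request carries a pointer to a struct ib_cqe whose .done callback the RDMA core invokes, and the handler recovers its per-request context with container_of(). A minimal sketch of that pattern, with made-up names that are not part of ib_srpt:

/* Hypothetical illustration of the ib_cqe completion pattern. */
struct my_request {
	struct ib_cqe	cqe;	/* embedded completion entry */
	/* ... per-request state ... */
};

static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_request *req =
		container_of(wc->wr_cqe, struct my_request, cqe);

	if (wc->status != IB_WC_SUCCESS) {
		/* error path: abort or complete the request */
		return;
	}
	/* ... normal completion handling for req ... */
}

static int my_post_send(struct ib_qp *qp, struct my_request *req,
			struct ib_sge *sge)
{
	struct ib_send_wr wr = {}, *bad_wr;

	req->cqe.done = my_send_done;	/* callback invoked by the CQ layer */
	wr.wr_cqe = &req->cqe;		/* replaces the old wr.wr_id encoding */
	wr.sg_list = sge;
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	return ib_post_send(qp, &wr, &bad_wr);
}

The hunks below apply exactly this change to srpt_post_recv() and srpt_post_send().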
@@ -778,12 +780,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
 	struct ib_recv_wr wr, *bad_wr;

 	BUG_ON(!sdev);
-	wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
-
 	list.addr = ioctx->ioctx.dma;
 	list.length = srp_max_req_size;
 	list.lkey = sdev->pd->local_dma_lkey;

+	ioctx->ioctx.cqe.done = srpt_recv_done;
+	wr.wr_cqe = &ioctx->ioctx.cqe;
 	wr.next = NULL;
 	wr.sg_list = &list;
 	wr.num_sge = 1;
@@ -819,8 +821,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
 	list.length = len;
 	list.lkey = sdev->pd->local_dma_lkey;

+	ioctx->ioctx.cqe.done = srpt_send_done;
 	wr.next = NULL;
-	wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
+	wr.wr_cqe = &ioctx->ioctx.cqe;
 	wr.sg_list = &list;
 	wr.num_sge = 1;
 	wr.opcode = IB_WR_SEND;
@@ -1052,13 +1055,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,

 	BUG_ON(!ch);
 	BUG_ON(!ioctx);
-	BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
+	BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);

 	while (ioctx->n_rdma)
-		kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
+		kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);

-	kfree(ioctx->rdma_ius);
-	ioctx->rdma_ius = NULL;
+	kfree(ioctx->rdma_wrs);
+	ioctx->rdma_wrs = NULL;

 	if (ioctx->mapped_sg_count) {
 		sg = ioctx->sg;
@@ -1082,7 +1085,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 	struct scatterlist *sg, *sg_orig;
 	int sg_cnt;
 	enum dma_data_direction dir;
-	struct rdma_iu *riu;
+	struct ib_rdma_wr *riu;
 	struct srp_direct_buf *db;
 	dma_addr_t dma_addr;
 	struct ib_sge *sge;
@@ -1109,23 +1112,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,

 	ioctx->mapped_sg_count = count;

-	if (ioctx->rdma_ius && ioctx->n_rdma_ius)
-		nrdma = ioctx->n_rdma_ius;
+	if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
+		nrdma = ioctx->n_rdma_wrs;
 	else {
 		nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
 			+ ioctx->n_rbuf;

-		ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
-		if (!ioctx->rdma_ius)
+		ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
+				GFP_KERNEL);
+		if (!ioctx->rdma_wrs)
 			goto free_mem;

-		ioctx->n_rdma_ius = nrdma;
+		ioctx->n_rdma_wrs = nrdma;
 	}

 	db = ioctx->rbufs;
 	tsize = cmd->data_length;
 	dma_len = ib_sg_dma_len(dev, &sg[0]);
-	riu = ioctx->rdma_ius;
+	riu = ioctx->rdma_wrs;

 	/*
 	 * For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1143,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 	     j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
 		rsize = be32_to_cpu(db->len);
 		raddr = be64_to_cpu(db->va);
-		riu->raddr = raddr;
+		riu->remote_addr = raddr;
 		riu->rkey = be32_to_cpu(db->key);
-		riu->sge_cnt = 0;
+		riu->wr.num_sge = 0;

 		/* calculate how many sge required for this remote_buf */
 		while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1169,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 				rsize = 0;
 			}

-			++riu->sge_cnt;
+			++riu->wr.num_sge;

-			if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
+			if (rsize > 0 &&
+			    riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
 				++ioctx->n_rdma;
-				riu->sge =
-				    kmalloc(riu->sge_cnt * sizeof *riu->sge,
-					GFP_KERNEL);
-				if (!riu->sge)
+				riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+						sizeof(*riu->wr.sg_list),
+						GFP_KERNEL);
+				if (!riu->wr.sg_list)
 					goto free_mem;

 				++riu;
-				riu->sge_cnt = 0;
-				riu->raddr = raddr;
+				riu->wr.num_sge = 0;
+				riu->remote_addr = raddr;
 				riu->rkey = be32_to_cpu(db->key);
 			}
 		}

 		++ioctx->n_rdma;
-		riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
-				GFP_KERNEL);
-		if (!riu->sge)
+		riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+					sizeof(*riu->wr.sg_list),
+					GFP_KERNEL);
+		if (!riu->wr.sg_list)
 			goto free_mem;
 	}

 	db = ioctx->rbufs;
 	tsize = cmd->data_length;
-	riu = ioctx->rdma_ius;
+	riu = ioctx->rdma_wrs;
 	sg = sg_orig;
 	dma_len = ib_sg_dma_len(dev, &sg[0]);
 	dma_addr = ib_sg_dma_address(dev, &sg[0]);
@@ -1200,7 +1206,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 	for (i = 0, j = 0;
 	     j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
 		rsize = be32_to_cpu(db->len);
-		sge = riu->sge;
+		sge = riu->wr.sg_list;
 		k = 0;

 		while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1238,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 			}

 			++k;
-			if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
+			if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
 				++riu;
-				sge = riu->sge;
+				sge = riu->wr.sg_list;
 				k = 0;
 			} else if (rsize > 0 && tsize > 0)
 				++sge;
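The rdma_ius/rdma_wrs rename running through the hunks above reflects the switch from the driver-private struct rdma_iu to the generic struct ib_rdma_wr, which embeds the send work request, so the old raddr, sge_cnt and sge fields become remote_addr, wr.num_sge and wr.sg_list. For reference, the generic structure (as defined in include/rdma/ib_verbs.h around this kernel version; check the headers you build against) looks roughly like:

struct ib_rdma_wr {
	struct ib_send_wr	wr;		/* opcode, sg_list, num_sge, wr_cqe, next, ... */
	u64			remote_addr;	/* was rdma_iu.raddr */
	u32			rkey;		/* unchanged name */
};

Because the ib_send_wr is embedded, the per-descriptor work requests built here can later be chained directly via wr.next and posted in one ib_post_send() call, which is what the srpt_perform_rdmas() hunk at the end of this diff does.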
@@ -1277,8 +1283,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
 	ioctx->n_rbuf = 0;
 	ioctx->rbufs = NULL;
 	ioctx->n_rdma = 0;
-	ioctx->n_rdma_ius = 0;
-	ioctx->rdma_ius = NULL;
+	ioctx->n_rdma_wrs = 0;
+	ioctx->rdma_wrs = NULL;
 	ioctx->mapped_sg_count = 0;
 	init_completion(&ioctx->tx_done);
 	ioctx->queue_status_only = false;
@@ -1380,118 +1386,44 @@ out:
 }

 /**
- * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
- */
-static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
-{
-	struct srpt_send_ioctx *ioctx;
-	enum srpt_command_state state;
-	u32 index;
-
-	atomic_inc(&ch->sq_wr_avail);
-
-	index = idx_from_wr_id(wr_id);
-	ioctx = ch->ioctx_ring[index];
-	state = srpt_get_cmd_state(ioctx);
-
-	WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
-		&& state != SRPT_STATE_MGMT_RSP_SENT
-		&& state != SRPT_STATE_NEED_DATA
-		&& state != SRPT_STATE_DONE);
-
-	/* If SRP_RSP sending failed, undo the ch->req_lim change. */
-	if (state == SRPT_STATE_CMD_RSP_SENT
-	    || state == SRPT_STATE_MGMT_RSP_SENT)
-		atomic_dec(&ch->req_lim);
-
-	srpt_abort_cmd(ioctx);
-}
-
-/**
- * srpt_handle_send_comp() - Process an IB send completion notification.
- */
-static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
-				  struct srpt_send_ioctx *ioctx)
-{
-	enum srpt_command_state state;
-
-	atomic_inc(&ch->sq_wr_avail);
-
-	state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-
-	if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
-		    && state != SRPT_STATE_MGMT_RSP_SENT
-		    && state != SRPT_STATE_DONE))
-		pr_debug("state = %d\n", state);
-
-	if (state != SRPT_STATE_DONE) {
-		srpt_unmap_sg_to_ib_sge(ch, ioctx);
-		transport_generic_free_cmd(&ioctx->cmd, 0);
-	} else {
-		pr_err("IB completion has been received too late for"
-		       " wr_id = %u.\n", ioctx->ioctx.index);
-	}
-}
-
-/**
- * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
- *
  * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
  * the data that has been transferred via IB RDMA had to be postponed until the
  * check_stop_free() callback. None of this is necessary anymore and needs to
  * be cleaned up.
  */
-static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
-				  struct srpt_send_ioctx *ioctx,
-				  enum srpt_opcode opcode)
+static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+	struct srpt_rdma_ch *ch = cq->cq_context;
+	struct srpt_send_ioctx *ioctx =
+		container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+
 	WARN_ON(ioctx->n_rdma <= 0);
 	atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);

-	if (opcode == SRPT_RDMA_READ_LAST) {
-		if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
-						SRPT_STATE_DATA_IN))
-			target_execute_cmd(&ioctx->cmd);
-		else
-			pr_err("%s[%d]: wrong state = %d\n", __func__,
-			       __LINE__, srpt_get_cmd_state(ioctx));
-	} else if (opcode == SRPT_RDMA_ABORT) {
-		ioctx->rdma_aborted = true;
-	} else {
-		WARN(true, "unexpected opcode %d\n", opcode);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
+			ioctx, wc->status);
+		srpt_abort_cmd(ioctx);
+		return;
 	}
+
+	if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+					SRPT_STATE_DATA_IN))
+		target_execute_cmd(&ioctx->cmd);
+	else
+		pr_err("%s[%d]: wrong state = %d\n", __func__,
+		       __LINE__, srpt_get_cmd_state(ioctx));
 }

-/**
- * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
- */
-static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
-				      struct srpt_send_ioctx *ioctx,
-				      enum srpt_opcode opcode)
+static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	enum srpt_command_state state;
+	struct srpt_send_ioctx *ioctx =
+		container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);

-	state = srpt_get_cmd_state(ioctx);
-	switch (opcode) {
-	case SRPT_RDMA_READ_LAST:
-		if (ioctx->n_rdma <= 0) {
-			pr_err("Received invalid RDMA read"
-			       " error completion with idx %d\n",
-			       ioctx->ioctx.index);
-			break;
-		}
-		atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
-		if (state == SRPT_STATE_NEED_DATA)
-			srpt_abort_cmd(ioctx);
-		else
-			pr_err("%s[%d]: wrong state = %d\n",
-			       __func__, __LINE__, state);
-		break;
-	case SRPT_RDMA_WRITE_LAST:
-		break;
-	default:
-		pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
-		break;
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
+			ioctx, wc->status);
+		srpt_abort_cmd(ioctx);
 	}
 }

@@ -1926,32 +1858,26 @@ out:
 	return;
 }

-static void srpt_process_rcv_completion(struct ib_cq *cq,
-					struct srpt_rdma_ch *ch,
-					struct ib_wc *wc)
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct srpt_device *sdev = ch->sport->sdev;
-	struct srpt_recv_ioctx *ioctx;
-	u32 index;
+	struct srpt_rdma_ch *ch = cq->cq_context;
+	struct srpt_recv_ioctx *ioctx =
+		container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);

-	index = idx_from_wr_id(wc->wr_id);
 	if (wc->status == IB_WC_SUCCESS) {
 		int req_lim;

 		req_lim = atomic_dec_return(&ch->req_lim);
 		if (unlikely(req_lim < 0))
 			pr_err("req_lim = %d < 0\n", req_lim);
-		ioctx = sdev->ioctx_ring[index];
 		srpt_handle_new_iu(ch, ioctx, NULL);
 	} else {
-		pr_info("receiving failed for idx %u with status %d\n",
-			index, wc->status);
+		pr_info("receiving failed for ioctx %p with status %d\n",
+			ioctx, wc->status);
 	}
 }

 /**
- * srpt_process_send_completion() - Process an IB send completion.
- *
  * Note: Although this has not yet been observed during tests, at least in
  * theory it is possible that the srpt_get_send_ioctx() call invoked by
  * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,108 +1890,51 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
  * are queued on cmd_wait_list. The code below processes these delayed
  * requests one at a time.
  */
-static void srpt_process_send_completion(struct ib_cq *cq,
-					 struct srpt_rdma_ch *ch,
-					 struct ib_wc *wc)
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct srpt_send_ioctx *send_ioctx;
-	uint32_t index;
-	enum srpt_opcode opcode;
+	struct srpt_rdma_ch *ch = cq->cq_context;
+	struct srpt_send_ioctx *ioctx =
+		container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+	enum srpt_command_state state;

-	index = idx_from_wr_id(wc->wr_id);
-	opcode = opcode_from_wr_id(wc->wr_id);
-	send_ioctx = ch->ioctx_ring[index];
-	if (wc->status == IB_WC_SUCCESS) {
-		if (opcode == SRPT_SEND)
-			srpt_handle_send_comp(ch, send_ioctx);
-		else {
-			WARN_ON(opcode != SRPT_RDMA_ABORT &&
-				wc->opcode != IB_WC_RDMA_READ);
-			srpt_handle_rdma_comp(ch, send_ioctx, opcode);
-		}
+	state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+
+	WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
+		state != SRPT_STATE_MGMT_RSP_SENT);
+
+	atomic_inc(&ch->sq_wr_avail);
+
+	if (wc->status != IB_WC_SUCCESS) {
+		pr_info("sending response for ioctx 0x%p failed"
+			" with status %d\n", ioctx, wc->status);
+
+		atomic_dec(&ch->req_lim);
+		srpt_abort_cmd(ioctx);
+		goto out;
+	}
+
+	if (state != SRPT_STATE_DONE) {
+		srpt_unmap_sg_to_ib_sge(ch, ioctx);
+		transport_generic_free_cmd(&ioctx->cmd, 0);
 	} else {
-		if (opcode == SRPT_SEND) {
-			pr_info("sending response for idx %u failed"
-				" with status %d\n", index, wc->status);
-			srpt_handle_send_err_comp(ch, wc->wr_id);
-		} else if (opcode != SRPT_RDMA_MID) {
-			pr_info("RDMA t %d for idx %u failed with"
-				" status %d\n", opcode, index, wc->status);
-			srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
-		}
+		pr_err("IB completion has been received too late for"
+		       " wr_id = %u.\n", ioctx->ioctx.index);
 	}

-	while (unlikely(opcode == SRPT_SEND
-			&& !list_empty(&ch->cmd_wait_list)
-			&& srpt_get_ch_state(ch) == CH_LIVE
-			&& (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
+out:
+	while (!list_empty(&ch->cmd_wait_list) &&
+	       srpt_get_ch_state(ch) == CH_LIVE &&
+	       (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
 		struct srpt_recv_ioctx *recv_ioctx;

 		recv_ioctx = list_first_entry(&ch->cmd_wait_list,
 					      struct srpt_recv_ioctx,
 					      wait_list);
 		list_del(&recv_ioctx->wait_list);
-		srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
+		srpt_handle_new_iu(ch, recv_ioctx, ioctx);
 	}
 }

-static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
-{
-	struct ib_wc *const wc = ch->wc;
-	int i, n;
-
-	WARN_ON(cq != ch->cq);
-
-	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-	while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
-		for (i = 0; i < n; i++) {
-			if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
-				srpt_process_rcv_completion(cq, ch, &wc[i]);
-			else
-				srpt_process_send_completion(cq, ch, &wc[i]);
-		}
-	}
-}
-
-/**
- * srpt_completion() - IB completion queue callback function.
- *
- * Notes:
- * - It is guaranteed that a completion handler will never be invoked
- *   concurrently on two different CPUs for the same completion queue. See also
- *   Documentation/infiniband/core_locking.txt and the implementation of
- *   handle_edge_irq() in kernel/irq/chip.c.
- * - When threaded IRQs are enabled, completion handlers are invoked in thread
- *   context instead of interrupt context.
- */
-static void srpt_completion(struct ib_cq *cq, void *ctx)
-{
-	struct srpt_rdma_ch *ch = ctx;
-
-	wake_up_interruptible(&ch->wait_queue);
-}
-
-static int srpt_compl_thread(void *arg)
-{
-	struct srpt_rdma_ch *ch;
-
-	/* Hibernation / freezing of the SRPT kernel thread is not supported. */
-	current->flags |= PF_NOFREEZE;
-
-	ch = arg;
-	BUG_ON(!ch);
-	pr_info("Session %s: kernel thread %s (PID %d) started\n",
-		ch->sess_name, ch->thread->comm, current->pid);
-	while (!kthread_should_stop()) {
-		wait_event_interruptible(ch->wait_queue,
-			(srpt_process_completion(ch->cq, ch),
-			 kthread_should_stop()));
-	}
-	pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
-		ch->sess_name, ch->thread->comm, current->pid);
-	return 0;
-}
-
 /**
  * srpt_create_ch_ib() - Create receive and send completion queues.
  */
@@ -2075,7 +1944,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
 	struct srpt_port *sport = ch->sport;
 	struct srpt_device *sdev = sport->sdev;
 	u32 srp_sq_size = sport->port_attrib.srp_sq_size;
-	struct ib_cq_init_attr cq_attr = {};
 	int ret;

 	WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1954,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
 		goto out;

 retry:
-	cq_attr.cqe = ch->rq_size + srp_sq_size;
-	ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
-			      &cq_attr);
+	ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+			0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
 	if (IS_ERR(ch->cq)) {
 		ret = PTR_ERR(ch->cq);
 		pr_err("failed to create CQ cqe= %d ret= %d\n",
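The ib_alloc_cq() call above replaces the ib_create_cq()/srpt_completion()/srpt_compl_thread() combination removed elsewhere in this patch: the RDMA core now polls the CQ on behalf of the driver (here from a workqueue, IB_POLL_WORKQUEUE) and dispatches each work completion to wc->wr_cqe->done, so the driver-private polling thread disappears. A hedged usage sketch of the allocation/teardown pairing, with hypothetical names not taken from ib_srpt:

/* Hypothetical sketch of the ib_alloc_cq()/ib_free_cq() lifecycle. */
struct my_channel {
	struct ib_cq	*cq;
	/* ... */
};

static int my_create_cq(struct my_channel *ch, struct ib_device *dev,
			int nr_cqe)
{
	/* ch becomes cq->cq_context; comp_vector 0; polled from a workqueue */
	ch->cq = ib_alloc_cq(dev, ch, nr_cqe, 0, IB_POLL_WORKQUEUE);
	if (IS_ERR(ch->cq))
		return PTR_ERR(ch->cq);
	return 0;
}

static void my_destroy_cq(struct my_channel *ch)
{
	ib_free_cq(ch->cq);	/* stops polling and frees the CQ */
}

This is why the hunks that follow drop the wait queue, the kthread creation and the ib_destroy_cq() calls in favor of ib_free_cq().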
@@ -2131,18 +1998,6 @@ retry:
 	if (ret)
 		goto err_destroy_qp;

-	init_waitqueue_head(&ch->wait_queue);
-
-	pr_debug("creating thread for session %s\n", ch->sess_name);
-
-	ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
-	if (IS_ERR(ch->thread)) {
-		pr_err("failed to create kernel thread %ld\n",
-		       PTR_ERR(ch->thread));
-		ch->thread = NULL;
-		goto err_destroy_qp;
-	}
-
 out:
 	kfree(qp_init);
 	return ret;
@@ -2150,17 +2005,14 @@ out:
 err_destroy_qp:
 	ib_destroy_qp(ch->qp);
 err_destroy_cq:
-	ib_destroy_cq(ch->cq);
+	ib_free_cq(ch->cq);
 	goto out;
 }

 static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
 {
-	if (ch->thread)
-		kthread_stop(ch->thread);
-
 	ib_destroy_qp(ch->qp);
-	ib_destroy_cq(ch->cq);
+	ib_free_cq(ch->cq);
 }

 /**
@@ -2821,12 +2673,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
 			      struct srpt_send_ioctx *ioctx)
 {
-	struct ib_rdma_wr wr;
 	struct ib_send_wr *bad_wr;
-	struct rdma_iu *riu;
-	int i;
-	int ret;
-	int sq_wr_avail;
+	int sq_wr_avail, ret, i;
 	enum dma_data_direction dir;
 	const int n_rdma = ioctx->n_rdma;

@@ -2842,59 +2690,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
 		}
 	}

-	ioctx->rdma_aborted = false;
-	ret = 0;
-	riu = ioctx->rdma_ius;
-	memset(&wr, 0, sizeof wr);
-
-	for (i = 0; i < n_rdma; ++i, ++riu) {
-		if (dir == DMA_FROM_DEVICE) {
-			wr.wr.opcode = IB_WR_RDMA_WRITE;
-			wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
-						SRPT_RDMA_WRITE_LAST :
-						SRPT_RDMA_MID,
-						ioctx->ioctx.index);
-		} else {
-			wr.wr.opcode = IB_WR_RDMA_READ;
-			wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
-						SRPT_RDMA_READ_LAST :
-						SRPT_RDMA_MID,
-						ioctx->ioctx.index);
-		}
-		wr.wr.next = NULL;
-		wr.remote_addr = riu->raddr;
-		wr.rkey = riu->rkey;
-		wr.wr.num_sge = riu->sge_cnt;
-		wr.wr.sg_list = riu->sge;
+	for (i = 0; i < n_rdma; i++) {
+		struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;

-		/* only get completion event for the last rdma write */
-		if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
-			wr.wr.send_flags = IB_SEND_SIGNALED;
+		wr->opcode = (dir == DMA_FROM_DEVICE) ?
+				IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;

-		ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
-		if (ret)
-			break;
+		if (i == n_rdma - 1) {
+			/* only get completion event for the last rdma read */
+			if (dir == DMA_TO_DEVICE) {
+				wr->send_flags = IB_SEND_SIGNALED;
+				ioctx->rdma_cqe.done = srpt_rdma_read_done;
+			} else {
+				ioctx->rdma_cqe.done = srpt_rdma_write_done;
+			}
+			wr->wr_cqe = &ioctx->rdma_cqe;
+			wr->next = NULL;
+		} else {
+			wr->wr_cqe = NULL;
+			wr->next = &ioctx->rdma_wrs[i + 1].wr;
+		}
 	}

+	ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
 	if (ret)
 		pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
 		       __func__, __LINE__, ret, i, n_rdma);
-	if (ret && i > 0) {
-		wr.wr.num_sge = 0;
-		wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
-		wr.wr.send_flags = IB_SEND_SIGNALED;
-		while (ch->state == CH_LIVE &&
-			ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
-			pr_info("Trying to abort failed RDMA transfer [%d]\n",
-				ioctx->ioctx.index);
-			msleep(1000);
-		}
-		while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
-			pr_info("Waiting until RDMA abort finished [%d]\n",
-				ioctx->ioctx.index);
-			msleep(1000);
-		}
-	}
 out:
 	if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
 		atomic_add(n_rdma, &ch->sq_wr_avail);