aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author Sagi Grimberg <sagig@mellanox.com> 2014-10-01 07:02:09 -0400
committer Roland Dreier <roland@purestorage.com> 2014-10-09 03:06:07 -0400
commit 6aabfa76f5e5281e5db128a34420d8f33b8574f7 (patch)
tree d2d2276fa85be0bb6921d1f27125250eea47e379 /drivers/infiniband
parent 183cfa434ec90897b1423ce4f916e8a237139133 (diff)
IB/iser: Use single CQ for RX and TX
This will solve a possible condition where we might miss TX completion (flush error) during session teardown. Since we are using a single CQ, we don't need to actively drain the TX CQ, instead just wait for flush_completion (when counters reach zero) and remove iser_poll_for_flush_errors(). This patch might introduce a minor performance regression on its own, but the next patches will enhance performance using a single CQ for RX and TX. Signed-off-by: Sagi Grimberg <sagig@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.h 9
-rw-r--r-- drivers/infiniband/ulp/iser/iser_initiator.c 3
-rw-r--r-- drivers/infiniband/ulp/iser/iser_verbs.c 227
3 files changed, 114 insertions, 125 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 2bc34aa50705..1617c5cce8b1 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -271,16 +271,14 @@ struct iscsi_iser_task;
271 * struct iser_comp - iSER completion context 271 * struct iser_comp - iSER completion context
272 * 272 *
273 * @device: pointer to device handle 273 * @device: pointer to device handle
274 * @rx_cq: RX completion queue 274 * @cq: completion queue
275 * @tx_cq: TX completion queue
276 * @tasklet: Tasklet handle 275 * @tasklet: Tasklet handle
277 * @active_qps: Number of active QPs attached 276 * @active_qps: Number of active QPs attached
278 * to completion context 277 * to completion context
279 */ 278 */
280struct iser_comp { 279struct iser_comp {
281 struct iser_device *device; 280 struct iser_device *device;
282 struct ib_cq *rx_cq; 281 struct ib_cq *cq;
283 struct ib_cq *tx_cq;
284 struct tasklet_struct tasklet; 282 struct tasklet_struct tasklet;
285 int active_qps; 283 int active_qps;
286}; 284};
@@ -342,6 +340,7 @@ struct fast_reg_descriptor {
342 * @device: reference to iser device 340 * @device: reference to iser device
343 * @comp: iser completion context 341 * @comp: iser completion context
344 * @pi_support: Indicate device T10-PI support 342 * @pi_support: Indicate device T10-PI support
343 * @flush_comp: completes when all connection completions consumed
345 * @lock: protects fmr/fastreg pool 344 * @lock: protects fmr/fastreg pool
346 * @union.fmr: 345 * @union.fmr:
347 * @pool: FMR pool for fast registrations 346 * @pool: FMR pool for fast registrations
@@ -361,6 +360,7 @@ struct ib_conn {
361 struct iser_device *device; 360 struct iser_device *device;
362 struct iser_comp *comp; 361 struct iser_comp *comp;
363 bool pi_support; 362 bool pi_support;
363 struct completion flush_comp;
364 spinlock_t lock; 364 spinlock_t lock;
365 union { 365 union {
366 struct { 366 struct {
@@ -395,6 +395,7 @@ struct iser_conn {
395 u64 login_req_dma, login_resp_dma; 395 u64 login_req_dma, login_resp_dma;
396 unsigned int rx_desc_head; 396 unsigned int rx_desc_head;
397 struct iser_rx_desc *rx_descs; 397 struct iser_rx_desc *rx_descs;
398 u32 num_rx_descs;
398}; 399};
399 400
400struct iscsi_iser_task { 401struct iscsi_iser_task {
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 123174570c16..359c0b84f1ac 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -272,7 +272,8 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
272 if (iser_alloc_login_buf(iser_conn)) 272 if (iser_alloc_login_buf(iser_conn))
273 goto alloc_login_buf_fail; 273 goto alloc_login_buf_fail;
274 274
275 iser_conn->rx_descs = kmalloc(session->cmds_max * 275 iser_conn->num_rx_descs = session->cmds_max;
276 iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs *
276 sizeof(struct iser_rx_desc), GFP_KERNEL); 277 sizeof(struct iser_rx_desc), GFP_KERNEL);
277 if (!iser_conn->rx_descs) 278 if (!iser_conn->rx_descs)
278 goto rx_desc_alloc_fail; 279 goto rx_desc_alloc_fail;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index e31ac57accc9..eedc27a0d3c3 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -39,14 +39,14 @@
39#include "iscsi_iser.h" 39#include "iscsi_iser.h"
40 40
41#define ISCSI_ISER_MAX_CONN 8 41#define ISCSI_ISER_MAX_CONN 8
42#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) 42#define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
43#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) 43#define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
44#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN)
44 45
45static int iser_cq_poll_limit = 512; 46static int iser_cq_poll_limit = 512;
46 47
47static void iser_cq_tasklet_fn(unsigned long data); 48static void iser_cq_tasklet_fn(unsigned long data);
48static void iser_cq_callback(struct ib_cq *cq, void *cq_context); 49static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
49static int iser_drain_tx_cq(struct iser_comp *comp);
50 50
51static void iser_cq_event_callback(struct ib_event *cause, void *context) 51static void iser_cq_event_callback(struct ib_event *cause, void *context)
52{ 52{
@@ -117,26 +117,17 @@ static int iser_create_device_ib_res(struct iser_device *device)
117 struct iser_comp *comp = &device->comps[i]; 117 struct iser_comp *comp = &device->comps[i];
118 118
119 comp->device = device; 119 comp->device = device;
120 comp->rx_cq = ib_create_cq(device->ib_device, 120 comp->cq = ib_create_cq(device->ib_device,
121 iser_cq_callback, 121 iser_cq_callback,
122 iser_cq_event_callback, 122 iser_cq_event_callback,
123 (void *)comp, 123 (void *)comp,
124 ISER_MAX_RX_CQ_LEN, i); 124 ISER_MAX_CQ_LEN, i);
125 if (IS_ERR(comp->rx_cq)) { 125 if (IS_ERR(comp->cq)) {
126 comp->rx_cq = NULL; 126 comp->cq = NULL;
127 goto cq_err; 127 goto cq_err;
128 } 128 }
129 129
130 comp->tx_cq = ib_create_cq(device->ib_device, NULL, 130 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
131 iser_cq_event_callback,
132 (void *)comp,
133 ISER_MAX_TX_CQ_LEN, i);
134 if (IS_ERR(comp->tx_cq)) {
135 comp->tx_cq = NULL;
136 goto cq_err;
137 }
138
139 if (ib_req_notify_cq(comp->rx_cq, IB_CQ_NEXT_COMP))
140 goto cq_err; 131 goto cq_err;
141 132
142 tasklet_init(&comp->tasklet, iser_cq_tasklet_fn, 133 tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
@@ -165,10 +156,8 @@ cq_err:
165 for (i = 0; i < device->comps_used; i++) { 156 for (i = 0; i < device->comps_used; i++) {
166 struct iser_comp *comp = &device->comps[i]; 157 struct iser_comp *comp = &device->comps[i];
167 158
168 if (comp->tx_cq) 159 if (comp->cq)
169 ib_destroy_cq(comp->tx_cq); 160 ib_destroy_cq(comp->cq);
170 if (comp->rx_cq)
171 ib_destroy_cq(comp->rx_cq);
172 } 161 }
173 ib_dealloc_pd(device->pd); 162 ib_dealloc_pd(device->pd);
174pd_err: 163pd_err:
@@ -189,10 +178,8 @@ static void iser_free_device_ib_res(struct iser_device *device)
189 struct iser_comp *comp = &device->comps[i]; 178 struct iser_comp *comp = &device->comps[i];
190 179
191 tasklet_kill(&comp->tasklet); 180 tasklet_kill(&comp->tasklet);
192 ib_destroy_cq(comp->tx_cq); 181 ib_destroy_cq(comp->cq);
193 ib_destroy_cq(comp->rx_cq); 182 comp->cq = NULL;
194 comp->tx_cq = NULL;
195 comp->rx_cq = NULL;
196 } 183 }
197 184
198 (void)ib_unregister_event_handler(&device->event_handler); 185 (void)ib_unregister_event_handler(&device->event_handler);
@@ -462,8 +449,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
462 449
463 init_attr.event_handler = iser_qp_event_callback; 450 init_attr.event_handler = iser_qp_event_callback;
464 init_attr.qp_context = (void *)ib_conn; 451 init_attr.qp_context = (void *)ib_conn;
465 init_attr.send_cq = ib_conn->comp->tx_cq; 452 init_attr.send_cq = ib_conn->comp->cq;
466 init_attr.recv_cq = ib_conn->comp->rx_cq; 453 init_attr.recv_cq = ib_conn->comp->cq;
467 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 454 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
468 init_attr.cap.max_send_sge = 2; 455 init_attr.cap.max_send_sge = 2;
469 init_attr.cap.max_recv_sge = 1; 456 init_attr.cap.max_recv_sge = 1;
@@ -641,33 +628,6 @@ void iser_conn_release(struct iser_conn *iser_conn)
641} 628}
642 629
643/** 630/**
644 * iser_poll_for_flush_errors - Don't settle for less than all.
645 * @struct ib_conn: IB context of the connection
646 *
647 * This routine is called when the QP is in error state
648 * It polls the send CQ until all flush errors are consumed and
649 * returns when all flush errors were processed.
650 */
651static void iser_poll_for_flush_errors(struct ib_conn *ib_conn)
652{
653 int count = 0;
654
655 while (ib_conn->post_recv_buf_count > 0 ||
656 atomic_read(&ib_conn->post_send_buf_count) > 0) {
657 msleep(100);
658 if (atomic_read(&ib_conn->post_send_buf_count) > 0)
659 iser_drain_tx_cq(ib_conn->comp);
660
661 count++;
662 /* Don't flood with prints */
663 if (count % 30 == 0)
664 iser_dbg("post_recv %d post_send %d",
665 ib_conn->post_recv_buf_count,
666 atomic_read(&ib_conn->post_send_buf_count));
667 }
668}
669
670/**
671 * triggers start of the disconnect procedures and wait for them to be done 631 * triggers start of the disconnect procedures and wait for them to be done
672 * Called with state mutex held 632 * Called with state mutex held
673 */ 633 */
@@ -698,7 +658,7 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
698 iser_err("Failed to disconnect, conn: 0x%p err %d\n", 658 iser_err("Failed to disconnect, conn: 0x%p err %d\n",
699 iser_conn, err); 659 iser_conn, err);
700 660
701 iser_poll_for_flush_errors(ib_conn); 661 wait_for_completion(&ib_conn->flush_comp);
702 } 662 }
703 663
704 return 1; 664 return 1;
@@ -908,6 +868,7 @@ void iser_conn_init(struct iser_conn *iser_conn)
908 iser_conn->state = ISER_CONN_INIT; 868 iser_conn->state = ISER_CONN_INIT;
909 iser_conn->ib_conn.post_recv_buf_count = 0; 869 iser_conn->ib_conn.post_recv_buf_count = 0;
910 atomic_set(&iser_conn->ib_conn.post_send_buf_count, 0); 870 atomic_set(&iser_conn->ib_conn.post_send_buf_count, 0);
871 init_completion(&iser_conn->ib_conn.flush_comp);
911 init_completion(&iser_conn->stop_completion); 872 init_completion(&iser_conn->stop_completion);
912 init_completion(&iser_conn->ib_completion); 873 init_completion(&iser_conn->ib_completion);
913 init_completion(&iser_conn->up_completion); 874 init_completion(&iser_conn->up_completion);
@@ -1156,8 +1117,30 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc)
1156} 1117}
1157 1118
1158/** 1119/**
1120 * is_iser_tx_desc - Indicate if the completion wr_id
1121 * is a TX descriptor or not.
1122 * @iser_conn: iser connection
1123 * @wr_id: completion WR identifier
1124 *
1125 * Since we cannot rely on wc opcode in FLUSH errors
1126 * we must work around it by checking if the wr_id address
1127 * falls in the iser connection rx_descs buffer. If so
1128 * it is an RX descriptor, otherwise it is a TX.
1129 */
1130static inline bool
1131is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
1132{
1133 void *start = iser_conn->rx_descs;
1134 int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
1135
1136 if (wr_id >= start && wr_id < start + len)
1137 return false;
1138
1139 return true;
1140}
1141
1142/**
1159 * iser_handle_comp_error() - Handle error completion 1143 * iser_handle_comp_error() - Handle error completion
1160 * @desc: iser TX descriptor
1161 * @ib_conn: connection RDMA resources 1144 * @ib_conn: connection RDMA resources
1162 * @wc: work completion 1145 * @wc: work completion
1163 * 1146 *
@@ -1167,8 +1150,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc)
1167 * connection is failed (in case we passed bind stage). 1150 * connection is failed (in case we passed bind stage).
1168 */ 1151 */
1169static void 1152static void
1170iser_handle_comp_error(struct iser_tx_desc *desc, 1153iser_handle_comp_error(struct ib_conn *ib_conn,
1171 struct ib_conn *ib_conn,
1172 struct ib_wc *wc) 1154 struct ib_wc *wc)
1173{ 1155{
1174 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 1156 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
@@ -1179,85 +1161,90 @@ iser_handle_comp_error(struct iser_tx_desc *desc,
1179 iscsi_conn_failure(iser_conn->iscsi_conn, 1161 iscsi_conn_failure(iser_conn->iscsi_conn,
1180 ISCSI_ERR_CONN_FAILED); 1162 ISCSI_ERR_CONN_FAILED);
1181 1163
1182 if (desc && desc->type == ISCSI_TX_DATAOUT) 1164 if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
1183 kmem_cache_free(ig.desc_cache, desc); 1165 struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
1166
1167 atomic_dec(&ib_conn->post_send_buf_count);
1168 if (desc->type == ISCSI_TX_DATAOUT)
1169 kmem_cache_free(ig.desc_cache, desc);
1170 } else {
1171 ib_conn->post_recv_buf_count--;
1172 }
1184} 1173}
1185 1174
1186static int iser_drain_tx_cq(struct iser_comp *comp) 1175/**
1176 * iser_handle_wc - handle a single work completion
1177 * @wc: work completion
1178 *
1179 * Soft-IRQ context, work completion can be either
1180 * SEND or RECV, and can turn out successful or
1181 * with error (or flush error).
1182 */
1183static void iser_handle_wc(struct ib_wc *wc)
1187{ 1184{
1188 struct ib_cq *cq = comp->tx_cq;
1189 struct ib_wc wc;
1190 struct iser_tx_desc *tx_desc;
1191 struct ib_conn *ib_conn; 1185 struct ib_conn *ib_conn;
1192 int completed_tx = 0; 1186 struct iser_tx_desc *tx_desc;
1187 struct iser_rx_desc *rx_desc;
1193 1188
1194 while (ib_poll_cq(cq, 1, &wc) == 1) { 1189 ib_conn = wc->qp->qp_context;
1195 tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; 1190 if (wc->status == IB_WC_SUCCESS) {
1196 ib_conn = wc.qp->qp_context; 1191 if (wc->opcode == IB_WC_RECV) {
1197 if (wc.status == IB_WC_SUCCESS) { 1192 rx_desc = (struct iser_rx_desc *)wc->wr_id;
1198 if (wc.opcode == IB_WC_SEND) 1193 iser_rcv_completion(rx_desc, wc->byte_len,
1199 iser_snd_completion(tx_desc, ib_conn); 1194 ib_conn);
1200 else 1195 } else
1201 iser_err("expected opcode %d got %d\n", 1196 if (wc->opcode == IB_WC_SEND) {
1202 IB_WC_SEND, wc.opcode); 1197 tx_desc = (struct iser_tx_desc *)wc->wr_id;
1198 iser_snd_completion(tx_desc, ib_conn);
1199 atomic_dec(&ib_conn->post_send_buf_count);
1203 } else { 1200 } else {
1204 iser_err("tx id %llx status %d vend_err %x\n", 1201 iser_err("Unknown wc opcode %d\n", wc->opcode);
1205 wc.wr_id, wc.status, wc.vendor_err);
1206 if (wc.wr_id != ISER_FASTREG_LI_WRID) {
1207 atomic_dec(&ib_conn->post_send_buf_count);
1208 iser_handle_comp_error(tx_desc, ib_conn, &wc);
1209 }
1210 } 1202 }
1211 completed_tx++; 1203 } else {
1204 if (wc->status != IB_WC_WR_FLUSH_ERR)
1205 iser_err("wr id %llx status %d vend_err %x\n",
1206 wc->wr_id, wc->status, wc->vendor_err);
1207 else
1208 iser_dbg("flush error: wr id %llx\n", wc->wr_id);
1209
1210 if (wc->wr_id != ISER_FASTREG_LI_WRID)
1211 iser_handle_comp_error(ib_conn, wc);
1212
1213 /* complete in case all flush errors were consumed */
1214 if (ib_conn->post_recv_buf_count == 0 &&
1215 atomic_read(&ib_conn->post_send_buf_count) == 0)
1216 complete(&ib_conn->flush_comp);
1212 } 1217 }
1213 return completed_tx;
1214} 1218}
1215 1219
1216 1220/**
1221 * iser_cq_tasklet_fn - iSER completion polling loop
1222 * @data: iSER completion context
1223 *
1224 * Soft-IRQ context, polling connection CQ until
1225 * either CQ was empty or we exhausted polling budget
1226 */
1217static void iser_cq_tasklet_fn(unsigned long data) 1227static void iser_cq_tasklet_fn(unsigned long data)
1218{ 1228{
1219 struct iser_comp *comp = (struct iser_comp *)data; 1229 struct iser_comp *comp = (struct iser_comp *)data;
1220 struct ib_cq *cq = comp->rx_cq; 1230 struct ib_cq *cq = comp->cq;
1221 struct ib_wc wc; 1231 struct ib_wc wc;
1222 struct iser_rx_desc *desc; 1232 int completed = 0;
1223 unsigned long xfer_len;
1224 struct ib_conn *ib_conn;
1225 int completed_tx, completed_rx = 0;
1226
1227 /* First do tx drain, so in a case where we have rx flushes and a successful
1228 * tx completion we will still go through completion error handling.
1229 */
1230 completed_tx = iser_drain_tx_cq(comp);
1231 1233
1232 while (ib_poll_cq(cq, 1, &wc) == 1) { 1234 while (ib_poll_cq(cq, 1, &wc) == 1) {
1233 desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; 1235 iser_handle_wc(&wc);
1234 BUG_ON(desc == NULL); 1236
1235 ib_conn = wc.qp->qp_context; 1237 if (++completed >= iser_cq_poll_limit)
1236 if (wc.status == IB_WC_SUCCESS) {
1237 if (wc.opcode == IB_WC_RECV) {
1238 xfer_len = (unsigned long)wc.byte_len;
1239 iser_rcv_completion(desc, xfer_len, ib_conn);
1240 } else
1241 iser_err("expected opcode %d got %d\n",
1242 IB_WC_RECV, wc.opcode);
1243 } else {
1244 if (wc.status != IB_WC_WR_FLUSH_ERR)
1245 iser_err("rx id %llx status %d vend_err %x\n",
1246 wc.wr_id, wc.status, wc.vendor_err);
1247 ib_conn->post_recv_buf_count--;
1248 iser_handle_comp_error(NULL, ib_conn, &wc);
1249 }
1250 completed_rx++;
1251 if (!(completed_rx & 63))
1252 completed_tx += iser_drain_tx_cq(comp);
1253 if (completed_rx >= iser_cq_poll_limit)
1254 break; 1238 break;
1255 } 1239 }
1256 /* #warning "it is assumed here that arming CQ only once its empty" * 1240
1257 * " would not cause interrupts to be missed" */ 1241 /*
1242 * It is assumed here that arming CQ only once its empty
1243 * would not cause interrupts to be missed.
1244 */
1258 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 1245 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1259 1246
1260 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); 1247 iser_dbg("got %d completions\n", completed);
1261} 1248}
1262 1249
1263static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 1250static void iser_cq_callback(struct ib_cq *cq, void *cq_context)