author    Or Gerlitz <ogerlitz@voltaire.com>  2010-02-08 08:17:42 -0500
committer Roland Dreier <rolandd@cisco.com>   2010-02-24 12:41:10 -0500
commit    bcc60c381d857ced653e912cbe6121294773e147 (patch)
tree      543a2d483a1110f9666ae5503d9e3c53a8782e0c /drivers/infiniband
parent    1cef4659850eeb862c248c7670e404d7a1711ed1 (diff)
IB/iser: New receive buffer posting logic
Currently, the recv buffer posting logic is based on the transactional nature of iSER, which allows posting a buffer before sending a PDU. Change this to post only when the number of outstanding recv buffers is below a watermark, and in a batched manner, thus simplifying and optimizing the data path. Use a pre-allocated ring of recv buffers instead of allocating from the kmem cache. Special treatment is given to the login response buffer, whose size must be 8K, unlike the 128-byte buffers used for every other purpose.

Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
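In outline, the new data-path rule is: on each receive completion, once enough buffers have been consumed to fit a full batch, repost ISER_MIN_POSTED_RX ring buffers at once. A minimal sketch of that check, lifted from the logic this patch adds at the end of iser_rcv_completion(); the standalone helper below is illustrative only, not a function the patch defines:

        /* Sketch: batched repost below the watermark, per the patch's
         * iser_rcv_completion() tail; helper name is hypothetical. */
        static void iser_repost_rx_sketch(struct iser_conn *ib_conn)
        {
                int outstanding = atomic_read(&ib_conn->post_recv_buf_count);
                int count;

                if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
                        count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
                                    ISER_MIN_POSTED_RX);
                        if (iser_post_recvm(ib_conn, count))
                                iser_err("posting %d rx bufs failed\n", count);
                }
        }

Since ISER_QP_MAX_RECV_DTOS is a power of two, iser_post_recvm() can advance the ring head with a mask, (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1), rather than a modulo.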
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c     |   2
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h     |  40
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c | 235
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c     | 134
4 files changed, 235 insertions(+), 176 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 5f7a6fca0a4d..355470e7e904 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -283,7 +283,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
          * due to issues with the login code re iser sematics
          * this not set in iscsi_conn_setup - FIXME
          */
-        conn->max_recv_dlength = 128;
+        conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
 
         iser_conn = conn->dd_data;
         conn->dd_data = iser_conn;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index e8dfdcfa1daf..83effb610594 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -102,9 +102,9 @@
 #define ISER_MAX_TX_MISC_PDUS        6 /* NOOP_OUT(2), TEXT(1),       *
                                         * SCSI_TMFUNC(2), LOGOUT(1) */
 
-#define ISER_QP_MAX_RECV_DTOS        (ISCSI_DEF_XMIT_CMDS_MAX + \
-                                      ISER_MAX_RX_MISC_PDUS + \
-                                      ISER_MAX_TX_MISC_PDUS)
+#define ISER_QP_MAX_RECV_DTOS        (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISER_MIN_POSTED_RX           (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
 
 /* the max TX (send) WR supported by the iSER QP is defined by               *
  * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -132,6 +132,12 @@ struct iser_hdr {
         __be64  read_va;
 } __attribute__((packed));
 
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN       (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+
+#define ISER_RECV_DATA_SEG_LEN 128
+#define ISER_RX_PAYLOAD_SIZE   (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE     (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
 
 /* Length of an object name string */
 #define ISER_OBJECT_NAME_SIZE  64
@@ -212,7 +218,6 @@ struct iser_dto {
 };
 
 enum iser_desc_type {
-        ISCSI_RX,
         ISCSI_TX_CONTROL ,
         ISCSI_TX_SCSI_COMMAND,
         ISCSI_TX_DATAOUT
@@ -228,6 +233,17 @@ struct iser_desc {
         struct iser_dto              dto;
 };
 
+#define ISER_RX_PAD_SIZE        (256 - (ISER_RX_PAYLOAD_SIZE + \
+                                        sizeof(u64) + sizeof(struct ib_sge)))
+struct iser_rx_desc {
+        struct iser_hdr              iser_header;
+        struct iscsi_hdr             iscsi_header;
+        char                         data[ISER_RECV_DATA_SEG_LEN];
+        u64                          dma_addr;
+        struct ib_sge                rx_sg;
+        char                         pad[ISER_RX_PAD_SIZE];
+} __attribute__((packed));
+
 struct iser_device {
         struct ib_device             *ib_device;
         struct ib_pd                 *pd;
@@ -256,6 +272,12 @@ struct iser_conn {
         struct iser_page_vec         *page_vec;  /* represents SG to fmr maps*
                                                   * maps serialized as tx is*/
         struct list_head             conn_list;  /* entry in ig conn list */
+
+        char                         *login_buf;
+        u64                          login_dma;
+        unsigned int                 rx_desc_head;
+        struct iser_rx_desc          *rx_descs;
+        struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
 };
 
 struct iscsi_iser_conn {
@@ -319,8 +341,9 @@ void iser_conn_put(struct iser_conn *ib_conn);
 
 void iser_conn_terminate(struct iser_conn *ib_conn);
 
-void iser_rcv_completion(struct iser_desc *desc,
-                         unsigned long dto_xfer_len);
+void iser_rcv_completion(struct iser_rx_desc *desc,
+                         unsigned long dto_xfer_len,
+                         struct iser_conn *ib_conn);
 
 void iser_snd_completion(struct iser_desc *desc);
 
@@ -332,6 +355,8 @@ void iser_dto_buffs_release(struct iser_dto *dto);
 
 int  iser_regd_buff_release(struct iser_regd_buf *regd_buf);
 
+void iser_free_rx_descriptors(struct iser_conn *ib_conn);
+
 void iser_reg_single(struct iser_device *device,
                      struct iser_regd_buf *regd_buf,
                      enum dma_data_direction direction);
@@ -353,7 +378,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
 
 void iser_unreg_mem(struct iser_mem_reg *mem_reg);
 
-int  iser_post_recv(struct iser_desc *rx_desc);
+int  iser_post_recvl(struct iser_conn *ib_conn);
+int  iser_post_recvm(struct iser_conn *ib_conn, int count);
 int  iser_post_send(struct iser_desc *tx_desc);
 
 int iser_conn_state_comp(struct iser_conn *ib_conn,
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5f42fbe3080c..6d9bbe6363ee 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -39,9 +39,6 @@
 
 #include "iscsi_iser.h"
 
-/* Constant PDU lengths calculations */
-#define ISER_TOTAL_HEADERS_LEN  (sizeof (struct iser_hdr) + \
-                                 sizeof (struct iscsi_hdr))
 
 /* iser_dto_add_regd_buff - increments the reference count for *
  * the registered buffer & adds it to the DTO object           */
@@ -172,78 +169,6 @@ iser_prepare_write_cmd(struct iscsi_task *task,
         return 0;
 }
 
-/**
- * iser_post_receive_control - allocates, initializes and posts receive DTO.
- */
-static int iser_post_receive_control(struct iscsi_conn *conn)
-{
-        struct iscsi_iser_conn *iser_conn = conn->dd_data;
-        struct iser_desc *rx_desc;
-        struct iser_regd_buf *regd_hdr;
-        struct iser_regd_buf *regd_data;
-        struct iser_dto *recv_dto = NULL;
-        struct iser_device *device = iser_conn->ib_conn->device;
-        int rx_data_size, err = 0;
-
-        rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
-        if (rx_desc == NULL) {
-                iser_err("Failed to alloc desc for post recv\n");
-                return -ENOMEM;
-        }
-        rx_desc->type = ISCSI_RX;
-
-        /* for the login sequence we must support rx of upto 8K; login is done
-         * after conn create/bind (connect) and conn stop/bind (reconnect),
-         * what's common for both schemes is that the connection is not started
-         */
-        if (conn->c_stage != ISCSI_CONN_STARTED)
-                rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
-        else /* FIXME till user space sets conn->max_recv_dlength correctly */
-                rx_data_size = 128;
-
-        rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
-        if (rx_desc->data == NULL) {
-                iser_err("Failed to alloc data buf for post recv\n");
-                err = -ENOMEM;
-                goto post_rx_kmalloc_failure;
-        }
-
-        recv_dto = &rx_desc->dto;
-        recv_dto->ib_conn = iser_conn->ib_conn;
-        recv_dto->regd_vector_len = 0;
-
-        regd_hdr = &rx_desc->hdr_regd_buf;
-        memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
-        regd_hdr->device = device;
-        regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
-        regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
-
-        iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
-
-        iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
-
-        regd_data = &rx_desc->data_regd_buf;
-        memset(regd_data, 0, sizeof(struct iser_regd_buf));
-        regd_data->device = device;
-        regd_data->virt_addr = rx_desc->data;
-        regd_data->data_size = rx_data_size;
-
-        iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
-
-        iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
-
-        err = iser_post_recv(rx_desc);
-        if (!err)
-                return 0;
-
-        /* iser_post_recv failed */
-        iser_dto_buffs_release(recv_dto);
-        kfree(rx_desc->data);
-post_rx_kmalloc_failure:
-        kmem_cache_free(ig.desc_cache, rx_desc);
-        return err;
-}
-
 /* creates a new tx descriptor and adds header regd buffer */
 static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
                                   struct iser_desc *tx_desc)
@@ -254,7 +179,7 @@ static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
         memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
         regd_hdr->device = iser_conn->ib_conn->device;
         regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */
-        regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
+        regd_hdr->data_size = ISER_HEADERS_LEN;
 
         send_dto->ib_conn = iser_conn->ib_conn;
         send_dto->notify_enable = 1;
@@ -266,6 +191,72 @@ static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
         iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0);
 }
 
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
+{
+        int i, j;
+        u64 dma_addr;
+        struct iser_rx_desc *rx_desc;
+        struct ib_sge *rx_sg;
+        struct iser_device *device = ib_conn->device;
+
+        ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
+                                sizeof(struct iser_rx_desc), GFP_KERNEL);
+        if (!ib_conn->rx_descs)
+                goto rx_desc_alloc_fail;
+
+        rx_desc = ib_conn->rx_descs;
+
+        for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
+                dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
+                                        ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+                if (ib_dma_mapping_error(device->ib_device, dma_addr))
+                        goto rx_desc_dma_map_failed;
+
+                rx_desc->dma_addr = dma_addr;
+
+                rx_sg = &rx_desc->rx_sg;
+                rx_sg->addr = rx_desc->dma_addr;
+                rx_sg->length = ISER_RX_PAYLOAD_SIZE;
+                rx_sg->lkey = device->mr->lkey;
+        }
+
+        ib_conn->rx_desc_head = 0;
+        return 0;
+
+rx_desc_dma_map_failed:
+        rx_desc = ib_conn->rx_descs;
+        for (j = 0; j < i; j++, rx_desc++)
+                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
+                        ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+        kfree(ib_conn->rx_descs);
+        ib_conn->rx_descs = NULL;
+rx_desc_alloc_fail:
+        iser_err("failed allocating rx descriptors / data buffers\n");
+        return -ENOMEM;
+}
+
+void iser_free_rx_descriptors(struct iser_conn *ib_conn)
+{
+        int i;
+        struct iser_rx_desc *rx_desc;
+        struct iser_device *device = ib_conn->device;
+
+        if (ib_conn->login_buf) {
+                ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
+                        ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+                kfree(ib_conn->login_buf);
+        }
+
+        if (!ib_conn->rx_descs)
+                return;
+
+        rx_desc = ib_conn->rx_descs;
+        for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
+                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
+                        ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+        kfree(ib_conn->rx_descs);
+}
+
 /**
  * iser_conn_set_full_featured_mode - (iSER API)
  */
@@ -273,27 +264,20 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
 {
         struct iscsi_iser_conn *iser_conn = conn->dd_data;
 
-        int i;
-        /* no need to keep it in a var, we are after login so if this should
-         * be negotiated, by now the result should be available here */
-        int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
-
-        iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
+        iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
 
         /* Check that there is no posted recv or send buffers left - */
         /* they must be consumed during the login phase */
         BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0);
         BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
 
+        if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
+                return -ENOMEM;
+
         /* Initial post receive buffers */
-        for (i = 0; i < initial_post_recv_bufs_num; i++) {
-                if (iser_post_receive_control(conn) != 0) {
-                        iser_err("Failed to post recv bufs at:%d conn:0x%p\n",
-                                 i, conn);
-                        return -ENOMEM;
-                }
-        }
-        iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn);
+        if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
+                return -ENOMEM;
+
         return 0;
 }
 
@@ -321,7 +305,7 @@ int iser_send_command(struct iscsi_conn *conn,
         struct iscsi_iser_task *iser_task = task->dd_data;
         struct iser_dto *send_dto = NULL;
         unsigned long edtl;
-        int err = 0;
+        int err;
         struct iser_data_buf *data_buf;
         struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr;
         struct scsi_cmnd *sc = task->sc;
@@ -371,12 +355,6 @@ int iser_send_command(struct iscsi_conn *conn,
         iser_reg_single(iser_conn->ib_conn->device,
                         send_dto->regd[0], DMA_TO_DEVICE);
 
-        if (iser_post_receive_control(conn) != 0) {
-                iser_err("post_recv failed!\n");
-                err = -ENOMEM;
-                goto send_command_error;
-        }
-
         iser_task->status = ISER_TASK_STATUS_STARTED;
 
         err = iser_post_send(&iser_task->desc);
@@ -474,7 +452,7 @@ int iser_send_control(struct iscsi_conn *conn,
         struct iser_desc *mdesc = &iser_task->desc;
         struct iser_dto *send_dto = NULL;
         unsigned long data_seg_len;
-        int err = 0;
+        int err;
         struct iser_regd_buf *regd_buf;
         struct iser_device *device;
 
@@ -511,10 +489,10 @@ int iser_send_control(struct iscsi_conn *conn,
                         data_seg_len);
         }
 
-        if (iser_post_receive_control(conn) != 0) {
-                iser_err("post_rcv_buff failed!\n");
-                err = -ENOMEM;
-                goto send_control_error;
+        if (task == conn->login_task) {
+                err = iser_post_recvl(iser_conn->ib_conn);
+                if (err)
+                        goto send_control_error;
         }
 
         err = iser_post_send(mdesc);
@@ -530,27 +508,34 @@ send_control_error:
 /**
  * iser_rcv_dto_completion - recv DTO completion
  */
-void iser_rcv_completion(struct iser_desc *rx_desc,
-                         unsigned long dto_xfer_len)
+void iser_rcv_completion(struct iser_rx_desc *rx_desc,
+                         unsigned long rx_xfer_len,
+                         struct iser_conn *ib_conn)
 {
-        struct iser_dto *dto = &rx_desc->dto;
-        struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
+        struct iscsi_iser_conn *conn = ib_conn->iser_conn;
         struct iscsi_task *task;
         struct iscsi_iser_task *iser_task;
         struct iscsi_hdr *hdr;
-        char *rx_data = NULL;
-        int rx_data_len = 0;
         unsigned char opcode;
+        u64 rx_dma;
+        int rx_buflen, outstanding, count, err;
+
+        /* differentiate between login to all other PDUs */
+        if ((char *)rx_desc == ib_conn->login_buf) {
+                rx_dma = ib_conn->login_dma;
+                rx_buflen = ISER_RX_LOGIN_SIZE;
+        } else {
+                rx_dma = rx_desc->dma_addr;
+                rx_buflen = ISER_RX_PAYLOAD_SIZE;
+        }
 
-        hdr = &rx_desc->iscsi_header;
+        ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
+                        rx_buflen, DMA_FROM_DEVICE);
 
-        iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt);
+        hdr = &rx_desc->iscsi_header;
 
-        if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */
-                rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
-                rx_data     = dto->regd[1]->virt_addr;
-                rx_data    += dto->offset[1];
-        }
+        iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+                        hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
 
         opcode = hdr->opcode & ISCSI_OPCODE_MASK;
 
@@ -573,18 +558,30 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
                         iscsi_put_task(task);
                 }
         }
-        iser_dto_buffs_release(dto);
 
-        iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len);
+        iscsi_iser_recv(conn->iscsi_conn, hdr,
+                rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
 
-        kfree(rx_desc->data);
-        kmem_cache_free(ig.desc_cache, rx_desc);
+        ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
+                        rx_buflen, DMA_FROM_DEVICE);
 
         /* decrementing conn->post_recv_buf_count only --after-- freeing the  *
          * task eliminates the need to worry on tasks which are completed in   *
          * parallel to the execution of iser_conn_term. So the code that waits *
          * for the posted rx bufs refcount to become zero handles everything   */
         atomic_dec(&conn->ib_conn->post_recv_buf_count);
+
+        if (rx_dma == ib_conn->login_dma)
+                return;
+
+        outstanding = atomic_read(&ib_conn->post_recv_buf_count);
+        if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
+                count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
+                                ISER_MIN_POSTED_RX);
+                err = iser_post_recvm(ib_conn, count);
+                if (err)
+                        iser_err("posting %d rx bufs err %d\n", count, err);
+        }
 }
 
 void iser_snd_completion(struct iser_desc *tx_desc)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 7092503a10e3..89b956044060 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -129,13 +129,23 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 {
         struct iser_device *device;
         struct ib_qp_init_attr init_attr;
-        int ret;
+        int ret = -ENOMEM;
         struct ib_fmr_pool_param params;
 
         BUG_ON(ib_conn->device == NULL);
 
         device = ib_conn->device;
 
+        ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+        if (!ib_conn->login_buf) {
+                goto alloc_err;
+                ret = -ENOMEM;
+        }
+
+        ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
+                                (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
+                                DMA_FROM_DEVICE);
+
         ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
                                     (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
                                     GFP_KERNEL);
@@ -174,7 +184,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
         init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
         init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
         init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN;
-        init_attr.cap.max_recv_sge = 2;
+        init_attr.cap.max_recv_sge = 1;
         init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
         init_attr.qp_type = IB_QPT_RC;
 
@@ -192,6 +202,7 @@ qp_err:
         (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
 fmr_pool_err:
         kfree(ib_conn->page_vec);
+        kfree(ib_conn->login_buf);
 alloc_err:
         iser_err("unable to alloc mem or create resource, err %d\n", ret);
         return ret;
@@ -314,7 +325,7 @@ static void iser_conn_release(struct iser_conn *ib_conn)
         mutex_lock(&ig.connlist_mutex);
         list_del(&ib_conn->conn_list);
         mutex_unlock(&ig.connlist_mutex);
-
+        iser_free_rx_descriptors(ib_conn);
         iser_free_ib_conn_res(ib_conn);
         ib_conn->device = NULL;
         /* on EVENT_ADDR_ERROR there's no device yet for this conn */
@@ -625,6 +636,60 @@ void iser_unreg_mem(struct iser_mem_reg *reg)
         reg->mem_h = NULL;
 }
 
+int iser_post_recvl(struct iser_conn *ib_conn)
+{
+        struct ib_recv_wr rx_wr, *rx_wr_failed;
+        struct ib_sge sge;
+        int ib_ret;
+
+        sge.addr = ib_conn->login_dma;
+        sge.length = ISER_RX_LOGIN_SIZE;
+        sge.lkey = ib_conn->device->mr->lkey;
+
+        rx_wr.wr_id = (unsigned long)ib_conn->login_buf;
+        rx_wr.sg_list = &sge;
+        rx_wr.num_sge = 1;
+        rx_wr.next = NULL;
+
+        atomic_inc(&ib_conn->post_recv_buf_count);
+        ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+        if (ib_ret) {
+                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
+                atomic_dec(&ib_conn->post_recv_buf_count);
+        }
+        return ib_ret;
+}
+
+int iser_post_recvm(struct iser_conn *ib_conn, int count)
+{
+        struct ib_recv_wr *rx_wr, *rx_wr_failed;
+        int i, ib_ret;
+        unsigned int my_rx_head = ib_conn->rx_desc_head;
+        struct iser_rx_desc *rx_desc;
+
+        for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
+                rx_desc = &ib_conn->rx_descs[my_rx_head];
+                rx_wr->wr_id = (unsigned long)rx_desc;
+                rx_wr->sg_list = &rx_desc->rx_sg;
+                rx_wr->num_sge = 1;
+                rx_wr->next = rx_wr + 1;
+                my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
+        }
+
+        rx_wr--;
+        rx_wr->next = NULL; /* mark end of work requests list */
+
+        atomic_add(count, &ib_conn->post_recv_buf_count);
+        ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+        if (ib_ret) {
+                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
+                atomic_sub(count, &ib_conn->post_recv_buf_count);
+        } else
+                ib_conn->rx_desc_head = my_rx_head;
+        return ib_ret;
+}
+
+
 /**
  * iser_dto_to_iov - builds IOV from a dto descriptor
  */
@@ -665,39 +730,6 @@ static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_le
         }
 }
 
-/**
- * iser_post_recv - Posts a receive buffer.
- *
- * returns 0 on success, -1 on failure
- */
-int iser_post_recv(struct iser_desc *rx_desc)
-{
-        int ib_ret, ret_val = 0;
-        struct ib_recv_wr recv_wr, *recv_wr_failed;
-        struct ib_sge iov[2];
-        struct iser_conn *ib_conn;
-        struct iser_dto *recv_dto = &rx_desc->dto;
-
-        /* Retrieve conn */
-        ib_conn = recv_dto->ib_conn;
-
-        iser_dto_to_iov(recv_dto, iov, 2);
-
-        recv_wr.next = NULL;
-        recv_wr.sg_list = iov;
-        recv_wr.num_sge = recv_dto->regd_vector_len;
-        recv_wr.wr_id = (unsigned long)rx_desc;
-
-        atomic_inc(&ib_conn->post_recv_buf_count);
-        ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed);
-        if (ib_ret) {
-                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
-                atomic_dec(&ib_conn->post_recv_buf_count);
-                ret_val = -1;
-        }
-
-        return ret_val;
-}
 
 /**
  * iser_start_send - Initiate a Send DTO operation
@@ -737,18 +769,17 @@ int iser_post_send(struct iser_desc *tx_desc)
         return ret_val;
 }
 
-static void iser_handle_comp_error(struct iser_desc *desc)
+static void iser_handle_comp_error(struct iser_desc *desc,
+                                struct iser_conn *ib_conn)
 {
-        struct iser_dto *dto = &desc->dto;
-        struct iser_conn *ib_conn = dto->ib_conn;
-
-        iser_dto_buffs_release(dto);
+        struct iser_rx_desc *rx = (struct iser_rx_desc *)desc;
+        struct iser_rx_desc *rx_first = ib_conn->rx_descs;
+        struct iser_rx_desc *rx_last = rx_first + (ISER_QP_MAX_RECV_DTOS - 1);
 
-        if (desc->type == ISCSI_RX) {
-                kfree(desc->data);
-                kmem_cache_free(ig.desc_cache, desc);
+        if ((char *)desc == ib_conn->login_buf ||
+                        (rx_first <= rx && rx <= rx_last))
                 atomic_dec(&ib_conn->post_recv_buf_count);
-        } else { /* type is TX control/command/dataout */
+        else { /* type is TX control/command/dataout */
                 if (desc->type == ISCSI_TX_DATAOUT)
                         kmem_cache_free(ig.desc_cache, desc);
                 atomic_dec(&ib_conn->post_send_buf_count);
@@ -780,20 +811,25 @@ static void iser_cq_tasklet_fn(unsigned long data)
         struct ib_wc wc;
         struct iser_desc *desc;
         unsigned long xfer_len;
+        struct iser_conn *ib_conn;
 
         while (ib_poll_cq(cq, 1, &wc) == 1) {
                 desc = (struct iser_desc *) (unsigned long) wc.wr_id;
                 BUG_ON(desc == NULL);
+                ib_conn = wc.qp->qp_context;
 
                 if (wc.status == IB_WC_SUCCESS) {
-                        if (desc->type == ISCSI_RX) {
+                        if (wc.opcode == IB_WC_RECV) {
                                 xfer_len = (unsigned long)wc.byte_len;
-                                iser_rcv_completion(desc, xfer_len);
+                                iser_rcv_completion((struct iser_rx_desc *)desc,
+                                                xfer_len, ib_conn);
                         } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */
                                 iser_snd_completion(desc);
                 } else {
-                        iser_err("comp w. error op %d status %d\n",desc->type,wc.status);
-                        iser_handle_comp_error(desc);
+                        if (wc.status != IB_WC_WR_FLUSH_ERR)
+                                iser_err("id %llx status %d vend_err %x\n",
+                                        wc.wr_id, wc.status, wc.vendor_err);
+                        iser_handle_comp_error(desc, ib_conn);
                 }
         }
         /* #warning "it is assumed here that arming CQ only once its empty" *