aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2010-03-02 02:51:56 -0500
committerRoland Dreier <rolandd@cisco.com>2010-03-02 02:51:56 -0500
commit5c2187f0a184d6c5ec87aab403b79a8bb24a7988 (patch)
tree18d9e853fa3980f8f96e7fbc374bc64ea4c08d7f
parent7f1681622a7b70b083f0034afb837c06f8ba2612 (diff)
parent88ec415772144f4fc4a50b123bb6200de686898d (diff)
Merge branch 'iser' into for-next
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c47
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h97
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c506
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c64
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c281
5 files changed, 391 insertions, 604 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 5f7a6fca0a4d..71237f8f78f7 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -128,6 +128,28 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
128 return 0; 128 return 0;
129} 129}
130 130
131int iser_initialize_task_headers(struct iscsi_task *task,
132 struct iser_tx_desc *tx_desc)
133{
134 struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
135 struct iser_device *device = iser_conn->ib_conn->device;
136 struct iscsi_iser_task *iser_task = task->dd_data;
137 u64 dma_addr;
138
139 dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
140 ISER_HEADERS_LEN, DMA_TO_DEVICE);
141 if (ib_dma_mapping_error(device->ib_device, dma_addr))
142 return -ENOMEM;
143
144 tx_desc->dma_addr = dma_addr;
145 tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
146 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
147 tx_desc->tx_sg[0].lkey = device->mr->lkey;
148
149 iser_task->headers_initialized = 1;
150 iser_task->iser_conn = iser_conn;
151 return 0;
152}
131/** 153/**
132 * iscsi_iser_task_init - Initialize task 154 * iscsi_iser_task_init - Initialize task
133 * @task: iscsi task 155 * @task: iscsi task
@@ -137,17 +159,17 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
137static int 159static int
138iscsi_iser_task_init(struct iscsi_task *task) 160iscsi_iser_task_init(struct iscsi_task *task)
139{ 161{
140 struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
141 struct iscsi_iser_task *iser_task = task->dd_data; 162 struct iscsi_iser_task *iser_task = task->dd_data;
142 163
164 if (!iser_task->headers_initialized)
165 if (iser_initialize_task_headers(task, &iser_task->desc))
166 return -ENOMEM;
167
143 /* mgmt task */ 168 /* mgmt task */
144 if (!task->sc) { 169 if (!task->sc)
145 iser_task->desc.data = task->data;
146 return 0; 170 return 0;
147 }
148 171
149 iser_task->command_sent = 0; 172 iser_task->command_sent = 0;
150 iser_task->iser_conn = iser_conn;
151 iser_task_rdma_init(iser_task); 173 iser_task_rdma_init(iser_task);
152 return 0; 174 return 0;
153} 175}
@@ -168,7 +190,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
168{ 190{
169 int error = 0; 191 int error = 0;
170 192
171 iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); 193 iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt);
172 194
173 error = iser_send_control(conn, task); 195 error = iser_send_control(conn, task);
174 196
@@ -178,9 +200,6 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
178 * - if yes, the task is recycled at iscsi_complete_pdu 200 * - if yes, the task is recycled at iscsi_complete_pdu
179 * - if no, the task is recycled at iser_snd_completion 201 * - if no, the task is recycled at iser_snd_completion
180 */ 202 */
181 if (error && error != -ENOBUFS)
182 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
183
184 return error; 203 return error;
185} 204}
186 205
@@ -232,7 +251,7 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
232 task->imm_count, task->unsol_r2t.data_length); 251 task->imm_count, task->unsol_r2t.data_length);
233 } 252 }
234 253
235 iser_dbg("task deq [cid %d itt 0x%x]\n", 254 iser_dbg("ctask xmit [cid %d itt 0x%x]\n",
236 conn->id, task->itt); 255 conn->id, task->itt);
237 256
238 /* Send the cmd PDU */ 257 /* Send the cmd PDU */
@@ -248,8 +267,6 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
248 error = iscsi_iser_task_xmit_unsol_data(conn, task); 267 error = iscsi_iser_task_xmit_unsol_data(conn, task);
249 268
250 iscsi_iser_task_xmit_exit: 269 iscsi_iser_task_xmit_exit:
251 if (error && error != -ENOBUFS)
252 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
253 return error; 270 return error;
254} 271}
255 272
@@ -283,7 +300,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
283 * due to issues with the login code re iser sematics 300 * due to issues with the login code re iser sematics
284 * this not set in iscsi_conn_setup - FIXME 301 * this not set in iscsi_conn_setup - FIXME
285 */ 302 */
286 conn->max_recv_dlength = 128; 303 conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
287 304
288 iser_conn = conn->dd_data; 305 iser_conn = conn->dd_data;
289 conn->dd_data = iser_conn; 306 conn->dd_data = iser_conn;
@@ -401,7 +418,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
401 struct Scsi_Host *shost; 418 struct Scsi_Host *shost;
402 struct iser_conn *ib_conn; 419 struct iser_conn *ib_conn;
403 420
404 shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 1); 421 shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
405 if (!shost) 422 if (!shost)
406 return NULL; 423 return NULL;
407 shost->transportt = iscsi_iser_scsi_transport; 424 shost->transportt = iscsi_iser_scsi_transport;
@@ -675,7 +692,7 @@ static int __init iser_init(void)
675 memset(&ig, 0, sizeof(struct iser_global)); 692 memset(&ig, 0, sizeof(struct iser_global));
676 693
677 ig.desc_cache = kmem_cache_create("iser_descriptors", 694 ig.desc_cache = kmem_cache_create("iser_descriptors",
678 sizeof (struct iser_desc), 695 sizeof(struct iser_tx_desc),
679 0, SLAB_HWCACHE_ALIGN, 696 0, SLAB_HWCACHE_ALIGN,
680 NULL); 697 NULL);
681 if (ig.desc_cache == NULL) 698 if (ig.desc_cache == NULL)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9d529cae1f0d..036934cdcb92 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -102,9 +102,9 @@
102#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * 102#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
103 * SCSI_TMFUNC(2), LOGOUT(1) */ 103 * SCSI_TMFUNC(2), LOGOUT(1) */
104 104
105#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \ 105#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
106 ISER_MAX_RX_MISC_PDUS + \ 106
107 ISER_MAX_TX_MISC_PDUS) 107#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
108 108
109/* the max TX (send) WR supported by the iSER QP is defined by * 109/* the max TX (send) WR supported by the iSER QP is defined by *
110 * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * 110 * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -132,6 +132,12 @@ struct iser_hdr {
132 __be64 read_va; 132 __be64 read_va;
133} __attribute__((packed)); 133} __attribute__((packed));
134 134
135/* Constant PDU lengths calculations */
136#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
137
138#define ISER_RECV_DATA_SEG_LEN 128
139#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
140#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
135 141
136/* Length of an object name string */ 142/* Length of an object name string */
137#define ISER_OBJECT_NAME_SIZE 64 143#define ISER_OBJECT_NAME_SIZE 64
@@ -187,51 +193,43 @@ struct iser_regd_buf {
187 struct iser_mem_reg reg; /* memory registration info */ 193 struct iser_mem_reg reg; /* memory registration info */
188 void *virt_addr; 194 void *virt_addr;
189 struct iser_device *device; /* device->device for dma_unmap */ 195 struct iser_device *device; /* device->device for dma_unmap */
190 u64 dma_addr; /* if non zero, addr for dma_unmap */
191 enum dma_data_direction direction; /* direction for dma_unmap */ 196 enum dma_data_direction direction; /* direction for dma_unmap */
192 unsigned int data_size; 197 unsigned int data_size;
193 atomic_t ref_count; /* refcount, freed when dec to 0 */
194};
195
196#define MAX_REGD_BUF_VECTOR_LEN 2
197
198struct iser_dto {
199 struct iscsi_iser_task *task;
200 struct iser_conn *ib_conn;
201 int notify_enable;
202
203 /* vector of registered buffers */
204 unsigned int regd_vector_len;
205 struct iser_regd_buf *regd[MAX_REGD_BUF_VECTOR_LEN];
206
207 /* offset into the registered buffer may be specified */
208 unsigned int offset[MAX_REGD_BUF_VECTOR_LEN];
209
210 /* a smaller size may be specified, if 0, then full size is used */
211 unsigned int used_sz[MAX_REGD_BUF_VECTOR_LEN];
212}; 198};
213 199
214enum iser_desc_type { 200enum iser_desc_type {
215 ISCSI_RX,
216 ISCSI_TX_CONTROL , 201 ISCSI_TX_CONTROL ,
217 ISCSI_TX_SCSI_COMMAND, 202 ISCSI_TX_SCSI_COMMAND,
218 ISCSI_TX_DATAOUT 203 ISCSI_TX_DATAOUT
219}; 204};
220 205
221struct iser_desc { 206struct iser_tx_desc {
222 struct iser_hdr iser_header; 207 struct iser_hdr iser_header;
223 struct iscsi_hdr iscsi_header; 208 struct iscsi_hdr iscsi_header;
224 struct iser_regd_buf hdr_regd_buf;
225 void *data; /* used by RX & TX_CONTROL */
226 struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */
227 enum iser_desc_type type; 209 enum iser_desc_type type;
228 struct iser_dto dto; 210 u64 dma_addr;
211 /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either
212 of immediate data, unsolicited data-out or control (login,text) */
213 struct ib_sge tx_sg[2];
214 int num_sge;
229}; 215};
230 216
217#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
218 sizeof(u64) + sizeof(struct ib_sge)))
219struct iser_rx_desc {
220 struct iser_hdr iser_header;
221 struct iscsi_hdr iscsi_header;
222 char data[ISER_RECV_DATA_SEG_LEN];
223 u64 dma_addr;
224 struct ib_sge rx_sg;
225 char pad[ISER_RX_PAD_SIZE];
226} __attribute__((packed));
227
231struct iser_device { 228struct iser_device {
232 struct ib_device *ib_device; 229 struct ib_device *ib_device;
233 struct ib_pd *pd; 230 struct ib_pd *pd;
234 struct ib_cq *cq; 231 struct ib_cq *rx_cq;
232 struct ib_cq *tx_cq;
235 struct ib_mr *mr; 233 struct ib_mr *mr;
236 struct tasklet_struct cq_tasklet; 234 struct tasklet_struct cq_tasklet;
237 struct list_head ig_list; /* entry in ig devices list */ 235 struct list_head ig_list; /* entry in ig devices list */
@@ -250,15 +248,18 @@ struct iser_conn {
250 struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ 248 struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
251 int disc_evt_flag; /* disconn event delivered */ 249 int disc_evt_flag; /* disconn event delivered */
252 wait_queue_head_t wait; /* waitq for conn/disconn */ 250 wait_queue_head_t wait; /* waitq for conn/disconn */
253 atomic_t post_recv_buf_count; /* posted rx count */ 251 int post_recv_buf_count; /* posted rx count */
254 atomic_t post_send_buf_count; /* posted tx count */ 252 atomic_t post_send_buf_count; /* posted tx count */
255 atomic_t unexpected_pdu_count;/* count of received *
256 * unexpected pdus *
257 * not yet retired */
258 char name[ISER_OBJECT_NAME_SIZE]; 253 char name[ISER_OBJECT_NAME_SIZE];
259 struct iser_page_vec *page_vec; /* represents SG to fmr maps* 254 struct iser_page_vec *page_vec; /* represents SG to fmr maps*
260 * maps serialized as tx is*/ 255 * maps serialized as tx is*/
261 struct list_head conn_list; /* entry in ig conn list */ 256 struct list_head conn_list; /* entry in ig conn list */
257
258 char *login_buf;
259 u64 login_dma;
260 unsigned int rx_desc_head;
261 struct iser_rx_desc *rx_descs;
262 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
262}; 263};
263 264
264struct iscsi_iser_conn { 265struct iscsi_iser_conn {
@@ -267,7 +268,7 @@ struct iscsi_iser_conn {
267}; 268};
268 269
269struct iscsi_iser_task { 270struct iscsi_iser_task {
270 struct iser_desc desc; 271 struct iser_tx_desc desc;
271 struct iscsi_iser_conn *iser_conn; 272 struct iscsi_iser_conn *iser_conn;
272 enum iser_task_status status; 273 enum iser_task_status status;
273 int command_sent; /* set if command sent */ 274 int command_sent; /* set if command sent */
@@ -275,6 +276,7 @@ struct iscsi_iser_task {
275 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ 276 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
276 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ 277 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
277 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ 278 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
279 int headers_initialized;
278}; 280};
279 281
280struct iser_page_vec { 282struct iser_page_vec {
@@ -322,22 +324,17 @@ void iser_conn_put(struct iser_conn *ib_conn);
322 324
323void iser_conn_terminate(struct iser_conn *ib_conn); 325void iser_conn_terminate(struct iser_conn *ib_conn);
324 326
325void iser_rcv_completion(struct iser_desc *desc, 327void iser_rcv_completion(struct iser_rx_desc *desc,
326 unsigned long dto_xfer_len); 328 unsigned long dto_xfer_len,
329 struct iser_conn *ib_conn);
327 330
328void iser_snd_completion(struct iser_desc *desc); 331void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn);
329 332
330void iser_task_rdma_init(struct iscsi_iser_task *task); 333void iser_task_rdma_init(struct iscsi_iser_task *task);
331 334
332void iser_task_rdma_finalize(struct iscsi_iser_task *task); 335void iser_task_rdma_finalize(struct iscsi_iser_task *task);
333 336
334void iser_dto_buffs_release(struct iser_dto *dto); 337void iser_free_rx_descriptors(struct iser_conn *ib_conn);
335
336int iser_regd_buff_release(struct iser_regd_buf *regd_buf);
337
338void iser_reg_single(struct iser_device *device,
339 struct iser_regd_buf *regd_buf,
340 enum dma_data_direction direction);
341 338
342void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, 339void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
343 enum iser_data_dir cmd_dir); 340 enum iser_data_dir cmd_dir);
@@ -356,11 +353,9 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
356 353
357void iser_unreg_mem(struct iser_mem_reg *mem_reg); 354void iser_unreg_mem(struct iser_mem_reg *mem_reg);
358 355
359int iser_post_recv(struct iser_desc *rx_desc); 356int iser_post_recvl(struct iser_conn *ib_conn);
360int iser_post_send(struct iser_desc *tx_desc); 357int iser_post_recvm(struct iser_conn *ib_conn, int count);
361 358int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc);
362int iser_conn_state_comp(struct iser_conn *ib_conn,
363 enum iser_ib_conn_state comp);
364 359
365int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, 360int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
366 struct iser_data_buf *data, 361 struct iser_data_buf *data,
@@ -368,4 +363,6 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
368 enum dma_data_direction dma_dir); 363 enum dma_data_direction dma_dir);
369 364
370void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); 365void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
366int iser_initialize_task_headers(struct iscsi_task *task,
367 struct iser_tx_desc *tx_desc);
371#endif 368#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 9de640200ad3..0b9ef0716588 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -39,29 +39,6 @@
39 39
40#include "iscsi_iser.h" 40#include "iscsi_iser.h"
41 41
42/* Constant PDU lengths calculations */
43#define ISER_TOTAL_HEADERS_LEN (sizeof (struct iser_hdr) + \
44 sizeof (struct iscsi_hdr))
45
46/* iser_dto_add_regd_buff - increments the reference count for *
47 * the registered buffer & adds it to the DTO object */
48static void iser_dto_add_regd_buff(struct iser_dto *dto,
49 struct iser_regd_buf *regd_buf,
50 unsigned long use_offset,
51 unsigned long use_size)
52{
53 int add_idx;
54
55 atomic_inc(&regd_buf->ref_count);
56
57 add_idx = dto->regd_vector_len;
58 dto->regd[add_idx] = regd_buf;
59 dto->used_sz[add_idx] = use_size;
60 dto->offset[add_idx] = use_offset;
61
62 dto->regd_vector_len++;
63}
64
65/* Register user buffer memory and initialize passive rdma 42/* Register user buffer memory and initialize passive rdma
66 * dto descriptor. Total data size is stored in 43 * dto descriptor. Total data size is stored in
67 * iser_task->data[ISER_DIR_IN].data_len 44 * iser_task->data[ISER_DIR_IN].data_len
@@ -122,9 +99,9 @@ iser_prepare_write_cmd(struct iscsi_task *task,
122 struct iscsi_iser_task *iser_task = task->dd_data; 99 struct iscsi_iser_task *iser_task = task->dd_data;
123 struct iser_regd_buf *regd_buf; 100 struct iser_regd_buf *regd_buf;
124 int err; 101 int err;
125 struct iser_dto *send_dto = &iser_task->desc.dto;
126 struct iser_hdr *hdr = &iser_task->desc.iser_header; 102 struct iser_hdr *hdr = &iser_task->desc.iser_header;
127 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; 103 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
104 struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
128 105
129 err = iser_dma_map_task_data(iser_task, 106 err = iser_dma_map_task_data(iser_task,
130 buf_out, 107 buf_out,
@@ -163,135 +140,100 @@ iser_prepare_write_cmd(struct iscsi_task *task,
163 if (imm_sz > 0) { 140 if (imm_sz > 0) {
164 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", 141 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
165 task->itt, imm_sz); 142 task->itt, imm_sz);
166 iser_dto_add_regd_buff(send_dto, 143 tx_dsg->addr = regd_buf->reg.va;
167 regd_buf, 144 tx_dsg->length = imm_sz;
168 0, 145 tx_dsg->lkey = regd_buf->reg.lkey;
169 imm_sz); 146 iser_task->desc.num_sge = 2;
170 } 147 }
171 148
172 return 0; 149 return 0;
173} 150}
174 151
175/** 152/* creates a new tx descriptor and adds header regd buffer */
176 * iser_post_receive_control - allocates, initializes and posts receive DTO. 153static void iser_create_send_desc(struct iser_conn *ib_conn,
177 */ 154 struct iser_tx_desc *tx_desc)
178static int iser_post_receive_control(struct iscsi_conn *conn)
179{ 155{
180 struct iscsi_iser_conn *iser_conn = conn->dd_data; 156 struct iser_device *device = ib_conn->device;
181 struct iser_desc *rx_desc;
182 struct iser_regd_buf *regd_hdr;
183 struct iser_regd_buf *regd_data;
184 struct iser_dto *recv_dto = NULL;
185 struct iser_device *device = iser_conn->ib_conn->device;
186 int rx_data_size, err;
187 int posts, outstanding_unexp_pdus;
188
189 /* for the login sequence we must support rx of upto 8K; login is done
190 * after conn create/bind (connect) and conn stop/bind (reconnect),
191 * what's common for both schemes is that the connection is not started
192 */
193 if (conn->c_stage != ISCSI_CONN_STARTED)
194 rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
195 else /* FIXME till user space sets conn->max_recv_dlength correctly */
196 rx_data_size = 128;
197
198 outstanding_unexp_pdus =
199 atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);
200
201 /*
202 * in addition to the response buffer, replace those consumed by
203 * unexpected pdus.
204 */
205 for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) {
206 rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
207 if (rx_desc == NULL) {
208 iser_err("Failed to alloc desc for post recv %d\n",
209 posts);
210 err = -ENOMEM;
211 goto post_rx_cache_alloc_failure;
212 }
213 rx_desc->type = ISCSI_RX;
214 rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
215 if (rx_desc->data == NULL) {
216 iser_err("Failed to alloc data buf for post recv %d\n",
217 posts);
218 err = -ENOMEM;
219 goto post_rx_kmalloc_failure;
220 }
221
222 recv_dto = &rx_desc->dto;
223 recv_dto->ib_conn = iser_conn->ib_conn;
224 recv_dto->regd_vector_len = 0;
225 157
226 regd_hdr = &rx_desc->hdr_regd_buf; 158 ib_dma_sync_single_for_cpu(device->ib_device,
227 memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 159 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
228 regd_hdr->device = device;
229 regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
230 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
231 160
232 iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); 161 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
233 162 tx_desc->iser_header.flags = ISER_VER;
234 iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
235 163
236 regd_data = &rx_desc->data_regd_buf; 164 tx_desc->num_sge = 1;
237 memset(regd_data, 0, sizeof(struct iser_regd_buf));
238 regd_data->device = device;
239 regd_data->virt_addr = rx_desc->data;
240 regd_data->data_size = rx_data_size;
241 165
242 iser_reg_single(device, regd_data, DMA_FROM_DEVICE); 166 if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
167 tx_desc->tx_sg[0].lkey = device->mr->lkey;
168 iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
169 }
170}
243 171
244 iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
245 172
246 err = iser_post_recv(rx_desc); 173int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
247 if (err) { 174{
248 iser_err("Failed iser_post_recv for post %d\n", posts); 175 int i, j;
249 goto post_rx_post_recv_failure; 176 u64 dma_addr;
250 } 177 struct iser_rx_desc *rx_desc;
178 struct ib_sge *rx_sg;
179 struct iser_device *device = ib_conn->device;
180
181 ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
182 sizeof(struct iser_rx_desc), GFP_KERNEL);
183 if (!ib_conn->rx_descs)
184 goto rx_desc_alloc_fail;
185
186 rx_desc = ib_conn->rx_descs;
187
188 for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
189 dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
190 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
191 if (ib_dma_mapping_error(device->ib_device, dma_addr))
192 goto rx_desc_dma_map_failed;
193
194 rx_desc->dma_addr = dma_addr;
195
196 rx_sg = &rx_desc->rx_sg;
197 rx_sg->addr = rx_desc->dma_addr;
198 rx_sg->length = ISER_RX_PAYLOAD_SIZE;
199 rx_sg->lkey = device->mr->lkey;
251 } 200 }
252 /* all posts successful */
253 return 0;
254 201
255post_rx_post_recv_failure: 202 ib_conn->rx_desc_head = 0;
256 iser_dto_buffs_release(recv_dto); 203 return 0;
257 kfree(rx_desc->data);
258post_rx_kmalloc_failure:
259 kmem_cache_free(ig.desc_cache, rx_desc);
260post_rx_cache_alloc_failure:
261 if (posts > 0) {
262 /*
263 * response buffer posted, but did not replace all unexpected
264 * pdu recv bufs. Ignore error, retry occurs next send
265 */
266 outstanding_unexp_pdus -= (posts - 1);
267 err = 0;
268 }
269 atomic_add(outstanding_unexp_pdus,
270 &iser_conn->ib_conn->unexpected_pdu_count);
271 204
272 return err; 205rx_desc_dma_map_failed:
206 rx_desc = ib_conn->rx_descs;
207 for (j = 0; j < i; j++, rx_desc++)
208 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
209 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
210 kfree(ib_conn->rx_descs);
211 ib_conn->rx_descs = NULL;
212rx_desc_alloc_fail:
213 iser_err("failed allocating rx descriptors / data buffers\n");
214 return -ENOMEM;
273} 215}
274 216
275/* creates a new tx descriptor and adds header regd buffer */ 217void iser_free_rx_descriptors(struct iser_conn *ib_conn)
276static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
277 struct iser_desc *tx_desc)
278{ 218{
279 struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf; 219 int i;
280 struct iser_dto *send_dto = &tx_desc->dto; 220 struct iser_rx_desc *rx_desc;
221 struct iser_device *device = ib_conn->device;
281 222
282 memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 223 if (ib_conn->login_buf) {
283 regd_hdr->device = iser_conn->ib_conn->device; 224 ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
284 regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ 225 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
285 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; 226 kfree(ib_conn->login_buf);
227 }
286 228
287 send_dto->ib_conn = iser_conn->ib_conn; 229 if (!ib_conn->rx_descs)
288 send_dto->notify_enable = 1; 230 return;
289 send_dto->regd_vector_len = 0;
290 231
291 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); 232 rx_desc = ib_conn->rx_descs;
292 tx_desc->iser_header.flags = ISER_VER; 233 for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
293 234 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
294 iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0); 235 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
236 kfree(ib_conn->rx_descs);
295} 237}
296 238
297/** 239/**
@@ -301,46 +243,23 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
301{ 243{
302 struct iscsi_iser_conn *iser_conn = conn->dd_data; 244 struct iscsi_iser_conn *iser_conn = conn->dd_data;
303 245
304 int i; 246 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
305 /*
306 * FIXME this value should be declared to the target during login with
307 * the MaxOutstandingUnexpectedPDUs key when supported
308 */
309 int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
310
311 iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
312 247
313 /* Check that there is no posted recv or send buffers left - */ 248 /* Check that there is no posted recv or send buffers left - */
314 /* they must be consumed during the login phase */ 249 /* they must be consumed during the login phase */
315 BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0); 250 BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0);
316 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); 251 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
317 252
318 /* Initial post receive buffers */ 253 if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
319 for (i = 0; i < initial_post_recv_bufs_num; i++) { 254 return -ENOMEM;
320 if (iser_post_receive_control(conn) != 0) {
321 iser_err("Failed to post recv bufs at:%d conn:0x%p\n",
322 i, conn);
323 return -ENOMEM;
324 }
325 }
326 iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn);
327 return 0;
328}
329 255
330static int 256 /* Initial post receive buffers */
331iser_check_xmit(struct iscsi_conn *conn, void *task) 257 if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
332{ 258 return -ENOMEM;
333 struct iscsi_iser_conn *iser_conn = conn->dd_data;
334 259
335 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
336 ISER_QP_MAX_REQ_DTOS) {
337 iser_dbg("%ld can't xmit task %p\n",jiffies,task);
338 return -ENOBUFS;
339 }
340 return 0; 260 return 0;
341} 261}
342 262
343
344/** 263/**
345 * iser_send_command - send command PDU 264 * iser_send_command - send command PDU
346 */ 265 */
@@ -349,27 +268,18 @@ int iser_send_command(struct iscsi_conn *conn,
349{ 268{
350 struct iscsi_iser_conn *iser_conn = conn->dd_data; 269 struct iscsi_iser_conn *iser_conn = conn->dd_data;
351 struct iscsi_iser_task *iser_task = task->dd_data; 270 struct iscsi_iser_task *iser_task = task->dd_data;
352 struct iser_dto *send_dto = NULL;
353 unsigned long edtl; 271 unsigned long edtl;
354 int err = 0; 272 int err;
355 struct iser_data_buf *data_buf; 273 struct iser_data_buf *data_buf;
356 struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr; 274 struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr;
357 struct scsi_cmnd *sc = task->sc; 275 struct scsi_cmnd *sc = task->sc;
358 276 struct iser_tx_desc *tx_desc = &iser_task->desc;
359 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
360 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
361 return -EPERM;
362 }
363 if (iser_check_xmit(conn, task))
364 return -ENOBUFS;
365 277
366 edtl = ntohl(hdr->data_length); 278 edtl = ntohl(hdr->data_length);
367 279
368 /* build the tx desc regd header and add it to the tx desc dto */ 280 /* build the tx desc regd header and add it to the tx desc dto */
369 iser_task->desc.type = ISCSI_TX_SCSI_COMMAND; 281 tx_desc->type = ISCSI_TX_SCSI_COMMAND;
370 send_dto = &iser_task->desc.dto; 282 iser_create_send_desc(iser_conn->ib_conn, tx_desc);
371 send_dto->task = iser_task;
372 iser_create_send_desc(iser_conn, &iser_task->desc);
373 283
374 if (hdr->flags & ISCSI_FLAG_CMD_READ) 284 if (hdr->flags & ISCSI_FLAG_CMD_READ)
375 data_buf = &iser_task->data[ISER_DIR_IN]; 285 data_buf = &iser_task->data[ISER_DIR_IN];
@@ -398,23 +308,13 @@ int iser_send_command(struct iscsi_conn *conn,
398 goto send_command_error; 308 goto send_command_error;
399 } 309 }
400 310
401 iser_reg_single(iser_conn->ib_conn->device,
402 send_dto->regd[0], DMA_TO_DEVICE);
403
404 if (iser_post_receive_control(conn) != 0) {
405 iser_err("post_recv failed!\n");
406 err = -ENOMEM;
407 goto send_command_error;
408 }
409
410 iser_task->status = ISER_TASK_STATUS_STARTED; 311 iser_task->status = ISER_TASK_STATUS_STARTED;
411 312
412 err = iser_post_send(&iser_task->desc); 313 err = iser_post_send(iser_conn->ib_conn, tx_desc);
413 if (!err) 314 if (!err)
414 return 0; 315 return 0;
415 316
416send_command_error: 317send_command_error:
417 iser_dto_buffs_release(send_dto);
418 iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err); 318 iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
419 return err; 319 return err;
420} 320}
@@ -428,20 +328,13 @@ int iser_send_data_out(struct iscsi_conn *conn,
428{ 328{
429 struct iscsi_iser_conn *iser_conn = conn->dd_data; 329 struct iscsi_iser_conn *iser_conn = conn->dd_data;
430 struct iscsi_iser_task *iser_task = task->dd_data; 330 struct iscsi_iser_task *iser_task = task->dd_data;
431 struct iser_desc *tx_desc = NULL; 331 struct iser_tx_desc *tx_desc = NULL;
432 struct iser_dto *send_dto = NULL; 332 struct iser_regd_buf *regd_buf;
433 unsigned long buf_offset; 333 unsigned long buf_offset;
434 unsigned long data_seg_len; 334 unsigned long data_seg_len;
435 uint32_t itt; 335 uint32_t itt;
436 int err = 0; 336 int err = 0;
437 337 struct ib_sge *tx_dsg;
438 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
439 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
440 return -EPERM;
441 }
442
443 if (iser_check_xmit(conn, task))
444 return -ENOBUFS;
445 338
446 itt = (__force uint32_t)hdr->itt; 339 itt = (__force uint32_t)hdr->itt;
447 data_seg_len = ntoh24(hdr->dlength); 340 data_seg_len = ntoh24(hdr->dlength);
@@ -450,28 +343,25 @@ int iser_send_data_out(struct iscsi_conn *conn,
450 iser_dbg("%s itt %d dseg_len %d offset %d\n", 343 iser_dbg("%s itt %d dseg_len %d offset %d\n",
451 __func__,(int)itt,(int)data_seg_len,(int)buf_offset); 344 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);
452 345
453 tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); 346 tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
454 if (tx_desc == NULL) { 347 if (tx_desc == NULL) {
455 iser_err("Failed to alloc desc for post dataout\n"); 348 iser_err("Failed to alloc desc for post dataout\n");
456 return -ENOMEM; 349 return -ENOMEM;
457 } 350 }
458 351
459 tx_desc->type = ISCSI_TX_DATAOUT; 352 tx_desc->type = ISCSI_TX_DATAOUT;
353 tx_desc->iser_header.flags = ISER_VER;
460 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); 354 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
461 355
462 /* build the tx desc regd header and add it to the tx desc dto */ 356 /* build the tx desc */
463 send_dto = &tx_desc->dto; 357 iser_initialize_task_headers(task, tx_desc);
464 send_dto->task = iser_task;
465 iser_create_send_desc(iser_conn, tx_desc);
466
467 iser_reg_single(iser_conn->ib_conn->device,
468 send_dto->regd[0], DMA_TO_DEVICE);
469 358
470 /* all data was registered for RDMA, we can use the lkey */ 359 regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
471 iser_dto_add_regd_buff(send_dto, 360 tx_dsg = &tx_desc->tx_sg[1];
472 &iser_task->rdma_regd[ISER_DIR_OUT], 361 tx_dsg->addr = regd_buf->reg.va + buf_offset;
473 buf_offset, 362 tx_dsg->length = data_seg_len;
474 data_seg_len); 363 tx_dsg->lkey = regd_buf->reg.lkey;
364 tx_desc->num_sge = 2;
475 365
476 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { 366 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
477 iser_err("Offset:%ld & DSL:%ld in Data-Out " 367 iser_err("Offset:%ld & DSL:%ld in Data-Out "
@@ -485,12 +375,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
485 itt, buf_offset, data_seg_len); 375 itt, buf_offset, data_seg_len);
486 376
487 377
488 err = iser_post_send(tx_desc); 378 err = iser_post_send(iser_conn->ib_conn, tx_desc);
489 if (!err) 379 if (!err)
490 return 0; 380 return 0;
491 381
492send_data_out_error: 382send_data_out_error:
493 iser_dto_buffs_release(send_dto);
494 kmem_cache_free(ig.desc_cache, tx_desc); 383 kmem_cache_free(ig.desc_cache, tx_desc);
495 iser_err("conn %p failed err %d\n",conn, err); 384 iser_err("conn %p failed err %d\n",conn, err);
496 return err; 385 return err;
@@ -501,64 +390,44 @@ int iser_send_control(struct iscsi_conn *conn,
501{ 390{
502 struct iscsi_iser_conn *iser_conn = conn->dd_data; 391 struct iscsi_iser_conn *iser_conn = conn->dd_data;
503 struct iscsi_iser_task *iser_task = task->dd_data; 392 struct iscsi_iser_task *iser_task = task->dd_data;
504 struct iser_desc *mdesc = &iser_task->desc; 393 struct iser_tx_desc *mdesc = &iser_task->desc;
505 struct iser_dto *send_dto = NULL;
506 unsigned long data_seg_len; 394 unsigned long data_seg_len;
507 int err = 0; 395 int err = 0;
508 struct iser_regd_buf *regd_buf;
509 struct iser_device *device; 396 struct iser_device *device;
510 unsigned char opcode;
511
512 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
513 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
514 return -EPERM;
515 }
516
517 if (iser_check_xmit(conn, task))
518 return -ENOBUFS;
519 397
520 /* build the tx desc regd header and add it to the tx desc dto */ 398 /* build the tx desc regd header and add it to the tx desc dto */
521 mdesc->type = ISCSI_TX_CONTROL; 399 mdesc->type = ISCSI_TX_CONTROL;
522 send_dto = &mdesc->dto; 400 iser_create_send_desc(iser_conn->ib_conn, mdesc);
523 send_dto->task = NULL;
524 iser_create_send_desc(iser_conn, mdesc);
525 401
526 device = iser_conn->ib_conn->device; 402 device = iser_conn->ib_conn->device;
527 403
528 iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE);
529
530 data_seg_len = ntoh24(task->hdr->dlength); 404 data_seg_len = ntoh24(task->hdr->dlength);
531 405
532 if (data_seg_len > 0) { 406 if (data_seg_len > 0) {
533 regd_buf = &mdesc->data_regd_buf; 407 struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
534 memset(regd_buf, 0, sizeof(struct iser_regd_buf)); 408 if (task != conn->login_task) {
535 regd_buf->device = device; 409 iser_err("data present on non login task!!!\n");
536 regd_buf->virt_addr = task->data; 410 goto send_control_error;
537 regd_buf->data_size = task->data_count; 411 }
538 iser_reg_single(device, regd_buf, 412 memcpy(iser_conn->ib_conn->login_buf, task->data,
539 DMA_TO_DEVICE); 413 task->data_count);
540 iser_dto_add_regd_buff(send_dto, regd_buf, 414 tx_dsg->addr = iser_conn->ib_conn->login_dma;
541 0, 415 tx_dsg->length = data_seg_len;
542 data_seg_len); 416 tx_dsg->lkey = device->mr->lkey;
417 mdesc->num_sge = 2;
543 } 418 }
544 419
545 opcode = task->hdr->opcode & ISCSI_OPCODE_MASK; 420 if (task == conn->login_task) {
546 421 err = iser_post_recvl(iser_conn->ib_conn);
547 /* post recv buffer for response if one is expected */ 422 if (err)
548 if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) {
549 if (iser_post_receive_control(conn) != 0) {
550 iser_err("post_rcv_buff failed!\n");
551 err = -ENOMEM;
552 goto send_control_error; 423 goto send_control_error;
553 }
554 } 424 }
555 425
556 err = iser_post_send(mdesc); 426 err = iser_post_send(iser_conn->ib_conn, mdesc);
557 if (!err) 427 if (!err)
558 return 0; 428 return 0;
559 429
560send_control_error: 430send_control_error:
561 iser_dto_buffs_release(send_dto);
562 iser_err("conn %p failed err %d\n",conn, err); 431 iser_err("conn %p failed err %d\n",conn, err);
563 return err; 432 return err;
564} 433}
@@ -566,104 +435,71 @@ send_control_error:
566/** 435/**
567 * iser_rcv_dto_completion - recv DTO completion 436 * iser_rcv_dto_completion - recv DTO completion
568 */ 437 */
569void iser_rcv_completion(struct iser_desc *rx_desc, 438void iser_rcv_completion(struct iser_rx_desc *rx_desc,
570 unsigned long dto_xfer_len) 439 unsigned long rx_xfer_len,
440 struct iser_conn *ib_conn)
571{ 441{
572 struct iser_dto *dto = &rx_desc->dto; 442 struct iscsi_iser_conn *conn = ib_conn->iser_conn;
573 struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
574 struct iscsi_task *task;
575 struct iscsi_iser_task *iser_task;
576 struct iscsi_hdr *hdr; 443 struct iscsi_hdr *hdr;
577 char *rx_data = NULL; 444 u64 rx_dma;
578 int rx_data_len = 0; 445 int rx_buflen, outstanding, count, err;
579 unsigned char opcode; 446
580 447 /* differentiate between login to all other PDUs */
581 hdr = &rx_desc->iscsi_header; 448 if ((char *)rx_desc == ib_conn->login_buf) {
449 rx_dma = ib_conn->login_dma;
450 rx_buflen = ISER_RX_LOGIN_SIZE;
451 } else {
452 rx_dma = rx_desc->dma_addr;
453 rx_buflen = ISER_RX_PAYLOAD_SIZE;
454 }
582 455
583 iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt); 456 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
457 rx_buflen, DMA_FROM_DEVICE);
584 458
585 if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */ 459 hdr = &rx_desc->iscsi_header;
586 rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
587 rx_data = dto->regd[1]->virt_addr;
588 rx_data += dto->offset[1];
589 }
590 460
591 opcode = hdr->opcode & ISCSI_OPCODE_MASK; 461 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
592 462 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
593 if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
594 spin_lock(&conn->iscsi_conn->session->lock);
595 task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt);
596 if (task)
597 __iscsi_get_task(task);
598 spin_unlock(&conn->iscsi_conn->session->lock);
599
600 if (!task)
601 iser_err("itt can't be matched to task!!! "
602 "conn %p opcode %d itt %d\n",
603 conn->iscsi_conn, opcode, hdr->itt);
604 else {
605 iser_task = task->dd_data;
606 iser_dbg("itt %d task %p\n",hdr->itt, task);
607 iser_task->status = ISER_TASK_STATUS_COMPLETED;
608 iser_task_rdma_finalize(iser_task);
609 iscsi_put_task(task);
610 }
611 }
612 iser_dto_buffs_release(dto);
613 463
614 iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); 464 iscsi_iser_recv(conn->iscsi_conn, hdr,
465 rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
615 466
616 kfree(rx_desc->data); 467 ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
617 kmem_cache_free(ig.desc_cache, rx_desc); 468 rx_buflen, DMA_FROM_DEVICE);
618 469
619 /* decrementing conn->post_recv_buf_count only --after-- freeing the * 470 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
620 * task eliminates the need to worry on tasks which are completed in * 471 * task eliminates the need to worry on tasks which are completed in *
621 * parallel to the execution of iser_conn_term. So the code that waits * 472 * parallel to the execution of iser_conn_term. So the code that waits *
622 * for the posted rx bufs refcount to become zero handles everything */ 473 * for the posted rx bufs refcount to become zero handles everything */
623 atomic_dec(&conn->ib_conn->post_recv_buf_count); 474 conn->ib_conn->post_recv_buf_count--;
624 475
625 /* 476 if (rx_dma == ib_conn->login_dma)
626 * if an unexpected PDU was received then the recv wr consumed must 477 return;
627 * be replaced, this is done in the next send of a control-type PDU 478
628 */ 479 outstanding = ib_conn->post_recv_buf_count;
629 if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) { 480 if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
630 /* nop-in with itt = 0xffffffff */ 481 count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
631 atomic_inc(&conn->ib_conn->unexpected_pdu_count); 482 ISER_MIN_POSTED_RX);
632 } 483 err = iser_post_recvm(ib_conn, count);
633 else if (opcode == ISCSI_OP_ASYNC_EVENT) { 484 if (err)
634 /* asyncronous message */ 485 iser_err("posting %d rx bufs err %d\n", count, err);
635 atomic_inc(&conn->ib_conn->unexpected_pdu_count);
636 } 486 }
637 /* a reject PDU consumes the recv buf posted for the response */
638} 487}
639 488
640void iser_snd_completion(struct iser_desc *tx_desc) 489void iser_snd_completion(struct iser_tx_desc *tx_desc,
490 struct iser_conn *ib_conn)
641{ 491{
642 struct iser_dto *dto = &tx_desc->dto;
643 struct iser_conn *ib_conn = dto->ib_conn;
644 struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
645 struct iscsi_conn *conn = iser_conn->iscsi_conn;
646 struct iscsi_task *task; 492 struct iscsi_task *task;
647 int resume_tx = 0; 493 struct iser_device *device = ib_conn->device;
648
649 iser_dbg("Initiator, Data sent dto=0x%p\n", dto);
650
651 iser_dto_buffs_release(dto);
652 494
653 if (tx_desc->type == ISCSI_TX_DATAOUT) 495 if (tx_desc->type == ISCSI_TX_DATAOUT) {
496 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
497 ISER_HEADERS_LEN, DMA_TO_DEVICE);
654 kmem_cache_free(ig.desc_cache, tx_desc); 498 kmem_cache_free(ig.desc_cache, tx_desc);
655 499 }
656 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
657 ISER_QP_MAX_REQ_DTOS)
658 resume_tx = 1;
659 500
660 atomic_dec(&ib_conn->post_send_buf_count); 501 atomic_dec(&ib_conn->post_send_buf_count);
661 502
662 if (resume_tx) {
663 iser_dbg("%ld resuming tx\n",jiffies);
664 iscsi_conn_queue_work(conn);
665 }
666
667 if (tx_desc->type == ISCSI_TX_CONTROL) { 503 if (tx_desc->type == ISCSI_TX_CONTROL) {
668 /* this arithmetic is legal by libiscsi dd_data allocation */ 504 /* this arithmetic is legal by libiscsi dd_data allocation */
669 task = (void *) ((long)(void *)tx_desc - 505 task = (void *) ((long)(void *)tx_desc -
@@ -692,7 +528,6 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
692 528
693void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) 529void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
694{ 530{
695 int deferred;
696 int is_rdma_aligned = 1; 531 int is_rdma_aligned = 1;
697 struct iser_regd_buf *regd; 532 struct iser_regd_buf *regd;
698 533
@@ -710,32 +545,17 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
710 545
711 if (iser_task->dir[ISER_DIR_IN]) { 546 if (iser_task->dir[ISER_DIR_IN]) {
712 regd = &iser_task->rdma_regd[ISER_DIR_IN]; 547 regd = &iser_task->rdma_regd[ISER_DIR_IN];
713 deferred = iser_regd_buff_release(regd); 548 if (regd->reg.is_fmr)
714 if (deferred) { 549 iser_unreg_mem(&regd->reg);
715 iser_err("%d references remain for BUF-IN rdma reg\n",
716 atomic_read(&regd->ref_count));
717 }
718 } 550 }
719 551
720 if (iser_task->dir[ISER_DIR_OUT]) { 552 if (iser_task->dir[ISER_DIR_OUT]) {
721 regd = &iser_task->rdma_regd[ISER_DIR_OUT]; 553 regd = &iser_task->rdma_regd[ISER_DIR_OUT];
722 deferred = iser_regd_buff_release(regd); 554 if (regd->reg.is_fmr)
723 if (deferred) { 555 iser_unreg_mem(&regd->reg);
724 iser_err("%d references remain for BUF-OUT rdma reg\n",
725 atomic_read(&regd->ref_count));
726 }
727 } 556 }
728 557
729 /* if the data was unaligned, it was already unmapped and then copied */ 558 /* if the data was unaligned, it was already unmapped and then copied */
730 if (is_rdma_aligned) 559 if (is_rdma_aligned)
731 iser_dma_unmap_task_data(iser_task); 560 iser_dma_unmap_task_data(iser_task);
732} 561}
733
734void iser_dto_buffs_release(struct iser_dto *dto)
735{
736 int i;
737
738 for (i = 0; i < dto->regd_vector_len; i++)
739 iser_regd_buff_release(dto->regd[i]);
740}
741
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 274c883ef3ea..fb88d6896b67 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -41,62 +41,6 @@
41#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ 41#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
42 42
43/** 43/**
44 * Decrements the reference count for the
45 * registered buffer & releases it
46 *
47 * returns 0 if released, 1 if deferred
48 */
49int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
50{
51 struct ib_device *dev;
52
53 if ((atomic_read(&regd_buf->ref_count) == 0) ||
54 atomic_dec_and_test(&regd_buf->ref_count)) {
55 /* if we used the dma mr, unreg is just NOP */
56 if (regd_buf->reg.is_fmr)
57 iser_unreg_mem(&regd_buf->reg);
58
59 if (regd_buf->dma_addr) {
60 dev = regd_buf->device->ib_device;
61 ib_dma_unmap_single(dev,
62 regd_buf->dma_addr,
63 regd_buf->data_size,
64 regd_buf->direction);
65 }
66 /* else this regd buf is associated with task which we */
67 /* dma_unmap_single/sg later */
68 return 0;
69 } else {
70 iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf);
71 return 1;
72 }
73}
74
75/**
76 * iser_reg_single - fills registered buffer descriptor with
77 * registration information
78 */
79void iser_reg_single(struct iser_device *device,
80 struct iser_regd_buf *regd_buf,
81 enum dma_data_direction direction)
82{
83 u64 dma_addr;
84
85 dma_addr = ib_dma_map_single(device->ib_device,
86 regd_buf->virt_addr,
87 regd_buf->data_size, direction);
88 BUG_ON(ib_dma_mapping_error(device->ib_device, dma_addr));
89
90 regd_buf->reg.lkey = device->mr->lkey;
91 regd_buf->reg.len = regd_buf->data_size;
92 regd_buf->reg.va = dma_addr;
93 regd_buf->reg.is_fmr = 0;
94
95 regd_buf->dma_addr = dma_addr;
96 regd_buf->direction = direction;
97}
98
99/**
100 * iser_start_rdma_unaligned_sg 44 * iser_start_rdma_unaligned_sg
101 */ 45 */
102static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, 46static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
@@ -109,10 +53,10 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
109 unsigned long cmd_data_len = data->data_len; 53 unsigned long cmd_data_len = data->data_len;
110 54
111 if (cmd_data_len > ISER_KMALLOC_THRESHOLD) 55 if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
112 mem = (void *)__get_free_pages(GFP_NOIO, 56 mem = (void *)__get_free_pages(GFP_ATOMIC,
113 ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); 57 ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
114 else 58 else
115 mem = kmalloc(cmd_data_len, GFP_NOIO); 59 mem = kmalloc(cmd_data_len, GFP_ATOMIC);
116 60
117 if (mem == NULL) { 61 if (mem == NULL) {
118 iser_err("Failed to allocate mem size %d %d for copying sglist\n", 62 iser_err("Failed to allocate mem size %d %d for copying sglist\n",
@@ -474,9 +418,5 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
474 return err; 418 return err;
475 } 419 }
476 } 420 }
477
478 /* take a reference on this regd buf such that it will not be released *
479 * (eg in send dto completion) before we get the scsi response */
480 atomic_inc(&regd_buf->ref_count);
481 return 0; 421 return 0;
482} 422}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 8579f32ce38e..308d17bb5146 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -37,9 +37,8 @@
37#include "iscsi_iser.h" 37#include "iscsi_iser.h"
38 38
39#define ISCSI_ISER_MAX_CONN 8 39#define ISCSI_ISER_MAX_CONN 8
40#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ 40#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
41 ISER_QP_MAX_REQ_DTOS) * \ 41#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
42 ISCSI_ISER_MAX_CONN)
43 42
44static void iser_cq_tasklet_fn(unsigned long data); 43static void iser_cq_tasklet_fn(unsigned long data);
45static void iser_cq_callback(struct ib_cq *cq, void *cq_context); 44static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
@@ -67,15 +66,23 @@ static int iser_create_device_ib_res(struct iser_device *device)
67 if (IS_ERR(device->pd)) 66 if (IS_ERR(device->pd))
68 goto pd_err; 67 goto pd_err;
69 68
70 device->cq = ib_create_cq(device->ib_device, 69 device->rx_cq = ib_create_cq(device->ib_device,
71 iser_cq_callback, 70 iser_cq_callback,
72 iser_cq_event_callback, 71 iser_cq_event_callback,
73 (void *)device, 72 (void *)device,
74 ISER_MAX_CQ_LEN, 0); 73 ISER_MAX_RX_CQ_LEN, 0);
75 if (IS_ERR(device->cq)) 74 if (IS_ERR(device->rx_cq))
76 goto cq_err; 75 goto rx_cq_err;
77 76
78 if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP)) 77 device->tx_cq = ib_create_cq(device->ib_device,
78 NULL, iser_cq_event_callback,
79 (void *)device,
80 ISER_MAX_TX_CQ_LEN, 0);
81
82 if (IS_ERR(device->tx_cq))
83 goto tx_cq_err;
84
85 if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
79 goto cq_arm_err; 86 goto cq_arm_err;
80 87
81 tasklet_init(&device->cq_tasklet, 88 tasklet_init(&device->cq_tasklet,
@@ -93,8 +100,10 @@ static int iser_create_device_ib_res(struct iser_device *device)
93dma_mr_err: 100dma_mr_err:
94 tasklet_kill(&device->cq_tasklet); 101 tasklet_kill(&device->cq_tasklet);
95cq_arm_err: 102cq_arm_err:
96 ib_destroy_cq(device->cq); 103 ib_destroy_cq(device->tx_cq);
97cq_err: 104tx_cq_err:
105 ib_destroy_cq(device->rx_cq);
106rx_cq_err:
98 ib_dealloc_pd(device->pd); 107 ib_dealloc_pd(device->pd);
99pd_err: 108pd_err:
100 iser_err("failed to allocate an IB resource\n"); 109 iser_err("failed to allocate an IB resource\n");
@@ -112,11 +121,13 @@ static void iser_free_device_ib_res(struct iser_device *device)
112 tasklet_kill(&device->cq_tasklet); 121 tasklet_kill(&device->cq_tasklet);
113 122
114 (void)ib_dereg_mr(device->mr); 123 (void)ib_dereg_mr(device->mr);
115 (void)ib_destroy_cq(device->cq); 124 (void)ib_destroy_cq(device->tx_cq);
125 (void)ib_destroy_cq(device->rx_cq);
116 (void)ib_dealloc_pd(device->pd); 126 (void)ib_dealloc_pd(device->pd);
117 127
118 device->mr = NULL; 128 device->mr = NULL;
119 device->cq = NULL; 129 device->tx_cq = NULL;
130 device->rx_cq = NULL;
120 device->pd = NULL; 131 device->pd = NULL;
121} 132}
122 133
@@ -129,13 +140,23 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
129{ 140{
130 struct iser_device *device; 141 struct iser_device *device;
131 struct ib_qp_init_attr init_attr; 142 struct ib_qp_init_attr init_attr;
132 int ret; 143 int ret = -ENOMEM;
133 struct ib_fmr_pool_param params; 144 struct ib_fmr_pool_param params;
134 145
135 BUG_ON(ib_conn->device == NULL); 146 BUG_ON(ib_conn->device == NULL);
136 147
137 device = ib_conn->device; 148 device = ib_conn->device;
138 149
150 ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
151 if (!ib_conn->login_buf) {
152 goto alloc_err;
153 ret = -ENOMEM;
154 }
155
156 ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
157 (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
158 DMA_FROM_DEVICE);
159
139 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + 160 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
140 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), 161 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
141 GFP_KERNEL); 162 GFP_KERNEL);
@@ -169,12 +190,12 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
169 190
170 init_attr.event_handler = iser_qp_event_callback; 191 init_attr.event_handler = iser_qp_event_callback;
171 init_attr.qp_context = (void *)ib_conn; 192 init_attr.qp_context = (void *)ib_conn;
172 init_attr.send_cq = device->cq; 193 init_attr.send_cq = device->tx_cq;
173 init_attr.recv_cq = device->cq; 194 init_attr.recv_cq = device->rx_cq;
174 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 195 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
175 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 196 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
176 init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN; 197 init_attr.cap.max_send_sge = 2;
177 init_attr.cap.max_recv_sge = 2; 198 init_attr.cap.max_recv_sge = 1;
178 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 199 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
179 init_attr.qp_type = IB_QPT_RC; 200 init_attr.qp_type = IB_QPT_RC;
180 201
@@ -192,6 +213,7 @@ qp_err:
192 (void)ib_destroy_fmr_pool(ib_conn->fmr_pool); 213 (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
193fmr_pool_err: 214fmr_pool_err:
194 kfree(ib_conn->page_vec); 215 kfree(ib_conn->page_vec);
216 kfree(ib_conn->login_buf);
195alloc_err: 217alloc_err:
196 iser_err("unable to alloc mem or create resource, err %d\n", ret); 218 iser_err("unable to alloc mem or create resource, err %d\n", ret);
197 return ret; 219 return ret;
@@ -278,17 +300,6 @@ static void iser_device_try_release(struct iser_device *device)
278 mutex_unlock(&ig.device_list_mutex); 300 mutex_unlock(&ig.device_list_mutex);
279} 301}
280 302
281int iser_conn_state_comp(struct iser_conn *ib_conn,
282 enum iser_ib_conn_state comp)
283{
284 int ret;
285
286 spin_lock_bh(&ib_conn->lock);
287 ret = (ib_conn->state == comp);
288 spin_unlock_bh(&ib_conn->lock);
289 return ret;
290}
291
292static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 303static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
293 enum iser_ib_conn_state comp, 304 enum iser_ib_conn_state comp,
294 enum iser_ib_conn_state exch) 305 enum iser_ib_conn_state exch)
@@ -314,7 +325,7 @@ static void iser_conn_release(struct iser_conn *ib_conn)
314 mutex_lock(&ig.connlist_mutex); 325 mutex_lock(&ig.connlist_mutex);
315 list_del(&ib_conn->conn_list); 326 list_del(&ib_conn->conn_list);
316 mutex_unlock(&ig.connlist_mutex); 327 mutex_unlock(&ig.connlist_mutex);
317 328 iser_free_rx_descriptors(ib_conn);
318 iser_free_ib_conn_res(ib_conn); 329 iser_free_ib_conn_res(ib_conn);
319 ib_conn->device = NULL; 330 ib_conn->device = NULL;
320 /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 331 /* on EVENT_ADDR_ERROR there's no device yet for this conn */
@@ -442,7 +453,7 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
442 ISCSI_ERR_CONN_FAILED); 453 ISCSI_ERR_CONN_FAILED);
443 454
444 /* Complete the termination process if no posts are pending */ 455 /* Complete the termination process if no posts are pending */
445 if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) && 456 if (ib_conn->post_recv_buf_count == 0 &&
446 (atomic_read(&ib_conn->post_send_buf_count) == 0)) { 457 (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
447 ib_conn->state = ISER_CONN_DOWN; 458 ib_conn->state = ISER_CONN_DOWN;
448 wake_up_interruptible(&ib_conn->wait); 459 wake_up_interruptible(&ib_conn->wait);
@@ -489,9 +500,8 @@ void iser_conn_init(struct iser_conn *ib_conn)
489{ 500{
490 ib_conn->state = ISER_CONN_INIT; 501 ib_conn->state = ISER_CONN_INIT;
491 init_waitqueue_head(&ib_conn->wait); 502 init_waitqueue_head(&ib_conn->wait);
492 atomic_set(&ib_conn->post_recv_buf_count, 0); 503 ib_conn->post_recv_buf_count = 0;
493 atomic_set(&ib_conn->post_send_buf_count, 0); 504 atomic_set(&ib_conn->post_send_buf_count, 0);
494 atomic_set(&ib_conn->unexpected_pdu_count, 0);
495 atomic_set(&ib_conn->refcount, 1); 505 atomic_set(&ib_conn->refcount, 1);
496 INIT_LIST_HEAD(&ib_conn->conn_list); 506 INIT_LIST_HEAD(&ib_conn->conn_list);
497 spin_lock_init(&ib_conn->lock); 507 spin_lock_init(&ib_conn->lock);
@@ -626,136 +636,97 @@ void iser_unreg_mem(struct iser_mem_reg *reg)
626 reg->mem_h = NULL; 636 reg->mem_h = NULL;
627} 637}
628 638
629/** 639int iser_post_recvl(struct iser_conn *ib_conn)
630 * iser_dto_to_iov - builds IOV from a dto descriptor
631 */
632static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len)
633{ 640{
634 int i; 641 struct ib_recv_wr rx_wr, *rx_wr_failed;
635 struct ib_sge *sge; 642 struct ib_sge sge;
636 struct iser_regd_buf *regd_buf; 643 int ib_ret;
637
638 if (dto->regd_vector_len > iov_len) {
639 iser_err("iov size %d too small for posting dto of len %d\n",
640 iov_len, dto->regd_vector_len);
641 BUG();
642 }
643 644
644 for (i = 0; i < dto->regd_vector_len; i++) { 645 sge.addr = ib_conn->login_dma;
645 sge = &iov[i]; 646 sge.length = ISER_RX_LOGIN_SIZE;
646 regd_buf = dto->regd[i]; 647 sge.lkey = ib_conn->device->mr->lkey;
647
648 sge->addr = regd_buf->reg.va;
649 sge->length = regd_buf->reg.len;
650 sge->lkey = regd_buf->reg.lkey;
651
652 if (dto->used_sz[i] > 0) /* Adjust size */
653 sge->length = dto->used_sz[i];
654
655 /* offset and length should not exceed the regd buf length */
656 if (sge->length + dto->offset[i] > regd_buf->reg.len) {
657 iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:"
658 "%ld in dto:0x%p [%d], va:0x%08lX\n",
659 (unsigned long)sge->length, dto->offset[i],
660 (unsigned long)regd_buf->reg.len, dto, i,
661 (unsigned long)sge->addr);
662 BUG();
663 }
664 648
665 sge->addr += dto->offset[i]; /* Adjust offset */ 649 rx_wr.wr_id = (unsigned long)ib_conn->login_buf;
650 rx_wr.sg_list = &sge;
651 rx_wr.num_sge = 1;
652 rx_wr.next = NULL;
653
654 ib_conn->post_recv_buf_count++;
655 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
656 if (ib_ret) {
657 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
658 ib_conn->post_recv_buf_count--;
666 } 659 }
660 return ib_ret;
667} 661}
668 662
669/** 663int iser_post_recvm(struct iser_conn *ib_conn, int count)
670 * iser_post_recv - Posts a receive buffer.
671 *
672 * returns 0 on success, -1 on failure
673 */
674int iser_post_recv(struct iser_desc *rx_desc)
675{ 664{
676 int ib_ret, ret_val = 0; 665 struct ib_recv_wr *rx_wr, *rx_wr_failed;
677 struct ib_recv_wr recv_wr, *recv_wr_failed; 666 int i, ib_ret;
678 struct ib_sge iov[2]; 667 unsigned int my_rx_head = ib_conn->rx_desc_head;
679 struct iser_conn *ib_conn; 668 struct iser_rx_desc *rx_desc;
680 struct iser_dto *recv_dto = &rx_desc->dto; 669
681 670 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
682 /* Retrieve conn */ 671 rx_desc = &ib_conn->rx_descs[my_rx_head];
683 ib_conn = recv_dto->ib_conn; 672 rx_wr->wr_id = (unsigned long)rx_desc;
684 673 rx_wr->sg_list = &rx_desc->rx_sg;
685 iser_dto_to_iov(recv_dto, iov, 2); 674 rx_wr->num_sge = 1;
675 rx_wr->next = rx_wr + 1;
676 my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
677 }
686 678
687 recv_wr.next = NULL; 679 rx_wr--;
688 recv_wr.sg_list = iov; 680 rx_wr->next = NULL; /* mark end of work requests list */
689 recv_wr.num_sge = recv_dto->regd_vector_len;
690 recv_wr.wr_id = (unsigned long)rx_desc;
691 681
692 atomic_inc(&ib_conn->post_recv_buf_count); 682 ib_conn->post_recv_buf_count += count;
693 ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed); 683 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
694 if (ib_ret) { 684 if (ib_ret) {
695 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 685 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
696 atomic_dec(&ib_conn->post_recv_buf_count); 686 ib_conn->post_recv_buf_count -= count;
697 ret_val = -1; 687 } else
698 } 688 ib_conn->rx_desc_head = my_rx_head;
699 689 return ib_ret;
700 return ret_val;
701} 690}
702 691
692
703/** 693/**
704 * iser_start_send - Initiate a Send DTO operation 694 * iser_start_send - Initiate a Send DTO operation
705 * 695 *
706 * returns 0 on success, -1 on failure 696 * returns 0 on success, -1 on failure
707 */ 697 */
708int iser_post_send(struct iser_desc *tx_desc) 698int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
709{ 699{
710 int ib_ret, ret_val = 0; 700 int ib_ret;
711 struct ib_send_wr send_wr, *send_wr_failed; 701 struct ib_send_wr send_wr, *send_wr_failed;
712 struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN];
713 struct iser_conn *ib_conn;
714 struct iser_dto *dto = &tx_desc->dto;
715 702
716 ib_conn = dto->ib_conn; 703 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
717 704 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
718 iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
719 705
720 send_wr.next = NULL; 706 send_wr.next = NULL;
721 send_wr.wr_id = (unsigned long)tx_desc; 707 send_wr.wr_id = (unsigned long)tx_desc;
722 send_wr.sg_list = iov; 708 send_wr.sg_list = tx_desc->tx_sg;
723 send_wr.num_sge = dto->regd_vector_len; 709 send_wr.num_sge = tx_desc->num_sge;
724 send_wr.opcode = IB_WR_SEND; 710 send_wr.opcode = IB_WR_SEND;
725 send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0; 711 send_wr.send_flags = IB_SEND_SIGNALED;
726 712
727 atomic_inc(&ib_conn->post_send_buf_count); 713 atomic_inc(&ib_conn->post_send_buf_count);
728 714
729 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 715 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
730 if (ib_ret) { 716 if (ib_ret) {
731 iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n",
732 dto, dto->regd_vector_len);
733 iser_err("ib_post_send failed, ret:%d\n", ib_ret); 717 iser_err("ib_post_send failed, ret:%d\n", ib_ret);
734 atomic_dec(&ib_conn->post_send_buf_count); 718 atomic_dec(&ib_conn->post_send_buf_count);
735 ret_val = -1;
736 } 719 }
737 720 return ib_ret;
738 return ret_val;
739} 721}
740 722
741static void iser_handle_comp_error(struct iser_desc *desc) 723static void iser_handle_comp_error(struct iser_tx_desc *desc,
724 struct iser_conn *ib_conn)
742{ 725{
743 struct iser_dto *dto = &desc->dto; 726 if (desc && desc->type == ISCSI_TX_DATAOUT)
744 struct iser_conn *ib_conn = dto->ib_conn;
745
746 iser_dto_buffs_release(dto);
747
748 if (desc->type == ISCSI_RX) {
749 kfree(desc->data);
750 kmem_cache_free(ig.desc_cache, desc); 727 kmem_cache_free(ig.desc_cache, desc);
751 atomic_dec(&ib_conn->post_recv_buf_count);
752 } else { /* type is TX control/command/dataout */
753 if (desc->type == ISCSI_TX_DATAOUT)
754 kmem_cache_free(ig.desc_cache, desc);
755 atomic_dec(&ib_conn->post_send_buf_count);
756 }
757 728
758 if (atomic_read(&ib_conn->post_recv_buf_count) == 0 && 729 if (ib_conn->post_recv_buf_count == 0 &&
759 atomic_read(&ib_conn->post_send_buf_count) == 0) { 730 atomic_read(&ib_conn->post_send_buf_count) == 0) {
760 /* getting here when the state is UP means that the conn is * 731 /* getting here when the state is UP means that the conn is *
761 * being terminated asynchronously from the iSCSI layer's * 732 * being terminated asynchronously from the iSCSI layer's *
@@ -774,32 +745,74 @@ static void iser_handle_comp_error(struct iser_desc *desc)
774 } 745 }
775} 746}
776 747
748static int iser_drain_tx_cq(struct iser_device *device)
749{
750 struct ib_cq *cq = device->tx_cq;
751 struct ib_wc wc;
752 struct iser_tx_desc *tx_desc;
753 struct iser_conn *ib_conn;
754 int completed_tx = 0;
755
756 while (ib_poll_cq(cq, 1, &wc) == 1) {
757 tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
758 ib_conn = wc.qp->qp_context;
759 if (wc.status == IB_WC_SUCCESS) {
760 if (wc.opcode == IB_WC_SEND)
761 iser_snd_completion(tx_desc, ib_conn);
762 else
763 iser_err("expected opcode %d got %d\n",
764 IB_WC_SEND, wc.opcode);
765 } else {
766 iser_err("tx id %llx status %d vend_err %x\n",
767 wc.wr_id, wc.status, wc.vendor_err);
768 atomic_dec(&ib_conn->post_send_buf_count);
769 iser_handle_comp_error(tx_desc, ib_conn);
770 }
771 completed_tx++;
772 }
773 return completed_tx;
774}
775
776
777static void iser_cq_tasklet_fn(unsigned long data) 777static void iser_cq_tasklet_fn(unsigned long data)
778{ 778{
779 struct iser_device *device = (struct iser_device *)data; 779 struct iser_device *device = (struct iser_device *)data;
780 struct ib_cq *cq = device->cq; 780 struct ib_cq *cq = device->rx_cq;
781 struct ib_wc wc; 781 struct ib_wc wc;
782 struct iser_desc *desc; 782 struct iser_rx_desc *desc;
783 unsigned long xfer_len; 783 unsigned long xfer_len;
784 struct iser_conn *ib_conn;
785 int completed_tx, completed_rx;
786 completed_tx = completed_rx = 0;
784 787
785 while (ib_poll_cq(cq, 1, &wc) == 1) { 788 while (ib_poll_cq(cq, 1, &wc) == 1) {
786 desc = (struct iser_desc *) (unsigned long) wc.wr_id; 789 desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
787 BUG_ON(desc == NULL); 790 BUG_ON(desc == NULL);
788 791 ib_conn = wc.qp->qp_context;
789 if (wc.status == IB_WC_SUCCESS) { 792 if (wc.status == IB_WC_SUCCESS) {
790 if (desc->type == ISCSI_RX) { 793 if (wc.opcode == IB_WC_RECV) {
791 xfer_len = (unsigned long)wc.byte_len; 794 xfer_len = (unsigned long)wc.byte_len;
792 iser_rcv_completion(desc, xfer_len); 795 iser_rcv_completion(desc, xfer_len, ib_conn);
793 } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ 796 } else
794 iser_snd_completion(desc); 797 iser_err("expected opcode %d got %d\n",
798 IB_WC_RECV, wc.opcode);
795 } else { 799 } else {
796 iser_err("comp w. error op %d status %d\n",desc->type,wc.status); 800 if (wc.status != IB_WC_WR_FLUSH_ERR)
797 iser_handle_comp_error(desc); 801 iser_err("rx id %llx status %d vend_err %x\n",
802 wc.wr_id, wc.status, wc.vendor_err);
803 ib_conn->post_recv_buf_count--;
804 iser_handle_comp_error(NULL, ib_conn);
798 } 805 }
806 completed_rx++;
807 if (!(completed_rx & 63))
808 completed_tx += iser_drain_tx_cq(device);
799 } 809 }
800 /* #warning "it is assumed here that arming CQ only once its empty" * 810 /* #warning "it is assumed here that arming CQ only once its empty" *
801 * " would not cause interrupts to be missed" */ 811 * " would not cause interrupts to be missed" */
802 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 812 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
813
814 completed_tx += iser_drain_tx_cq(device);
815 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
803} 816}
804 817
805static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 818static void iser_cq_callback(struct ib_cq *cq, void *cq_context)