author		Or Gerlitz <ogerlitz@voltaire.com>	2010-02-08 08:17:42 -0500
committer	Roland Dreier <rolandd@cisco.com>	2010-02-24 12:41:10 -0500
commit		bcc60c381d857ced653e912cbe6121294773e147 (patch)
tree		543a2d483a1110f9666ae5503d9e3c53a8782e0c /drivers/infiniband
parent		1cef4659850eeb862c248c7670e404d7a1711ed1 (diff)
IB/iser: New receive buffer posting logic
Currently, the recv buffer posting logic is based on the transactional
nature of iSER, which allows posting a buffer before sending a PDU.
Change this to post only when the number of outstanding recv buffers
is below a watermark, and to post in a batched manner, thus simplifying
and optimizing the data path. Use a pre-allocated ring of recv buffers
instead of allocating from a kmem cache. The login response buffer gets
special treatment, since it must be 8K, unlike the 128-byte buffers
used for every other purpose.
Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--	drivers/infiniband/ulp/iser/iscsi_iser.c	|   2
-rw-r--r--	drivers/infiniband/ulp/iser/iscsi_iser.h	|  40
-rw-r--r--	drivers/infiniband/ulp/iser/iser_initiator.c	| 235
-rw-r--r--	drivers/infiniband/ulp/iser/iser_verbs.c	| 134
4 files changed, 235 insertions(+), 176 deletions(-)
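
In outline, the new data path keeps a ring of ISER_QP_MAX_RECV_DTOS buffers and, whenever the number of posted buffers drops a full batch below that maximum, reposts a batch of ISER_MIN_POSTED_RX (a quarter of the ring) with a single verbs call. The sketch below is illustrative only, not driver code; the authoritative version is the test inside iser_rcv_completion() in the iser_initiator.c diff further down:

/* Illustrative sketch of the watermark test (see iser_rcv_completion) */
static void sketch_replenish_rx_ring(struct iser_conn *ib_conn)
{
        int outstanding = atomic_read(&ib_conn->post_recv_buf_count);
        int count;

        /* repost only once a full batch worth of buffers was consumed... */
        if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
                count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
                            ISER_MIN_POSTED_RX);
                /* ...and then as one chained ib_post_recv() call */
                iser_post_recvm(ib_conn, count);
        }
}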
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 5f7a6fca0a4d..355470e7e904 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -283,7 +283,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 	 * due to issues with the login code re iser sematics
 	 * this not set in iscsi_conn_setup - FIXME
 	 */
-	conn->max_recv_dlength = 128;
+	conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
 
 	iser_conn = conn->dd_data;
 	conn->dd_data = iser_conn;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index e8dfdcfa1daf..83effb610594 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -102,9 +102,9 @@
 #define ISER_MAX_TX_MISC_PDUS		6 /* NOOP_OUT(2), TEXT(1),         *
 					   * SCSI_TMFUNC(2), LOGOUT(1) */
 
-#define ISER_QP_MAX_RECV_DTOS		(ISCSI_DEF_XMIT_CMDS_MAX + \
-					ISER_MAX_RX_MISC_PDUS    + \
-					ISER_MAX_TX_MISC_PDUS)
+#define ISER_QP_MAX_RECV_DTOS		(ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISER_MIN_POSTED_RX		(ISCSI_DEF_XMIT_CMDS_MAX >> 2)
 
 /* the max TX (send) WR supported by the iSER QP is defined by                 *
  * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect   *
@@ -132,6 +132,12 @@ struct iser_hdr {
 	__be64  read_va;
 } __attribute__((packed));
 
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN  (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+
+#define ISER_RECV_DATA_SEG_LEN	128
+#define ISER_RX_PAYLOAD_SIZE	(ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE	(ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
 
 /* Length of an object name string */
 #define ISER_OBJECT_NAME_SIZE		    64
@@ -212,7 +218,6 @@ struct iser_dto {
 };
 
 enum iser_desc_type {
-	ISCSI_RX,
 	ISCSI_TX_CONTROL ,
 	ISCSI_TX_SCSI_COMMAND,
 	ISCSI_TX_DATAOUT
@@ -228,6 +233,17 @@ struct iser_desc {
 	struct iser_dto		     dto;
 };
 
+#define ISER_RX_PAD_SIZE	(256 - (ISER_RX_PAYLOAD_SIZE + \
+					sizeof(u64) + sizeof(struct ib_sge)))
+struct iser_rx_desc {
+	struct iser_hdr              iser_header;
+	struct iscsi_hdr             iscsi_header;
+	char		             data[ISER_RECV_DATA_SEG_LEN];
+	u64		             dma_addr;
+	struct ib_sge		     rx_sg;
+	char		             pad[ISER_RX_PAD_SIZE];
+} __attribute__((packed));
+
 struct iser_device {
 	struct ib_device             *ib_device;
 	struct ib_pd	             *pd;
@@ -256,6 +272,12 @@ struct iser_conn {
 	struct iser_page_vec         *page_vec;     /* represents SG to fmr maps*
 						     * maps serialized as tx is*/
 	struct list_head	     conn_list;      /* entry in ig conn list */
+
+	char  			     *login_buf;
+	u64 			     login_dma;
+	unsigned int 		     rx_desc_head;
+	struct iser_rx_desc	     *rx_descs;
+	struct ib_recv_wr	     rx_wr[ISER_MIN_POSTED_RX];
 };
 
 struct iscsi_iser_conn {
@@ -319,8 +341,9 @@ void iser_conn_put(struct iser_conn *ib_conn);
 
 void iser_conn_terminate(struct iser_conn *ib_conn);
 
-void iser_rcv_completion(struct iser_desc *desc,
-			 unsigned long    dto_xfer_len);
+void iser_rcv_completion(struct iser_rx_desc *desc,
+			 unsigned long    dto_xfer_len,
+			 struct iser_conn *ib_conn);
 
 void iser_snd_completion(struct iser_desc *desc);
 
@@ -332,6 +355,8 @@ void iser_dto_buffs_release(struct iser_dto *dto);
 
 int  iser_regd_buff_release(struct iser_regd_buf *regd_buf);
 
+void iser_free_rx_descriptors(struct iser_conn *ib_conn);
+
 void iser_reg_single(struct iser_device *device,
 		     struct iser_regd_buf *regd_buf,
 		     enum dma_data_direction direction);
@@ -353,7 +378,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
 
 void iser_unreg_mem(struct iser_mem_reg *mem_reg);
 
-int  iser_post_recv(struct iser_desc *rx_desc);
+int  iser_post_recvl(struct iser_conn *ib_conn);
+int  iser_post_recvm(struct iser_conn *ib_conn, int count);
 int  iser_post_send(struct iser_desc *tx_desc);
 
 int iser_conn_state_comp(struct iser_conn *ib_conn,
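
Why the pad in struct iser_rx_desc? Each ring slot is sized to exactly 256 bytes, so slots stay uniformly aligned. With the 28-byte iser_hdr and the 48-byte iSCSI BHS, the arithmetic is: 28 + 48 + 128 (data) = 204 = ISER_RX_PAYLOAD_SIZE; adding the 8-byte dma_addr and the 16-byte ib_sge gives 228, and ISER_RX_PAD_SIZE = 256 - 228 = 28. A hypothetical compile-time check (not part of the patch) would look like:

/* Hypothetical sanity check, not in the patch: with the packed layout
 * above, one rx descriptor must occupy exactly 256 bytes. */
static inline void iser_rx_desc_layout_check(void)
{
        BUILD_BUG_ON(sizeof(struct iser_rx_desc) != 256);
}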
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5f42fbe3080c..6d9bbe6363ee 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -39,9 +39,6 @@
 
 #include "iscsi_iser.h"
 
-/* Constant PDU lengths calculations */
-#define ISER_TOTAL_HEADERS_LEN  (sizeof (struct iser_hdr) + \
-				 sizeof (struct iscsi_hdr))
 
 /* iser_dto_add_regd_buff - increments the reference count for               *
  * the registered buffer & adds it to the DTO object                        */
@@ -172,78 +169,6 @@ iser_prepare_write_cmd(struct iscsi_task *task,
 	return 0;
 }
 
-/**
- * iser_post_receive_control - allocates, initializes and posts receive DTO.
- */
-static int iser_post_receive_control(struct iscsi_conn *conn)
-{
-	struct iscsi_iser_conn *iser_conn = conn->dd_data;
-	struct iser_desc     *rx_desc;
-	struct iser_regd_buf *regd_hdr;
-	struct iser_regd_buf *regd_data;
-	struct iser_dto      *recv_dto = NULL;
-	struct iser_device   *device = iser_conn->ib_conn->device;
-	int rx_data_size, err = 0;
-
-	rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
-	if (rx_desc == NULL) {
-		iser_err("Failed to alloc desc for post recv\n");
-		return -ENOMEM;
-	}
-	rx_desc->type = ISCSI_RX;
-
-	/* for the login sequence we must support rx of upto 8K; login is done
-	 * after conn create/bind (connect) and conn stop/bind (reconnect),
-	 * what's common for both schemes is that the connection is not started
-	 */
-	if (conn->c_stage != ISCSI_CONN_STARTED)
-		rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
-	else /* FIXME till user space sets conn->max_recv_dlength correctly */
-		rx_data_size = 128;
-
-	rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
-	if (rx_desc->data == NULL) {
-		iser_err("Failed to alloc data buf for post recv\n");
-		err = -ENOMEM;
-		goto post_rx_kmalloc_failure;
-	}
-
-	recv_dto = &rx_desc->dto;
-	recv_dto->ib_conn = iser_conn->ib_conn;
-	recv_dto->regd_vector_len = 0;
-
-	regd_hdr = &rx_desc->hdr_regd_buf;
-	memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
-	regd_hdr->device = device;
-	regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
-	regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
-
-	iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
-
-	iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
-
-	regd_data = &rx_desc->data_regd_buf;
-	memset(regd_data, 0, sizeof(struct iser_regd_buf));
-	regd_data->device = device;
-	regd_data->virt_addr = rx_desc->data;
-	regd_data->data_size = rx_data_size;
-
-	iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
-
-	iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
-
-	err = iser_post_recv(rx_desc);
-	if (!err)
-		return 0;
-
-	/* iser_post_recv failed */
-	iser_dto_buffs_release(recv_dto);
-	kfree(rx_desc->data);
-post_rx_kmalloc_failure:
-	kmem_cache_free(ig.desc_cache, rx_desc);
-	return err;
-}
-
 /* creates a new tx descriptor and adds header regd buffer */
 static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
 				  struct iser_desc       *tx_desc)
@@ -254,7 +179,7 @@ static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
 	memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
 	regd_hdr->device = iser_conn->ib_conn->device;
 	regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */
-	regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
+	regd_hdr->data_size = ISER_HEADERS_LEN;
 
 	send_dto->ib_conn = iser_conn->ib_conn;
 	send_dto->notify_enable = 1;
@@ -266,6 +191,72 @@ static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
 	iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0);
 }
 
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
+{
+	int i, j;
+	u64 dma_addr;
+	struct iser_rx_desc *rx_desc;
+	struct ib_sge       *rx_sg;
+	struct iser_device  *device = ib_conn->device;
+
+	ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
+				sizeof(struct iser_rx_desc), GFP_KERNEL);
+	if (!ib_conn->rx_descs)
+		goto rx_desc_alloc_fail;
+
+	rx_desc = ib_conn->rx_descs;
+
+	for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)  {
+		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
+					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+		if (ib_dma_mapping_error(device->ib_device, dma_addr))
+			goto rx_desc_dma_map_failed;
+
+		rx_desc->dma_addr = dma_addr;
+
+		rx_sg = &rx_desc->rx_sg;
+		rx_sg->addr   = rx_desc->dma_addr;
+		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
+		rx_sg->lkey   = device->mr->lkey;
+	}
+
+	ib_conn->rx_desc_head = 0;
+	return 0;
+
+rx_desc_dma_map_failed:
+	rx_desc = ib_conn->rx_descs;
+	for (j = 0; j < i; j++, rx_desc++)
+		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
+			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+	kfree(ib_conn->rx_descs);
+	ib_conn->rx_descs = NULL;
+rx_desc_alloc_fail:
+	iser_err("failed allocating rx descriptors / data buffers\n");
+	return -ENOMEM;
+}
+
+void iser_free_rx_descriptors(struct iser_conn *ib_conn)
+{
+	int i;
+	struct iser_rx_desc *rx_desc;
+	struct iser_device *device = ib_conn->device;
+
+	if (ib_conn->login_buf) {
+		ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
+			ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+		kfree(ib_conn->login_buf);
+	}
+
+	if (!ib_conn->rx_descs)
+		return;
+
+	rx_desc = ib_conn->rx_descs;
+	for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
+		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
+			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+	kfree(ib_conn->rx_descs);
+}
+
 /**
  * iser_conn_set_full_featured_mode - (iSER API)
  */
@@ -273,27 +264,20 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
 {
 	struct iscsi_iser_conn *iser_conn = conn->dd_data;
 
-	int i;
-	/* no need to keep it in a var, we are after login so if this should
-	 * be negotiated, by now the result should be available here */
-	int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
-
-	iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
+	iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
 
 	/* Check that there is no posted recv or send buffers left - */
 	/* they must be consumed during the login phase */
 	BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0);
	BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
 
+	if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
+		return -ENOMEM;
+
 	/* Initial post receive buffers */
-	for (i = 0; i < initial_post_recv_bufs_num; i++) {
-		if (iser_post_receive_control(conn) != 0) {
-			iser_err("Failed to post recv bufs at:%d conn:0x%p\n",
-				 i, conn);
-			return -ENOMEM;
-		}
-	}
-	iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn);
+	if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -321,7 +305,7 @@ int iser_send_command(struct iscsi_conn *conn,
 	struct iscsi_iser_task *iser_task = task->dd_data;
 	struct iser_dto *send_dto = NULL;
 	unsigned long edtl;
-	int err = 0;
+	int err;
 	struct iser_data_buf *data_buf;
 	struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr;
 	struct scsi_cmnd *sc  = task->sc;
@@ -371,12 +355,6 @@ int iser_send_command(struct iscsi_conn *conn,
 	iser_reg_single(iser_conn->ib_conn->device,
 			send_dto->regd[0], DMA_TO_DEVICE);
 
-	if (iser_post_receive_control(conn) != 0) {
-		iser_err("post_recv failed!\n");
-		err = -ENOMEM;
-		goto send_command_error;
-	}
-
 	iser_task->status = ISER_TASK_STATUS_STARTED;
 
 	err = iser_post_send(&iser_task->desc);
@@ -474,7 +452,7 @@ int iser_send_control(struct iscsi_conn *conn,
 	struct iser_desc *mdesc = &iser_task->desc;
 	struct iser_dto *send_dto = NULL;
 	unsigned long data_seg_len;
-	int err = 0;
+	int err;
 	struct iser_regd_buf *regd_buf;
 	struct iser_device *device;
 
@@ -511,10 +489,10 @@ int iser_send_control(struct iscsi_conn *conn,
 				data_seg_len);
 	}
 
-	if (iser_post_receive_control(conn) != 0) {
-		iser_err("post_rcv_buff failed!\n");
-		err = -ENOMEM;
-		goto send_control_error;
+	if (task == conn->login_task) {
+		err = iser_post_recvl(iser_conn->ib_conn);
+		if (err)
+			goto send_control_error;
 	}
 
 	err = iser_post_send(mdesc);
@@ -530,27 +508,34 @@ send_control_error:
 /**
  * iser_rcv_dto_completion - recv DTO completion
  */
-void iser_rcv_completion(struct iser_desc *rx_desc,
-			 unsigned long dto_xfer_len)
+void iser_rcv_completion(struct iser_rx_desc *rx_desc,
+			 unsigned long rx_xfer_len,
+			 struct iser_conn *ib_conn)
 {
-	struct iser_dto *dto = &rx_desc->dto;
-	struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
+	struct iscsi_iser_conn *conn = ib_conn->iser_conn;
 	struct iscsi_task *task;
 	struct iscsi_iser_task *iser_task;
 	struct iscsi_hdr *hdr;
-	char   *rx_data = NULL;
-	int     rx_data_len = 0;
 	unsigned char opcode;
+	u64 rx_dma;
+	int rx_buflen, outstanding, count, err;
+
+	/* differentiate between login to all other PDUs */
+	if ((char *)rx_desc == ib_conn->login_buf) {
+		rx_dma = ib_conn->login_dma;
+		rx_buflen = ISER_RX_LOGIN_SIZE;
+	} else {
+		rx_dma = rx_desc->dma_addr;
+		rx_buflen = ISER_RX_PAYLOAD_SIZE;
+	}
 
-	hdr = &rx_desc->iscsi_header;
+	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
+			rx_buflen, DMA_FROM_DEVICE);
 
-	iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt);
+	hdr = &rx_desc->iscsi_header;
 
-	if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */
-		rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
-		rx_data     = dto->regd[1]->virt_addr;
-		rx_data    += dto->offset[1];
-	}
+	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+			hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
 
 	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
 
@@ -573,18 +558,30 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
 			iscsi_put_task(task);
 		}
 	}
-	iser_dto_buffs_release(dto);
 
-	iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len);
+	iscsi_iser_recv(conn->iscsi_conn, hdr,
+			rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
 
-	kfree(rx_desc->data);
-	kmem_cache_free(ig.desc_cache, rx_desc);
+	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
+			rx_buflen, DMA_FROM_DEVICE);
 
 	/* decrementing conn->post_recv_buf_count only --after-- freeing the  *
 	 * task eliminates the need to worry on tasks which are completed in   *
 	 * parallel to the execution of iser_conn_term. So the code that waits *
 	 * for the posted rx bufs refcount to become zero handles everything   */
 	atomic_dec(&conn->ib_conn->post_recv_buf_count);
+
+	if (rx_dma == ib_conn->login_dma)
+		return;
+
+	outstanding = atomic_read(&ib_conn->post_recv_buf_count);
+	if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
+		count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
+						ISER_MIN_POSTED_RX);
+		err = iser_post_recvm(ib_conn, count);
+		if (err)
+			iser_err("posting %d rx bufs err %d\n", count, err);
+	}
 }
 
 void iser_snd_completion(struct iser_desc *tx_desc)
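
Note the shape of the new receive completion path above: because the ring buffers are DMA-mapped once for the lifetime of the connection (in iser_alloc_rx_descriptors()), the per-PDU work shrinks to a pair of dma_sync calls around the read, instead of the old allocate/register/post cycle. Condensed to its core, and as a sketch only (error handling and the login special case omitted, names mirroring the driver's):

/* Sketch of the steady-state rx path introduced by this patch */
static void sketch_rx_pdu(struct iser_conn *ib_conn,
                          struct iser_rx_desc *rx_desc,
                          unsigned long rx_xfer_len)
{
        struct ib_device *dev = ib_conn->device->ib_device;

        ib_dma_sync_single_for_cpu(dev, rx_desc->dma_addr,
                                   ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        /* hand the PDU to the iSCSI stack straight from the ring slot */
        iscsi_iser_recv(ib_conn->iser_conn->iscsi_conn,
                        &rx_desc->iscsi_header,
                        rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
        ib_dma_sync_single_for_device(dev, rx_desc->dma_addr,
                                      ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        atomic_dec(&ib_conn->post_recv_buf_count);
}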
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 7092503a10e3..89b956044060 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -129,13 +129,23 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 {
 	struct iser_device	*device;
 	struct ib_qp_init_attr	init_attr;
-	int			ret;
+	int			ret = -ENOMEM;
 	struct ib_fmr_pool_param params;
 
 	BUG_ON(ib_conn->device == NULL);
 
 	device = ib_conn->device;
 
+	ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+	if (!ib_conn->login_buf) {
+		goto alloc_err;
+		ret = -ENOMEM;
+	}
+
+	ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
+				(void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
+				DMA_FROM_DEVICE);
+
 	ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
 				    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
 				    GFP_KERNEL);
@@ -174,7 +184,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
 	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
 	init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN;
-	init_attr.cap.max_recv_sge = 2;
+	init_attr.cap.max_recv_sge = 1;
 	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
 	init_attr.qp_type	= IB_QPT_RC;
 
@@ -192,6 +202,7 @@ qp_err:
 	(void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
 fmr_pool_err:
 	kfree(ib_conn->page_vec);
+	kfree(ib_conn->login_buf);
 alloc_err:
 	iser_err("unable to alloc mem or create resource, err %d\n", ret);
 	return ret;
@@ -314,7 +325,7 @@ static void iser_conn_release(struct iser_conn *ib_conn)
 	mutex_lock(&ig.connlist_mutex);
 	list_del(&ib_conn->conn_list);
 	mutex_unlock(&ig.connlist_mutex);
-
+	iser_free_rx_descriptors(ib_conn);
 	iser_free_ib_conn_res(ib_conn);
 	ib_conn->device = NULL;
 	/* on EVENT_ADDR_ERROR there's no device yet for this conn */
@@ -625,6 +636,60 @@ void iser_unreg_mem(struct iser_mem_reg *reg)
 	reg->mem_h = NULL;
 }
 
+int iser_post_recvl(struct iser_conn *ib_conn)
+{
+	struct ib_recv_wr rx_wr, *rx_wr_failed;
+	struct ib_sge	  sge;
+	int ib_ret;
+
+	sge.addr   = ib_conn->login_dma;
+	sge.length = ISER_RX_LOGIN_SIZE;
+	sge.lkey   = ib_conn->device->mr->lkey;
+
+	rx_wr.wr_id   = (unsigned long)ib_conn->login_buf;
+	rx_wr.sg_list = &sge;
+	rx_wr.num_sge = 1;
+	rx_wr.next    = NULL;
+
+	atomic_inc(&ib_conn->post_recv_buf_count);
+	ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+	if (ib_ret) {
+		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
+		atomic_dec(&ib_conn->post_recv_buf_count);
+	}
+	return ib_ret;
+}
+
+int iser_post_recvm(struct iser_conn *ib_conn, int count)
+{
+	struct ib_recv_wr *rx_wr, *rx_wr_failed;
+	int i, ib_ret;
+	unsigned int my_rx_head = ib_conn->rx_desc_head;
+	struct iser_rx_desc *rx_desc;
+
+	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
+		rx_desc		= &ib_conn->rx_descs[my_rx_head];
+		rx_wr->wr_id	= (unsigned long)rx_desc;
+		rx_wr->sg_list	= &rx_desc->rx_sg;
+		rx_wr->num_sge	= 1;
+		rx_wr->next	= rx_wr + 1;
+		my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
+	}
+
+	rx_wr--;
+	rx_wr->next = NULL; /* mark end of work requests list */
+
+	atomic_add(count, &ib_conn->post_recv_buf_count);
+	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+	if (ib_ret) {
+		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
+		atomic_sub(count, &ib_conn->post_recv_buf_count);
+	} else
+		ib_conn->rx_desc_head = my_rx_head;
+	return ib_ret;
+}
+
+
 /**
  * iser_dto_to_iov - builds IOV from a dto descriptor
  */
@@ -665,39 +730,6 @@ static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_le
 	}
 }
 
-/**
- * iser_post_recv - Posts a receive buffer.
- *
- * returns 0 on success, -1 on failure
- */
-int iser_post_recv(struct iser_desc *rx_desc)
-{
-	int		  ib_ret, ret_val = 0;
-	struct ib_recv_wr recv_wr, *recv_wr_failed;
-	struct ib_sge	  iov[2];
-	struct iser_conn  *ib_conn;
-	struct iser_dto   *recv_dto = &rx_desc->dto;
-
-	/* Retrieve conn */
-	ib_conn = recv_dto->ib_conn;
-
-	iser_dto_to_iov(recv_dto, iov, 2);
-
-	recv_wr.next	= NULL;
-	recv_wr.sg_list = iov;
-	recv_wr.num_sge = recv_dto->regd_vector_len;
-	recv_wr.wr_id	= (unsigned long)rx_desc;
-
-	atomic_inc(&ib_conn->post_recv_buf_count);
-	ib_ret	= ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed);
-	if (ib_ret) {
-		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
-		atomic_dec(&ib_conn->post_recv_buf_count);
-		ret_val = -1;
-	}
-
-	return ret_val;
-}
-
 
 /**
  * iser_start_send - Initiate a Send DTO operation
@@ -737,18 +769,17 @@ int iser_post_send(struct iser_desc *tx_desc)
 	return ret_val;
 }
 
-static void iser_handle_comp_error(struct iser_desc *desc)
+static void iser_handle_comp_error(struct iser_desc *desc,
+				struct iser_conn *ib_conn)
 {
-	struct iser_dto  *dto	  = &desc->dto;
-	struct iser_conn *ib_conn = dto->ib_conn;
-
-	iser_dto_buffs_release(dto);
+	struct iser_rx_desc *rx = (struct iser_rx_desc *)desc;
+	struct iser_rx_desc *rx_first = ib_conn->rx_descs;
+	struct iser_rx_desc *rx_last = rx_first + (ISER_QP_MAX_RECV_DTOS - 1);
 
-	if (desc->type == ISCSI_RX) {
-		kfree(desc->data);
-		kmem_cache_free(ig.desc_cache, desc);
+	if ((char *)desc == ib_conn->login_buf ||
+			(rx_first <= rx && rx <= rx_last))
 		atomic_dec(&ib_conn->post_recv_buf_count);
-	} else { /* type is TX control/command/dataout */
+	else { /* type is TX control/command/dataout */
 		if (desc->type == ISCSI_TX_DATAOUT)
 			kmem_cache_free(ig.desc_cache, desc);
 		atomic_dec(&ib_conn->post_send_buf_count);
@@ -780,20 +811,25 @@ static void iser_cq_tasklet_fn(unsigned long data)
 	struct ib_wc	    wc;
 	struct iser_desc    *desc;
 	unsigned long	    xfer_len;
+	struct iser_conn *ib_conn;
 
 	while (ib_poll_cq(cq, 1, &wc) == 1) {
 		desc	 = (struct iser_desc *) (unsigned long) wc.wr_id;
 		BUG_ON(desc == NULL);
+		ib_conn = wc.qp->qp_context;
 
 		if (wc.status == IB_WC_SUCCESS) {
-			if (desc->type == ISCSI_RX) {
+			if (wc.opcode == IB_WC_RECV) {
 				xfer_len = (unsigned long)wc.byte_len;
-				iser_rcv_completion(desc, xfer_len);
+				iser_rcv_completion((struct iser_rx_desc *)desc,
+							xfer_len, ib_conn);
 			} else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */
 				iser_snd_completion(desc);
 		} else {
-			iser_err("comp w. error op %d status %d\n",desc->type,wc.status);
-			iser_handle_comp_error(desc);
+			if (wc.status != IB_WC_WR_FLUSH_ERR)
+				iser_err("id %llx status %d vend_err %x\n",
+					wc.wr_id, wc.status, wc.vendor_err);
+			iser_handle_comp_error(desc, ib_conn);
 		}
 	}
 	/* #warning "it is assumed here that arming CQ only once its empty" *
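
A final note on iser_post_recvm() above: the ring head advances with a bitwise mask, (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1), which is equivalent to a modulo only while ISER_QP_MAX_RECV_DTOS (now plain ISCSI_DEF_XMIT_CMDS_MAX) remains a power of two. Stated as a sketch:

/* Equivalence the ring arithmetic relies on; valid only for
 * power-of-two ring sizes (e.g. 64 or 128), where masking the low
 * bits wraps the index exactly like a modulo would. */
static unsigned int ring_next(unsigned int head, unsigned int size)
{
        return (head + 1) & (size - 1);	/* == (head + 1) % size */
}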