aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorDavid Disseldorp <ddiss@sgi.com>2008-12-21 16:56:50 -0500
committerRoland Dreier <rolandd@cisco.com>2008-12-21 16:56:50 -0500
commitbba7ebba3b17f4fe8c5907a32e16d9bd3fcf5192 (patch)
tree8f4773e3a504bf17a8e0dfc0eaa3f3efa55941c0 /drivers/infiniband
parent061e41fdb5047b1fb161e89664057835935ca1d2 (diff)
IB/iser: Avoid recv buffer exhaustion caused by unexpected PDUs
iSCSI/iSER targets may send PDUs without a prior request from the initiator. RFC 5046 refers to these PDUs as "unexpected". NOP-In PDUs with itt=RESERVED and Asynchronous Message PDUs occupy this category. The amount of active "unexpected" PDU's an iSER target may have at any time is governed by the MaxOutstandingUnexpectedPDUs key, which is not yet supported. Currently when an iSER target sends an "unexpected" PDU, the initiators recv buffer consumed by the PDU is not replaced. If over initial_post_recv_bufs_num "unexpected" PDUs are received then the receive queue will run out of receive work requests entirely. This patch ensures recv buffers consumed by "unexpected" PDUs are replaced in the next iser_post_receive_control() call. Signed-off-by: David Disseldorp <ddiss@sgi.com> Signed-off-by: Ken Sandars <ksandars@sgi.com> Acked-by: Or Gerlitz <ogerlitz@voltaire.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h3
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c132
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c1
3 files changed, 95 insertions, 41 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 81a82628a5f1..861119593f2b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -252,6 +252,9 @@ struct iser_conn {
252 wait_queue_head_t wait; /* waitq for conn/disconn */ 252 wait_queue_head_t wait; /* waitq for conn/disconn */
253 atomic_t post_recv_buf_count; /* posted rx count */ 253 atomic_t post_recv_buf_count; /* posted rx count */
254 atomic_t post_send_buf_count; /* posted tx count */ 254 atomic_t post_send_buf_count; /* posted tx count */
255 atomic_t unexpected_pdu_count;/* count of received *
256 * unexpected pdus *
257 * not yet retired */
255 char name[ISER_OBJECT_NAME_SIZE]; 258 char name[ISER_OBJECT_NAME_SIZE];
256 struct iser_page_vec *page_vec; /* represents SG to fmr maps* 259 struct iser_page_vec *page_vec; /* represents SG to fmr maps*
257 * maps serialized as tx is*/ 260 * maps serialized as tx is*/
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index cdd283189047..ed1aff21b7ea 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -183,14 +183,8 @@ static int iser_post_receive_control(struct iscsi_conn *conn)
183 struct iser_regd_buf *regd_data; 183 struct iser_regd_buf *regd_data;
184 struct iser_dto *recv_dto = NULL; 184 struct iser_dto *recv_dto = NULL;
185 struct iser_device *device = iser_conn->ib_conn->device; 185 struct iser_device *device = iser_conn->ib_conn->device;
186 int rx_data_size, err = 0; 186 int rx_data_size, err;
187 187 int posts, outstanding_unexp_pdus;
188 rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
189 if (rx_desc == NULL) {
190 iser_err("Failed to alloc desc for post recv\n");
191 return -ENOMEM;
192 }
193 rx_desc->type = ISCSI_RX;
194 188
195 /* for the login sequence we must support rx of upto 8K; login is done 189 /* for the login sequence we must support rx of upto 8K; login is done
196 * after conn create/bind (connect) and conn stop/bind (reconnect), 190 * after conn create/bind (connect) and conn stop/bind (reconnect),
@@ -201,46 +195,80 @@ static int iser_post_receive_control(struct iscsi_conn *conn)
201 else /* FIXME till user space sets conn->max_recv_dlength correctly */ 195 else /* FIXME till user space sets conn->max_recv_dlength correctly */
202 rx_data_size = 128; 196 rx_data_size = 128;
203 197
204 rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); 198 outstanding_unexp_pdus =
205 if (rx_desc->data == NULL) { 199 atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);
206 iser_err("Failed to alloc data buf for post recv\n");
207 err = -ENOMEM;
208 goto post_rx_kmalloc_failure;
209 }
210 200
211 recv_dto = &rx_desc->dto; 201 /*
212 recv_dto->ib_conn = iser_conn->ib_conn; 202 * in addition to the response buffer, replace those consumed by
213 recv_dto->regd_vector_len = 0; 203 * unexpected pdus.
204 */
205 for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) {
206 rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
207 if (rx_desc == NULL) {
208 iser_err("Failed to alloc desc for post recv %d\n",
209 posts);
210 err = -ENOMEM;
211 goto post_rx_cache_alloc_failure;
212 }
213 rx_desc->type = ISCSI_RX;
214 rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
215 if (rx_desc->data == NULL) {
216 iser_err("Failed to alloc data buf for post recv %d\n",
217 posts);
218 err = -ENOMEM;
219 goto post_rx_kmalloc_failure;
220 }
214 221
215 regd_hdr = &rx_desc->hdr_regd_buf; 222 recv_dto = &rx_desc->dto;
216 memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 223 recv_dto->ib_conn = iser_conn->ib_conn;
217 regd_hdr->device = device; 224 recv_dto->regd_vector_len = 0;
218 regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
219 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
220 225
221 iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); 226 regd_hdr = &rx_desc->hdr_regd_buf;
227 memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
228 regd_hdr->device = device;
229 regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
230 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
222 231
223 iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); 232 iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
224 233
225 regd_data = &rx_desc->data_regd_buf; 234 iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
226 memset(regd_data, 0, sizeof(struct iser_regd_buf));
227 regd_data->device = device;
228 regd_data->virt_addr = rx_desc->data;
229 regd_data->data_size = rx_data_size;
230 235
231 iser_reg_single(device, regd_data, DMA_FROM_DEVICE); 236 regd_data = &rx_desc->data_regd_buf;
237 memset(regd_data, 0, sizeof(struct iser_regd_buf));
238 regd_data->device = device;
239 regd_data->virt_addr = rx_desc->data;
240 regd_data->data_size = rx_data_size;
232 241
233 iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); 242 iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
234 243
235 err = iser_post_recv(rx_desc); 244 iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
236 if (!err)
237 return 0;
238 245
239 /* iser_post_recv failed */ 246 err = iser_post_recv(rx_desc);
247 if (err) {
248 iser_err("Failed iser_post_recv for post %d\n", posts);
249 goto post_rx_post_recv_failure;
250 }
251 }
252 /* all posts successful */
253 return 0;
254
255post_rx_post_recv_failure:
240 iser_dto_buffs_release(recv_dto); 256 iser_dto_buffs_release(recv_dto);
241 kfree(rx_desc->data); 257 kfree(rx_desc->data);
242post_rx_kmalloc_failure: 258post_rx_kmalloc_failure:
243 kmem_cache_free(ig.desc_cache, rx_desc); 259 kmem_cache_free(ig.desc_cache, rx_desc);
260post_rx_cache_alloc_failure:
261 if (posts > 0) {
262 /*
263 * response buffer posted, but did not replace all unexpected
264 * pdu recv bufs. Ignore error, retry occurs next send
265 */
266 outstanding_unexp_pdus -= (posts - 1);
267 err = 0;
268 }
269 atomic_add(outstanding_unexp_pdus,
270 &iser_conn->ib_conn->unexpected_pdu_count);
271
244 return err; 272 return err;
245} 273}
246 274
@@ -274,8 +302,10 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
274 struct iscsi_iser_conn *iser_conn = conn->dd_data; 302 struct iscsi_iser_conn *iser_conn = conn->dd_data;
275 303
276 int i; 304 int i;
277 /* no need to keep it in a var, we are after login so if this should 305 /*
278 * be negotiated, by now the result should be available here */ 306 * FIXME this value should be declared to the target during login with
307 * the MaxOutstandingUnexpectedPDUs key when supported
308 */
279 int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS; 309 int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
280 310
281 iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num); 311 iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
@@ -478,6 +508,7 @@ int iser_send_control(struct iscsi_conn *conn,
478 int err = 0; 508 int err = 0;
479 struct iser_regd_buf *regd_buf; 509 struct iser_regd_buf *regd_buf;
480 struct iser_device *device; 510 struct iser_device *device;
511 unsigned char opcode;
481 512
482 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { 513 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
483 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); 514 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
@@ -512,10 +543,15 @@ int iser_send_control(struct iscsi_conn *conn,
512 data_seg_len); 543 data_seg_len);
513 } 544 }
514 545
515 if (iser_post_receive_control(conn) != 0) { 546 opcode = task->hdr->opcode & ISCSI_OPCODE_MASK;
516 iser_err("post_rcv_buff failed!\n"); 547
517 err = -ENOMEM; 548 /* post recv buffer for response if one is expected */
518 goto send_control_error; 549 if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) {
550 if (iser_post_receive_control(conn) != 0) {
551 iser_err("post_rcv_buff failed!\n");
552 err = -ENOMEM;
553 goto send_control_error;
554 }
519 } 555 }
520 556
521 err = iser_post_send(mdesc); 557 err = iser_post_send(mdesc);
@@ -586,6 +622,20 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
586 * parallel to the execution of iser_conn_term. So the code that waits * 622 * parallel to the execution of iser_conn_term. So the code that waits *
587 * for the posted rx bufs refcount to become zero handles everything */ 623 * for the posted rx bufs refcount to become zero handles everything */
588 atomic_dec(&conn->ib_conn->post_recv_buf_count); 624 atomic_dec(&conn->ib_conn->post_recv_buf_count);
625
626 /*
627 * if an unexpected PDU was received then the recv wr consumed must
628 * be replaced, this is done in the next send of a control-type PDU
629 */
630 if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) {
631 /* nop-in with itt = 0xffffffff */
632 atomic_inc(&conn->ib_conn->unexpected_pdu_count);
633 }
634 else if (opcode == ISCSI_OP_ASYNC_EVENT) {
635 /* asyncronous message */
636 atomic_inc(&conn->ib_conn->unexpected_pdu_count);
637 }
638 /* a reject PDU consumes the recv buf posted for the response */
589} 639}
590 640
591void iser_snd_completion(struct iser_desc *tx_desc) 641void iser_snd_completion(struct iser_desc *tx_desc)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 26ff6214a81f..6dc6b174cdd4 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -498,6 +498,7 @@ void iser_conn_init(struct iser_conn *ib_conn)
498 init_waitqueue_head(&ib_conn->wait); 498 init_waitqueue_head(&ib_conn->wait);
499 atomic_set(&ib_conn->post_recv_buf_count, 0); 499 atomic_set(&ib_conn->post_recv_buf_count, 0);
500 atomic_set(&ib_conn->post_send_buf_count, 0); 500 atomic_set(&ib_conn->post_send_buf_count, 0);
501 atomic_set(&ib_conn->unexpected_pdu_count, 0);
501 atomic_set(&ib_conn->refcount, 1); 502 atomic_set(&ib_conn->refcount, 1);
502 INIT_LIST_HEAD(&ib_conn->conn_list); 503 INIT_LIST_HEAD(&ib_conn->conn_list);
503 spin_lock_init(&ib_conn->lock); 504 spin_lock_init(&ib_conn->lock);