diff options
author | Or Gerlitz <ogerlitz@voltaire.com> | 2006-05-11 03:02:46 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2006-06-22 10:51:11 -0400 |
commit | 1cfa0a75dbef1d5bf687aacafabb023288f6b36a (patch) | |
tree | 8296842d0f7afb479e1437330d54bd31a0272c95 /drivers/infiniband | |
parent | e85b24b5e7de9f507c6253183d089370f37618c5 (diff) |
IB/iser: iSER RDMA CM (CMA) and IB verbs interaction
This file contains the low-level interaction with the RDMA CM
and the IB verbs; iSER is a consumer of both.
Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 827 |
1 files changed, 827 insertions, 0 deletions
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c new file mode 100644 index 000000000000..ff117bbf81b4 --- /dev/null +++ b/drivers/infiniband/ulp/iser/iser_verbs.c | |||
@@ -0,0 +1,827 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. | ||
3 | * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. | ||
4 | * | ||
5 | * This software is available to you under a choice of one of two | ||
6 | * licenses. You may choose to be licensed under the terms of the GNU | ||
7 | * General Public License (GPL) Version 2, available from the file | ||
8 | * COPYING in the main directory of this source tree, or the | ||
9 | * OpenIB.org BSD license below: | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or | ||
12 | * without modification, are permitted provided that the following | ||
13 | * conditions are met: | ||
14 | * | ||
15 | * - Redistributions of source code must retain the above | ||
16 | * copyright notice, this list of conditions and the following | ||
17 | * disclaimer. | ||
18 | * | ||
19 | * - Redistributions in binary form must reproduce the above | ||
20 | * copyright notice, this list of conditions and the following | ||
21 | * disclaimer in the documentation and/or other materials | ||
22 | * provided with the distribution. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
31 | * SOFTWARE. | ||
32 | * | ||
33 | * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $ | ||
34 | */ | ||
35 | #include <asm/io.h> | ||
36 | #include <linux/kernel.h> | ||
37 | #include <linux/module.h> | ||
38 | #include <linux/smp_lock.h> | ||
39 | #include <linux/delay.h> | ||
40 | #include <linux/version.h> | ||
41 | |||
42 | #include "iscsi_iser.h" | ||
43 | |||
/* Number of connections the per-device completion queue is sized for. */
#define ISCSI_ISER_MAX_CONN 8
/*
 * Depth requested for the device CQ: one entry for every receive and
 * request DTO of each of the ISCSI_ISER_MAX_CONN connections.
 */
#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \
ISER_QP_MAX_REQ_DTOS) * \
ISCSI_ISER_MAX_CONN)

/* Forward declarations: these are referenced before they are defined below. */
static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
static void iser_comp_error_worker(void *data);
52 | |||
/*
 * Asynchronous event handler registered for the device CQ; it only logs
 * the event code. @context is not used.
 */
static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got cq event %d \n", cause->event);
}
57 | |||
/*
 * Asynchronous event handler registered for each connection QP; it only
 * logs the event code. @context is not used.
 */
static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got qp event %d\n",cause->event);
}
62 | |||
63 | /** | ||
64 | * iser_create_device_ib_res - creates Protection Domain (PD), Completion | ||
65 | * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with | ||
66 | * the adapator. | ||
67 | * | ||
68 | * returns 0 on success, -1 on failure | ||
69 | */ | ||
70 | static int iser_create_device_ib_res(struct iser_device *device) | ||
71 | { | ||
72 | device->pd = ib_alloc_pd(device->ib_device); | ||
73 | if (IS_ERR(device->pd)) | ||
74 | goto pd_err; | ||
75 | |||
76 | device->cq = ib_create_cq(device->ib_device, | ||
77 | iser_cq_callback, | ||
78 | iser_cq_event_callback, | ||
79 | (void *)device, | ||
80 | ISER_MAX_CQ_LEN); | ||
81 | if (IS_ERR(device->cq)) | ||
82 | goto cq_err; | ||
83 | |||
84 | if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP)) | ||
85 | goto cq_arm_err; | ||
86 | |||
87 | tasklet_init(&device->cq_tasklet, | ||
88 | iser_cq_tasklet_fn, | ||
89 | (unsigned long)device); | ||
90 | |||
91 | device->mr = ib_get_dma_mr(device->pd, | ||
92 | IB_ACCESS_LOCAL_WRITE); | ||
93 | if (IS_ERR(device->mr)) | ||
94 | goto dma_mr_err; | ||
95 | |||
96 | return 0; | ||
97 | |||
98 | dma_mr_err: | ||
99 | tasklet_kill(&device->cq_tasklet); | ||
100 | cq_arm_err: | ||
101 | ib_destroy_cq(device->cq); | ||
102 | cq_err: | ||
103 | ib_dealloc_pd(device->pd); | ||
104 | pd_err: | ||
105 | iser_err("failed to allocate an IB resource\n"); | ||
106 | return -1; | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * iser_free_device_ib_res - destory/dealloc/dereg the DMA MR, | ||
111 | * CQ and PD created with the device associated with the adapator. | ||
112 | */ | ||
113 | static void iser_free_device_ib_res(struct iser_device *device) | ||
114 | { | ||
115 | BUG_ON(device->mr == NULL); | ||
116 | |||
117 | tasklet_kill(&device->cq_tasklet); | ||
118 | |||
119 | (void)ib_dereg_mr(device->mr); | ||
120 | (void)ib_destroy_cq(device->cq); | ||
121 | (void)ib_dealloc_pd(device->pd); | ||
122 | |||
123 | device->mr = NULL; | ||
124 | device->cq = NULL; | ||
125 | device->pd = NULL; | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP) | ||
130 | * | ||
131 | * returns 0 on success, -1 on failure | ||
132 | */ | ||
133 | static int iser_create_ib_conn_res(struct iser_conn *ib_conn) | ||
134 | { | ||
135 | struct iser_device *device; | ||
136 | struct ib_qp_init_attr init_attr; | ||
137 | int ret; | ||
138 | struct ib_fmr_pool_param params; | ||
139 | |||
140 | BUG_ON(ib_conn->device == NULL); | ||
141 | |||
142 | device = ib_conn->device; | ||
143 | |||
144 | ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + | ||
145 | (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), | ||
146 | GFP_KERNEL); | ||
147 | if (!ib_conn->page_vec) { | ||
148 | ret = -ENOMEM; | ||
149 | goto alloc_err; | ||
150 | } | ||
151 | ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1); | ||
152 | |||
153 | params.page_shift = PAGE_SHIFT; | ||
154 | /* when the first/last SG element are not start/end * | ||
155 | * page aligned, the map whould be of N+1 pages */ | ||
156 | params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; | ||
157 | /* make the pool size twice the max number of SCSI commands * | ||
158 | * the ML is expected to queue, watermark for unmap at 50% */ | ||
159 | params.pool_size = ISCSI_XMIT_CMDS_MAX * 2; | ||
160 | params.dirty_watermark = ISCSI_XMIT_CMDS_MAX; | ||
161 | params.cache = 0; | ||
162 | params.flush_function = NULL; | ||
163 | params.access = (IB_ACCESS_LOCAL_WRITE | | ||
164 | IB_ACCESS_REMOTE_WRITE | | ||
165 | IB_ACCESS_REMOTE_READ); | ||
166 | |||
167 | ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); | ||
168 | if (IS_ERR(ib_conn->fmr_pool)) { | ||
169 | ret = PTR_ERR(ib_conn->fmr_pool); | ||
170 | goto fmr_pool_err; | ||
171 | } | ||
172 | |||
173 | memset(&init_attr, 0, sizeof init_attr); | ||
174 | |||
175 | init_attr.event_handler = iser_qp_event_callback; | ||
176 | init_attr.qp_context = (void *)ib_conn; | ||
177 | init_attr.send_cq = device->cq; | ||
178 | init_attr.recv_cq = device->cq; | ||
179 | init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; | ||
180 | init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; | ||
181 | init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN; | ||
182 | init_attr.cap.max_recv_sge = 2; | ||
183 | init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; | ||
184 | init_attr.qp_type = IB_QPT_RC; | ||
185 | |||
186 | ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); | ||
187 | if (ret) | ||
188 | goto qp_err; | ||
189 | |||
190 | ib_conn->qp = ib_conn->cma_id->qp; | ||
191 | iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n", | ||
192 | ib_conn, ib_conn->cma_id, | ||
193 | ib_conn->fmr_pool, ib_conn->cma_id->qp); | ||
194 | return ret; | ||
195 | |||
196 | qp_err: | ||
197 | (void)ib_destroy_fmr_pool(ib_conn->fmr_pool); | ||
198 | fmr_pool_err: | ||
199 | kfree(ib_conn->page_vec); | ||
200 | alloc_err: | ||
201 | iser_err("unable to alloc mem or create resource, err %d\n", ret); | ||
202 | return ret; | ||
203 | } | ||
204 | |||
205 | /** | ||
206 | * releases the FMR pool, QP and CMA ID objects, returns 0 on success, | ||
207 | * -1 on failure | ||
208 | */ | ||
209 | static int iser_free_ib_conn_res(struct iser_conn *ib_conn) | ||
210 | { | ||
211 | BUG_ON(ib_conn == NULL); | ||
212 | |||
213 | iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n", | ||
214 | ib_conn, ib_conn->cma_id, | ||
215 | ib_conn->fmr_pool, ib_conn->qp); | ||
216 | |||
217 | /* qp is created only once both addr & route are resolved */ | ||
218 | if (ib_conn->fmr_pool != NULL) | ||
219 | ib_destroy_fmr_pool(ib_conn->fmr_pool); | ||
220 | |||
221 | if (ib_conn->qp != NULL) | ||
222 | rdma_destroy_qp(ib_conn->cma_id); | ||
223 | |||
224 | if (ib_conn->cma_id != NULL) | ||
225 | rdma_destroy_id(ib_conn->cma_id); | ||
226 | |||
227 | ib_conn->fmr_pool = NULL; | ||
228 | ib_conn->qp = NULL; | ||
229 | ib_conn->cma_id = NULL; | ||
230 | kfree(ib_conn->page_vec); | ||
231 | |||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * based on the resolved device node GUID see if there already allocated | ||
237 | * device for this device. If there's no such, create one. | ||
238 | */ | ||
239 | static | ||
240 | struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) | ||
241 | { | ||
242 | struct list_head *p_list; | ||
243 | struct iser_device *device = NULL; | ||
244 | |||
245 | mutex_lock(&ig.device_list_mutex); | ||
246 | |||
247 | p_list = ig.device_list.next; | ||
248 | while (p_list != &ig.device_list) { | ||
249 | device = list_entry(p_list, struct iser_device, ig_list); | ||
250 | /* find if there's a match using the node GUID */ | ||
251 | if (device->ib_device->node_guid == cma_id->device->node_guid) | ||
252 | break; | ||
253 | } | ||
254 | |||
255 | if (device == NULL) { | ||
256 | device = kzalloc(sizeof *device, GFP_KERNEL); | ||
257 | if (device == NULL) | ||
258 | goto out; | ||
259 | /* assign this device to the device */ | ||
260 | device->ib_device = cma_id->device; | ||
261 | /* init the device and link it into ig device list */ | ||
262 | if (iser_create_device_ib_res(device)) { | ||
263 | kfree(device); | ||
264 | device = NULL; | ||
265 | goto out; | ||
266 | } | ||
267 | list_add(&device->ig_list, &ig.device_list); | ||
268 | } | ||
269 | out: | ||
270 | BUG_ON(device == NULL); | ||
271 | device->refcount++; | ||
272 | mutex_unlock(&ig.device_list_mutex); | ||
273 | return device; | ||
274 | } | ||
275 | |||
276 | /* if there's no demand for this device, release it */ | ||
277 | static void iser_device_try_release(struct iser_device *device) | ||
278 | { | ||
279 | mutex_lock(&ig.device_list_mutex); | ||
280 | device->refcount--; | ||
281 | iser_err("device %p refcount %d\n",device,device->refcount); | ||
282 | if (!device->refcount) { | ||
283 | iser_free_device_ib_res(device); | ||
284 | list_del(&device->ig_list); | ||
285 | kfree(device); | ||
286 | } | ||
287 | mutex_unlock(&ig.device_list_mutex); | ||
288 | } | ||
289 | |||
290 | int iser_conn_state_comp(struct iser_conn *ib_conn, | ||
291 | enum iser_ib_conn_state comp) | ||
292 | { | ||
293 | int ret; | ||
294 | |||
295 | spin_lock_bh(&ib_conn->lock); | ||
296 | ret = (ib_conn->state == comp); | ||
297 | spin_unlock_bh(&ib_conn->lock); | ||
298 | return ret; | ||
299 | } | ||
300 | |||
301 | static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, | ||
302 | enum iser_ib_conn_state comp, | ||
303 | enum iser_ib_conn_state exch) | ||
304 | { | ||
305 | int ret; | ||
306 | |||
307 | spin_lock_bh(&ib_conn->lock); | ||
308 | if ((ret = (ib_conn->state == comp))) | ||
309 | ib_conn->state = exch; | ||
310 | spin_unlock_bh(&ib_conn->lock); | ||
311 | return ret; | ||
312 | } | ||
313 | |||
314 | /** | ||
315 | * triggers start of the disconnect procedures and wait for them to be done | ||
316 | */ | ||
317 | void iser_conn_terminate(struct iser_conn *ib_conn) | ||
318 | { | ||
319 | int err = 0; | ||
320 | |||
321 | /* change the ib conn state only if the conn is UP, however always call | ||
322 | * rdma_disconnect since this is the only way to cause the CMA to change | ||
323 | * the QP state to ERROR | ||
324 | */ | ||
325 | |||
326 | iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); | ||
327 | err = rdma_disconnect(ib_conn->cma_id); | ||
328 | if (err) | ||
329 | iser_err("Failed to disconnect, conn: 0x%p err %d\n", | ||
330 | ib_conn,err); | ||
331 | |||
332 | wait_event_interruptible(ib_conn->wait, | ||
333 | ib_conn->state == ISER_CONN_DOWN); | ||
334 | |||
335 | iser_conn_release(ib_conn); | ||
336 | } | ||
337 | |||
338 | static void iser_connect_error(struct rdma_cm_id *cma_id) | ||
339 | { | ||
340 | struct iser_conn *ib_conn; | ||
341 | ib_conn = (struct iser_conn *)cma_id->context; | ||
342 | |||
343 | ib_conn->state = ISER_CONN_DOWN; | ||
344 | wake_up_interruptible(&ib_conn->wait); | ||
345 | } | ||
346 | |||
347 | static void iser_addr_handler(struct rdma_cm_id *cma_id) | ||
348 | { | ||
349 | struct iser_device *device; | ||
350 | struct iser_conn *ib_conn; | ||
351 | int ret; | ||
352 | |||
353 | device = iser_device_find_by_ib_device(cma_id); | ||
354 | ib_conn = (struct iser_conn *)cma_id->context; | ||
355 | ib_conn->device = device; | ||
356 | |||
357 | ret = rdma_resolve_route(cma_id, 1000); | ||
358 | if (ret) { | ||
359 | iser_err("resolve route failed: %d\n", ret); | ||
360 | iser_connect_error(cma_id); | ||
361 | } | ||
362 | return; | ||
363 | } | ||
364 | |||
365 | static void iser_route_handler(struct rdma_cm_id *cma_id) | ||
366 | { | ||
367 | struct rdma_conn_param conn_param; | ||
368 | int ret; | ||
369 | |||
370 | ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); | ||
371 | if (ret) | ||
372 | goto failure; | ||
373 | |||
374 | iser_dbg("path.mtu is %d setting it to %d\n", | ||
375 | cma_id->route.path_rec->mtu, IB_MTU_1024); | ||
376 | |||
377 | /* we must set the MTU to 1024 as this is what the target is assuming */ | ||
378 | if (cma_id->route.path_rec->mtu > IB_MTU_1024) | ||
379 | cma_id->route.path_rec->mtu = IB_MTU_1024; | ||
380 | |||
381 | memset(&conn_param, 0, sizeof conn_param); | ||
382 | conn_param.responder_resources = 4; | ||
383 | conn_param.initiator_depth = 1; | ||
384 | conn_param.retry_count = 7; | ||
385 | conn_param.rnr_retry_count = 6; | ||
386 | |||
387 | ret = rdma_connect(cma_id, &conn_param); | ||
388 | if (ret) { | ||
389 | iser_err("failure connecting: %d\n", ret); | ||
390 | goto failure; | ||
391 | } | ||
392 | |||
393 | return; | ||
394 | failure: | ||
395 | iser_connect_error(cma_id); | ||
396 | } | ||
397 | |||
398 | static void iser_connected_handler(struct rdma_cm_id *cma_id) | ||
399 | { | ||
400 | struct iser_conn *ib_conn; | ||
401 | |||
402 | ib_conn = (struct iser_conn *)cma_id->context; | ||
403 | ib_conn->state = ISER_CONN_UP; | ||
404 | wake_up_interruptible(&ib_conn->wait); | ||
405 | } | ||
406 | |||
407 | static void iser_disconnected_handler(struct rdma_cm_id *cma_id) | ||
408 | { | ||
409 | struct iser_conn *ib_conn; | ||
410 | |||
411 | ib_conn = (struct iser_conn *)cma_id->context; | ||
412 | ib_conn->disc_evt_flag = 1; | ||
413 | |||
414 | /* getting here when the state is UP means that the conn is being * | ||
415 | * terminated asynchronously from the iSCSI layer's perspective. */ | ||
416 | if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, | ||
417 | ISER_CONN_TERMINATING)) | ||
418 | iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, | ||
419 | ISCSI_ERR_CONN_FAILED); | ||
420 | |||
421 | /* Complete the termination process if no posts are pending */ | ||
422 | if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) && | ||
423 | (atomic_read(&ib_conn->post_send_buf_count) == 0)) { | ||
424 | ib_conn->state = ISER_CONN_DOWN; | ||
425 | wake_up_interruptible(&ib_conn->wait); | ||
426 | } | ||
427 | } | ||
428 | |||
429 | static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) | ||
430 | { | ||
431 | int ret = 0; | ||
432 | |||
433 | iser_err("event %d conn %p id %p\n",event->event,cma_id->context,cma_id); | ||
434 | |||
435 | switch (event->event) { | ||
436 | case RDMA_CM_EVENT_ADDR_RESOLVED: | ||
437 | iser_addr_handler(cma_id); | ||
438 | break; | ||
439 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | ||
440 | iser_route_handler(cma_id); | ||
441 | break; | ||
442 | case RDMA_CM_EVENT_ESTABLISHED: | ||
443 | iser_connected_handler(cma_id); | ||
444 | break; | ||
445 | case RDMA_CM_EVENT_ADDR_ERROR: | ||
446 | case RDMA_CM_EVENT_ROUTE_ERROR: | ||
447 | case RDMA_CM_EVENT_CONNECT_ERROR: | ||
448 | case RDMA_CM_EVENT_UNREACHABLE: | ||
449 | case RDMA_CM_EVENT_REJECTED: | ||
450 | iser_err("event: %d, error: %d\n", event->event, event->status); | ||
451 | iser_connect_error(cma_id); | ||
452 | break; | ||
453 | case RDMA_CM_EVENT_DISCONNECTED: | ||
454 | iser_disconnected_handler(cma_id); | ||
455 | break; | ||
456 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | ||
457 | BUG(); | ||
458 | break; | ||
459 | case RDMA_CM_EVENT_CONNECT_RESPONSE: | ||
460 | BUG(); | ||
461 | break; | ||
462 | case RDMA_CM_EVENT_CONNECT_REQUEST: | ||
463 | default: | ||
464 | break; | ||
465 | } | ||
466 | return ret; | ||
467 | } | ||
468 | |||
469 | int iser_conn_init(struct iser_conn **ibconn) | ||
470 | { | ||
471 | struct iser_conn *ib_conn; | ||
472 | |||
473 | ib_conn = kzalloc(sizeof *ib_conn, GFP_KERNEL); | ||
474 | if (!ib_conn) { | ||
475 | iser_err("can't alloc memory for struct iser_conn\n"); | ||
476 | return -ENOMEM; | ||
477 | } | ||
478 | ib_conn->state = ISER_CONN_INIT; | ||
479 | init_waitqueue_head(&ib_conn->wait); | ||
480 | atomic_set(&ib_conn->post_recv_buf_count, 0); | ||
481 | atomic_set(&ib_conn->post_send_buf_count, 0); | ||
482 | INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker, | ||
483 | ib_conn); | ||
484 | INIT_LIST_HEAD(&ib_conn->conn_list); | ||
485 | spin_lock_init(&ib_conn->lock); | ||
486 | |||
487 | *ibconn = ib_conn; | ||
488 | return 0; | ||
489 | } | ||
490 | |||
491 | /** | ||
492 | * starts the process of connecting to the target | ||
493 | * sleeps untill the connection is established or rejected | ||
494 | */ | ||
495 | int iser_connect(struct iser_conn *ib_conn, | ||
496 | struct sockaddr_in *src_addr, | ||
497 | struct sockaddr_in *dst_addr, | ||
498 | int non_blocking) | ||
499 | { | ||
500 | struct sockaddr *src, *dst; | ||
501 | int err = 0; | ||
502 | |||
503 | sprintf(ib_conn->name,"%d.%d.%d.%d:%d", | ||
504 | NIPQUAD(dst_addr->sin_addr.s_addr), dst_addr->sin_port); | ||
505 | |||
506 | /* the device is known only --after-- address resolution */ | ||
507 | ib_conn->device = NULL; | ||
508 | |||
509 | iser_err("connecting to: %d.%d.%d.%d, port 0x%x\n", | ||
510 | NIPQUAD(dst_addr->sin_addr), dst_addr->sin_port); | ||
511 | |||
512 | ib_conn->state = ISER_CONN_PENDING; | ||
513 | |||
514 | ib_conn->cma_id = rdma_create_id(iser_cma_handler, | ||
515 | (void *)ib_conn, | ||
516 | RDMA_PS_TCP); | ||
517 | if (IS_ERR(ib_conn->cma_id)) { | ||
518 | err = PTR_ERR(ib_conn->cma_id); | ||
519 | iser_err("rdma_create_id failed: %d\n", err); | ||
520 | goto id_failure; | ||
521 | } | ||
522 | |||
523 | src = (struct sockaddr *)src_addr; | ||
524 | dst = (struct sockaddr *)dst_addr; | ||
525 | err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); | ||
526 | if (err) { | ||
527 | iser_err("rdma_resolve_addr failed: %d\n", err); | ||
528 | goto addr_failure; | ||
529 | } | ||
530 | |||
531 | if (!non_blocking) { | ||
532 | wait_event_interruptible(ib_conn->wait, | ||
533 | (ib_conn->state != ISER_CONN_PENDING)); | ||
534 | |||
535 | if (ib_conn->state != ISER_CONN_UP) { | ||
536 | err = -EIO; | ||
537 | goto connect_failure; | ||
538 | } | ||
539 | } | ||
540 | |||
541 | mutex_lock(&ig.connlist_mutex); | ||
542 | list_add(&ib_conn->conn_list, &ig.connlist); | ||
543 | mutex_unlock(&ig.connlist_mutex); | ||
544 | return 0; | ||
545 | |||
546 | id_failure: | ||
547 | ib_conn->cma_id = NULL; | ||
548 | addr_failure: | ||
549 | ib_conn->state = ISER_CONN_DOWN; | ||
550 | connect_failure: | ||
551 | iser_conn_release(ib_conn); | ||
552 | return err; | ||
553 | } | ||
554 | |||
555 | /** | ||
556 | * Frees all conn objects and deallocs conn descriptor | ||
557 | */ | ||
558 | void iser_conn_release(struct iser_conn *ib_conn) | ||
559 | { | ||
560 | struct iser_device *device = ib_conn->device; | ||
561 | |||
562 | BUG_ON(ib_conn->state != ISER_CONN_DOWN); | ||
563 | |||
564 | mutex_lock(&ig.connlist_mutex); | ||
565 | list_del(&ib_conn->conn_list); | ||
566 | mutex_unlock(&ig.connlist_mutex); | ||
567 | |||
568 | iser_free_ib_conn_res(ib_conn); | ||
569 | ib_conn->device = NULL; | ||
570 | /* on EVENT_ADDR_ERROR there's no device yet for this conn */ | ||
571 | if (device != NULL) | ||
572 | iser_device_try_release(device); | ||
573 | kfree(ib_conn); | ||
574 | } | ||
575 | |||
576 | |||
577 | /** | ||
578 | * iser_reg_page_vec - Register physical memory | ||
579 | * | ||
580 | * returns: 0 on success, errno code on failure | ||
581 | */ | ||
582 | int iser_reg_page_vec(struct iser_conn *ib_conn, | ||
583 | struct iser_page_vec *page_vec, | ||
584 | struct iser_mem_reg *mem_reg) | ||
585 | { | ||
586 | struct ib_pool_fmr *mem; | ||
587 | u64 io_addr; | ||
588 | u64 *page_list; | ||
589 | int status; | ||
590 | |||
591 | page_list = page_vec->pages; | ||
592 | io_addr = page_list[0]; | ||
593 | |||
594 | mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool, | ||
595 | page_list, | ||
596 | page_vec->length, | ||
597 | &io_addr); | ||
598 | |||
599 | if (IS_ERR(mem)) { | ||
600 | status = (int)PTR_ERR(mem); | ||
601 | iser_err("ib_fmr_pool_map_phys failed: %d\n", status); | ||
602 | return status; | ||
603 | } | ||
604 | |||
605 | mem_reg->lkey = mem->fmr->lkey; | ||
606 | mem_reg->rkey = mem->fmr->rkey; | ||
607 | mem_reg->len = page_vec->length * PAGE_SIZE; | ||
608 | mem_reg->va = io_addr; | ||
609 | mem_reg->mem_h = (void *)mem; | ||
610 | |||
611 | mem_reg->va += page_vec->offset; | ||
612 | mem_reg->len = page_vec->data_size; | ||
613 | |||
614 | iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, " | ||
615 | "entry[0]: (0x%08lx,%ld)] -> " | ||
616 | "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n", | ||
617 | page_vec, page_vec->length, | ||
618 | (unsigned long)page_vec->pages[0], | ||
619 | (unsigned long)page_vec->data_size, | ||
620 | (unsigned int)mem_reg->lkey, mem_reg->mem_h, | ||
621 | (unsigned long)mem_reg->va, (unsigned long)mem_reg->len); | ||
622 | return 0; | ||
623 | } | ||
624 | |||
625 | /** | ||
626 | * Unregister (previosuly registered) memory. | ||
627 | */ | ||
628 | void iser_unreg_mem(struct iser_mem_reg *reg) | ||
629 | { | ||
630 | int ret; | ||
631 | |||
632 | iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); | ||
633 | |||
634 | ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); | ||
635 | if (ret) | ||
636 | iser_err("ib_fmr_pool_unmap failed %d\n", ret); | ||
637 | |||
638 | reg->mem_h = NULL; | ||
639 | } | ||
640 | |||
641 | /** | ||
642 | * iser_dto_to_iov - builds IOV from a dto descriptor | ||
643 | */ | ||
644 | static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len) | ||
645 | { | ||
646 | int i; | ||
647 | struct ib_sge *sge; | ||
648 | struct iser_regd_buf *regd_buf; | ||
649 | |||
650 | if (dto->regd_vector_len > iov_len) { | ||
651 | iser_err("iov size %d too small for posting dto of len %d\n", | ||
652 | iov_len, dto->regd_vector_len); | ||
653 | BUG(); | ||
654 | } | ||
655 | |||
656 | for (i = 0; i < dto->regd_vector_len; i++) { | ||
657 | sge = &iov[i]; | ||
658 | regd_buf = dto->regd[i]; | ||
659 | |||
660 | sge->addr = regd_buf->reg.va; | ||
661 | sge->length = regd_buf->reg.len; | ||
662 | sge->lkey = regd_buf->reg.lkey; | ||
663 | |||
664 | if (dto->used_sz[i] > 0) /* Adjust size */ | ||
665 | sge->length = dto->used_sz[i]; | ||
666 | |||
667 | /* offset and length should not exceed the regd buf length */ | ||
668 | if (sge->length + dto->offset[i] > regd_buf->reg.len) { | ||
669 | iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:" | ||
670 | "%ld in dto:0x%p [%d], va:0x%08lX\n", | ||
671 | (unsigned long)sge->length, dto->offset[i], | ||
672 | (unsigned long)regd_buf->reg.len, dto, i, | ||
673 | (unsigned long)sge->addr); | ||
674 | BUG(); | ||
675 | } | ||
676 | |||
677 | sge->addr += dto->offset[i]; /* Adjust offset */ | ||
678 | } | ||
679 | } | ||
680 | |||
681 | /** | ||
682 | * iser_post_recv - Posts a receive buffer. | ||
683 | * | ||
684 | * returns 0 on success, -1 on failure | ||
685 | */ | ||
686 | int iser_post_recv(struct iser_desc *rx_desc) | ||
687 | { | ||
688 | int ib_ret, ret_val = 0; | ||
689 | struct ib_recv_wr recv_wr, *recv_wr_failed; | ||
690 | struct ib_sge iov[2]; | ||
691 | struct iser_conn *ib_conn; | ||
692 | struct iser_dto *recv_dto = &rx_desc->dto; | ||
693 | |||
694 | /* Retrieve conn */ | ||
695 | ib_conn = recv_dto->conn->ib_conn; | ||
696 | |||
697 | iser_dto_to_iov(recv_dto, iov, 2); | ||
698 | |||
699 | recv_wr.next = NULL; | ||
700 | recv_wr.sg_list = iov; | ||
701 | recv_wr.num_sge = recv_dto->regd_vector_len; | ||
702 | recv_wr.wr_id = (unsigned long)rx_desc; | ||
703 | |||
704 | atomic_inc(&ib_conn->post_recv_buf_count); | ||
705 | ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed); | ||
706 | if (ib_ret) { | ||
707 | iser_err("ib_post_recv failed ret=%d\n", ib_ret); | ||
708 | atomic_dec(&ib_conn->post_recv_buf_count); | ||
709 | ret_val = -1; | ||
710 | } | ||
711 | |||
712 | return ret_val; | ||
713 | } | ||
714 | |||
715 | /** | ||
716 | * iser_start_send - Initiate a Send DTO operation | ||
717 | * | ||
718 | * returns 0 on success, -1 on failure | ||
719 | */ | ||
720 | int iser_post_send(struct iser_desc *tx_desc) | ||
721 | { | ||
722 | int ib_ret, ret_val = 0; | ||
723 | struct ib_send_wr send_wr, *send_wr_failed; | ||
724 | struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN]; | ||
725 | struct iser_conn *ib_conn; | ||
726 | struct iser_dto *dto = &tx_desc->dto; | ||
727 | |||
728 | ib_conn = dto->conn->ib_conn; | ||
729 | |||
730 | iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN); | ||
731 | |||
732 | send_wr.next = NULL; | ||
733 | send_wr.wr_id = (unsigned long)tx_desc; | ||
734 | send_wr.sg_list = iov; | ||
735 | send_wr.num_sge = dto->regd_vector_len; | ||
736 | send_wr.opcode = IB_WR_SEND; | ||
737 | send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0; | ||
738 | |||
739 | atomic_inc(&ib_conn->post_send_buf_count); | ||
740 | |||
741 | ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); | ||
742 | if (ib_ret) { | ||
743 | iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n", | ||
744 | dto, dto->regd_vector_len); | ||
745 | iser_err("ib_post_send failed, ret:%d\n", ib_ret); | ||
746 | atomic_dec(&ib_conn->post_send_buf_count); | ||
747 | ret_val = -1; | ||
748 | } | ||
749 | |||
750 | return ret_val; | ||
751 | } | ||
752 | |||
753 | static void iser_comp_error_worker(void *data) | ||
754 | { | ||
755 | struct iser_conn *ib_conn = data; | ||
756 | |||
757 | /* getting here when the state is UP means that the conn is being * | ||
758 | * terminated asynchronously from the iSCSI layer's perspective. */ | ||
759 | if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, | ||
760 | ISER_CONN_TERMINATING)) | ||
761 | iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, | ||
762 | ISCSI_ERR_CONN_FAILED); | ||
763 | |||
764 | /* complete the termination process if disconnect event was delivered * | ||
765 | * note there are no more non completed posts to the QP */ | ||
766 | if (ib_conn->disc_evt_flag) { | ||
767 | ib_conn->state = ISER_CONN_DOWN; | ||
768 | wake_up_interruptible(&ib_conn->wait); | ||
769 | } | ||
770 | } | ||
771 | |||
772 | static void iser_handle_comp_error(struct iser_desc *desc) | ||
773 | { | ||
774 | struct iser_dto *dto = &desc->dto; | ||
775 | struct iser_conn *ib_conn = dto->conn->ib_conn; | ||
776 | |||
777 | iser_dto_buffs_release(dto); | ||
778 | |||
779 | if (desc->type == ISCSI_RX) { | ||
780 | kfree(desc->data); | ||
781 | kmem_cache_free(ig.desc_cache, desc); | ||
782 | atomic_dec(&ib_conn->post_recv_buf_count); | ||
783 | } else { /* type is TX control/command/dataout */ | ||
784 | if (desc->type == ISCSI_TX_DATAOUT) | ||
785 | kmem_cache_free(ig.desc_cache, desc); | ||
786 | atomic_dec(&ib_conn->post_send_buf_count); | ||
787 | } | ||
788 | |||
789 | if (atomic_read(&ib_conn->post_recv_buf_count) == 0 && | ||
790 | atomic_read(&ib_conn->post_send_buf_count) == 0) | ||
791 | schedule_work(&ib_conn->comperror_work); | ||
792 | } | ||
793 | |||
794 | static void iser_cq_tasklet_fn(unsigned long data) | ||
795 | { | ||
796 | struct iser_device *device = (struct iser_device *)data; | ||
797 | struct ib_cq *cq = device->cq; | ||
798 | struct ib_wc wc; | ||
799 | struct iser_desc *desc; | ||
800 | unsigned long xfer_len; | ||
801 | |||
802 | while (ib_poll_cq(cq, 1, &wc) == 1) { | ||
803 | desc = (struct iser_desc *) (unsigned long) wc.wr_id; | ||
804 | BUG_ON(desc == NULL); | ||
805 | |||
806 | if (wc.status == IB_WC_SUCCESS) { | ||
807 | if (desc->type == ISCSI_RX) { | ||
808 | xfer_len = (unsigned long)wc.byte_len; | ||
809 | iser_rcv_completion(desc, xfer_len); | ||
810 | } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ | ||
811 | iser_snd_completion(desc); | ||
812 | } else { | ||
813 | iser_err("comp w. error op %d status %d\n",desc->type,wc.status); | ||
814 | iser_handle_comp_error(desc); | ||
815 | } | ||
816 | } | ||
817 | /* #warning "it is assumed here that arming CQ only once its empty" * | ||
818 | * " would not cause interrupts to be missed" */ | ||
819 | ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); | ||
820 | } | ||
821 | |||
/*
 * Completion callback registered for the device CQ. It does no polling
 * itself: it only schedules the device CQ tasklet, which drains the CQ.
 * @cq_context is the iser device passed when the CQ was created.
 */
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_device *device = (struct iser_device *)cq_context;

	tasklet_schedule(&device->cq_tasklet);
}