author	Linus Torvalds <torvalds@g5.osdl.org>	2006-06-25 19:07:58 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-06-25 19:07:58 -0400
commit	61b9175808670d9abf52156803ae0ed1e3706ac4 (patch)
tree	feeaf7190aff1daa6ae365fd7c1107ea2d60fb9f /drivers/infiniband
parent	f36f44de721db44b4c2944133c3c5c2e06f633f0 (diff)
parent	3f1244a2f8d3892f991b662cea49b2a0b4e0c115 (diff)
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
  IB/iser: iSER Kconfig and Makefile
  IB/iser: iSER handling of memory for RDMA
  IB/iser: iSER RDMA CM (CMA) and IB verbs interaction
  IB/iser: iSER initiator iSCSI PDU and TX/RX
  IB/iser: iSCSI iSER transport provider high level code
  IB/iser: iSCSI iSER transport provider header file
  IB/uverbs: Remove unnecessary list_del()s
  IB/uverbs: Don't free wr list when it's known to be empty
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--	drivers/infiniband/Kconfig	2
-rw-r--r--	drivers/infiniband/Makefile	1
-rw-r--r--	drivers/infiniband/core/uverbs_cmd.c	2
-rw-r--r--	drivers/infiniband/core/uverbs_main.c	6
-rw-r--r--	drivers/infiniband/ulp/iser/Kconfig	11
-rw-r--r--	drivers/infiniband/ulp/iser/Makefile	4
-rw-r--r--	drivers/infiniband/ulp/iser/iscsi_iser.c	790
-rw-r--r--	drivers/infiniband/ulp/iser/iscsi_iser.h	354
-rw-r--r--	drivers/infiniband/ulp/iser/iser_initiator.c	738
-rw-r--r--	drivers/infiniband/ulp/iser/iser_memory.c	401
-rw-r--r--	drivers/infiniband/ulp/iser/iser_verbs.c	827
11 files changed, 3129 insertions(+), 7 deletions(-)
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index ba2d6505e9a4..69a53d476b5b 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -41,4 +41,6 @@ source "drivers/infiniband/ulp/ipoib/Kconfig"
 
 source "drivers/infiniband/ulp/srp/Kconfig"
 
+source "drivers/infiniband/ulp/iser/Kconfig"
+
 endmenu
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index eea27322a22d..c7ff58c1d0e5 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
 obj-$(CONFIG_IPATH_CORE)		+= hw/ipath/
 obj-$(CONFIG_INFINIBAND_IPOIB)	+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)	+= ulp/srp/
+obj-$(CONFIG_INFINIBAND_ISER)	+= ulp/iser/
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 76bf61e9b552..a908a7bdcd7f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1530,7 +1530,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 out_put:
 	put_qp_read(qp);
 
-out:
 	while (wr) {
 		if (is_ud && wr->wr.ud.ah)
 			put_ah_read(wr->wr.ud.ah);
@@ -1539,6 +1538,7 @@ out:
 		wr = next;
 	}
 
+out:
 	kfree(user_wr);
 
 	return ret ? ret : in_len;
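Moving the label matters because the early failure paths in ib_uverbs_post_send() jump to it before any work-request nodes have been copied in from user space; with the label placed after the unwind loop, those paths no longer walk a list that is known to be empty. A minimal sketch of the pattern, using hypothetical helpers (setup(), build_list()) and types rather than the kernel function itself:

	/* order labels so each error path unwinds only what exists yet */
	static int example_post(struct ctx *c)
	{
		struct node *list = NULL, *next;
		int ret;

		ret = setup(c);		/* can fail before any node is built */
		if (ret)
			goto out;

		ret = build_list(c, &list);

		while (list) {		/* unwind whatever was built */
			next = list->next;
			kfree(list);
			list = next;
		}
	out:
		kfree(c->scratch);	/* allocated unconditionally earlier */
		return ret;
	}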
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index e57d3c50f75f..e725cccc7cde 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -188,7 +188,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
 		ib_destroy_ah(ah);
-		list_del(&uobj->list);
 		kfree(uobj);
 	}
 
@@ -200,7 +199,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
 		ib_uverbs_detach_umcast(qp, uqp);
 		ib_destroy_qp(qp);
-		list_del(&uobj->list);
 		ib_uverbs_release_uevent(file, &uqp->uevent);
 		kfree(uqp);
 	}
@@ -213,7 +211,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
 		ib_destroy_cq(cq);
-		list_del(&uobj->list);
 		ib_uverbs_release_ucq(file, ev_file, ucq);
 		kfree(ucq);
 	}
@@ -225,7 +222,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 		ib_destroy_srq(srq);
-		list_del(&uobj->list);
 		ib_uverbs_release_uevent(file, uevent);
 		kfree(uevent);
 	}
@@ -243,7 +239,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		memobj = container_of(uobj, struct ib_umem_object, uobject);
 		ib_umem_release_on_close(mrdev, &memobj->umem);
 
-		list_del(&uobj->list);
 		kfree(memobj);
 	}
 
@@ -252,7 +247,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
 		ib_dealloc_pd(pd);
-		list_del(&uobj->list);
 		kfree(uobj);
 	}
 
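Each of these blocks runs inside a list_for_each_entry_safe() walk over a ucontext list that is being torn down as a whole, so unlinking every uobj before freeing it is pure overhead: the safe iterator already caches the next element, and the list head is never looked at again. A minimal sketch of that pattern, with a hypothetical uobj type:

	#include <linux/list.h>
	#include <linux/slab.h>

	struct uobj {
		struct list_head list;
	};

	static void teardown_all(struct list_head *head)
	{
		struct uobj *u, *tmp;

		list_for_each_entry_safe(u, tmp, head, list) {
			/* destroy the object's HW resources here ... */
			kfree(u);	/* no list_del(&u->list) needed */
		}
	}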
diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
new file mode 100644
index 000000000000..fead87d1eff9
--- /dev/null
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -0,0 +1,11 @@
config INFINIBAND_ISER
	tristate "ISCSI RDMA Protocol"
	depends on INFINIBAND && SCSI
	select SCSI_ISCSI_ATTRS
	---help---
	  Support for the ISCSI RDMA Protocol over InfiniBand.  This
	  allows you to access storage devices that speak ISER/ISCSI
	  over InfiniBand.

	  The ISER protocol is defined by IETF.
	  See <http://www.ietf.org/>.
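Note the two Kconfig mechanisms at work here: "depends on INFINIBAND && SCSI" only makes the option selectable once those subsystems are already enabled, while "select SCSI_ISCSI_ATTRS" pulls the iSCSI transport attributes in automatically whenever INFINIBAND_ISER is chosen.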
diff --git a/drivers/infiniband/ulp/iser/Makefile b/drivers/infiniband/ulp/iser/Makefile
new file mode 100644
index 000000000000..fe6cd15f2317
--- /dev/null
+++ b/drivers/infiniband/ulp/iser/Makefile
@@ -0,0 +1,4 @@
obj-$(CONFIG_INFINIBAND_ISER)	+= ib_iser.o

ib_iser-y	:= iser_verbs.o iser_initiator.o iser_memory.o \
		   iscsi_iser.o
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
new file mode 100644
index 000000000000..4c3f2de2a06e
--- /dev/null
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -0,0 +1,790 @@
/*
 * iSCSI Initiator over iSER Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 Mike Christie
 * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved.
 * maintained by openib-general@openib.org
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 * Modified by:
 *	Erez Zilber
 *
 *
 * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/hardirq.h>
#include <linux/kfifo.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/ioctl.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/cdev.h>
#include <linux/in.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/delay.h>

#include <net/sock.h>

#include <asm/uaccess.h>

#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_iser.h"

static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

int iser_debug_level = 0;

MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover "
		   "v" DRV_VER " (" DRV_DATE ")");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz");

module_param_named(debug_level, iser_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");

struct iser_global ig;

void
iscsi_iser_recv(struct iscsi_conn *conn,
		struct iscsi_hdr *hdr, char *rx_data, int rx_data_len)
{
	int rc = 0;
	uint32_t ret_itt;
	int datalen;
	int ahslen;

	/* verify PDU length */
	datalen = ntoh24(hdr->dlength);
	if (datalen != rx_data_len) {
		printk(KERN_ERR "iscsi_iser: datalen %d (hdr) != %d (IB)\n",
		       datalen, rx_data_len);
		rc = ISCSI_ERR_DATALEN;
		goto error;
	}

	/* read AHS */
	ahslen = hdr->hlength * 4;

	/* verify itt (itt encoding: age+cid+itt) */
	rc = iscsi_verify_itt(conn, hdr, &ret_itt);

	if (!rc)
		rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len);

	if (rc && rc != ISCSI_ERR_NO_SCSI_CMD)
		goto error;

	return;
error:
	iscsi_conn_failure(conn, rc);
}


/**
 * iscsi_iser_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
 *
 **/
static void
iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask)
{
	struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data;
	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
	struct scsi_cmnd *sc = ctask->sc;

	iser_ctask->command_sent = 0;
	iser_ctask->iser_conn = iser_conn;

	if (sc->sc_data_direction == DMA_TO_DEVICE) {
		BUG_ON(ctask->total_length == 0);
		/* bytes to be sent via RDMA operations */
		iser_ctask->rdma_data_count = ctask->total_length -
					      ctask->imm_count -
					      ctask->unsol_count;

		debug_scsi("cmd [itt %x total %d imm %d unsol_data %d "
			   "rdma_data %d]\n",
			   ctask->itt, ctask->total_length, ctask->imm_count,
			   ctask->unsol_count, iser_ctask->rdma_data_count);
	} else
		/* bytes to be sent via RDMA operations */
		iser_ctask->rdma_data_count = ctask->total_length;

	iser_ctask_rdma_init(iser_ctask);
}

/**
 * iscsi_mtask_xmit - xmit management (immediate) task
 * @conn: iscsi connection
 * @mtask: task management task
 *
 * Notes:
 *	The function can return -EAGAIN in which case caller must
 *	call it again later, or recover. '0' return code means successful
 *	xmit.
 *
 **/
static int
iscsi_iser_mtask_xmit(struct iscsi_conn *conn,
		      struct iscsi_mgmt_task *mtask)
{
	int error = 0;

	debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt);

	error = iser_send_control(conn, mtask);

	/* since iser xmits control with zero copy, mtasks can not be recycled
	 * right after sending them.
	 * The recycling scheme is based on whether a response is expected
	 * - if yes, the mtask is recycled at iscsi_complete_pdu
	 * - if no,  the mtask is recycled at iser_snd_completion
	 */
	if (error && error != -EAGAIN)
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);

	return error;
}

static int
iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn,
				 struct iscsi_cmd_task *ctask)
{
	struct iscsi_data hdr;
	int error = 0;
	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;

	/* Send data-out PDUs while there's still unsolicited data to send */
	while (ctask->unsol_count > 0) {
		iscsi_prep_unsolicit_data_pdu(ctask, &hdr,
					      iser_ctask->rdma_data_count);

		debug_scsi("Sending data-out: itt 0x%x, data count %d\n",
			   hdr.itt, ctask->data_count);

		/* the buffer description has been passed with the command */
		/* Send the command */
		error = iser_send_data_out(conn, ctask, &hdr);
		if (error) {
			ctask->unsol_datasn--;
			goto iscsi_iser_ctask_xmit_unsol_data_exit;
		}
		ctask->unsol_count -= ctask->data_count;
		debug_scsi("Need to send %d more as data-out PDUs\n",
			   ctask->unsol_count);
	}

iscsi_iser_ctask_xmit_unsol_data_exit:
	return error;
}

static int
iscsi_iser_ctask_xmit(struct iscsi_conn *conn,
		      struct iscsi_cmd_task *ctask)
{
	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
	int error = 0;

	debug_scsi("ctask deq [cid %d itt 0x%x]\n",
		   conn->id, ctask->itt);

	/*
	 * serialize with TMF AbortTask
	 */
	if (ctask->mtask)
		return error;

	/* Send the cmd PDU */
	if (!iser_ctask->command_sent) {
		error = iser_send_command(conn, ctask);
		if (error)
			goto iscsi_iser_ctask_xmit_exit;
		iser_ctask->command_sent = 1;
	}

	/* Send unsolicited data-out PDU(s) if necessary */
	if (ctask->unsol_count)
		error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask);

iscsi_iser_ctask_xmit_exit:
	if (error && error != -EAGAIN)
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return error;
}

static void
iscsi_iser_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;

	if (iser_ctask->status == ISER_TASK_STATUS_STARTED) {
		iser_ctask->status = ISER_TASK_STATUS_COMPLETED;
		iser_ctask_rdma_finalize(iser_ctask);
	}
}

static struct iser_conn *
iscsi_iser_ib_conn_lookup(__u64 ep_handle)
{
	struct iser_conn *ib_conn;
	struct iser_conn *uib_conn = (struct iser_conn *)(unsigned long)ep_handle;

	mutex_lock(&ig.connlist_mutex);
	list_for_each_entry(ib_conn, &ig.connlist, conn_list) {
		if (ib_conn == uib_conn) {
			mutex_unlock(&ig.connlist_mutex);
			return ib_conn;
		}
	}
	mutex_unlock(&ig.connlist_mutex);
	iser_err("no conn exists for eph %llx\n", (unsigned long long)ep_handle);
	return NULL;
}

static struct iscsi_cls_conn *
iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
	struct iscsi_conn *conn;
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_iser_conn *iser_conn;

	cls_conn = iscsi_conn_setup(cls_session, conn_idx);
	if (!cls_conn)
		return NULL;
	conn = cls_conn->dd_data;
	/*
	 * due to issues with the login code regarding iSER semantics,
	 * this is not set in iscsi_conn_setup - FIXME
	 */
	conn->max_recv_dlength = 128;

	iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL);
	if (!iser_conn)
		goto conn_alloc_fail;

	/* currently this is the only field which needs to be initialized */
	rwlock_init(&iser_conn->lock);

	conn->dd_data = iser_conn;
	iser_conn->iscsi_conn = conn;

	return cls_conn;

conn_alloc_fail:
	iscsi_conn_teardown(cls_conn);
	return NULL;
}

static void
iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_iser_conn *iser_conn = conn->dd_data;

	iscsi_conn_teardown(cls_conn);
	kfree(iser_conn);
}

static int
iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
		     struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		     int is_leading)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_iser_conn *iser_conn;
	struct iser_conn *ib_conn;
	int error;

	error = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (error)
		return error;

	/* the transport ep handle comes from user space so it must be
	 * verified against the global ib connections list */
	ib_conn = iscsi_iser_ib_conn_lookup(transport_eph);
	if (!ib_conn) {
		iser_err("can't bind eph %llx\n",
			 (unsigned long long)transport_eph);
		return -EINVAL;
	}
	/* binds the iSER connection retrieved from the previously
	 * connected ep_handle to the iSCSI layer connection. exchanges
	 * connection pointers */
	iser_err("binding iscsi conn %p to iser_conn %p\n", conn, ib_conn);
	iser_conn = conn->dd_data;
	ib_conn->iser_conn = iser_conn;
	iser_conn->ib_conn = ib_conn;

	conn->recv_lock = &iser_conn->lock;

	return 0;
}

static int
iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	int err;

	err = iscsi_conn_start(cls_conn);
	if (err)
		return err;

	return iser_conn_set_full_featured_mode(conn);
}

static void
iscsi_iser_conn_terminate(struct iscsi_conn *conn)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iser_conn *ib_conn = iser_conn->ib_conn;

	BUG_ON(!ib_conn);
	/* starts conn teardown process, waits until all previously   *
	 * posted buffers get flushed, deallocates all conn resources */
	iser_conn_terminate(ib_conn);
	iser_conn->ib_conn = NULL;
	conn->recv_lock = NULL;
}


static struct iscsi_transport iscsi_iser_transport;

static struct iscsi_cls_session *
iscsi_iser_session_create(struct iscsi_transport *iscsit,
			  struct scsi_transport_template *scsit,
			  uint32_t initial_cmdsn, uint32_t *hostno)
{
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
	int i;
	uint32_t hn;
	struct iscsi_cmd_task *ctask;
	struct iscsi_mgmt_task *mtask;
	struct iscsi_iser_cmd_task *iser_ctask;
	struct iser_desc *desc;

	cls_session = iscsi_session_setup(iscsit, scsit,
					  sizeof(struct iscsi_iser_cmd_task),
					  sizeof(struct iser_desc),
					  initial_cmdsn, &hn);
	if (!cls_session)
		return NULL;

	*hostno = hn;
	session = class_to_transport_session(cls_session);

	/* libiscsi sets up the itts, data buffers and pools, so just set the desc fields */
	for (i = 0; i < session->cmds_max; i++) {
		ctask = session->cmds[i];
		iser_ctask = ctask->dd_data;
		ctask->hdr = (struct iscsi_cmd *)&iser_ctask->desc.iscsi_header;
	}

	for (i = 0; i < session->mgmtpool_max; i++) {
		mtask = session->mgmt_cmds[i];
		desc = mtask->dd_data;
		mtask->hdr = &desc->iscsi_header;
		desc->data = mtask->data;
	}

	return cls_session;
}

static int
iscsi_iser_conn_set_param(struct iscsi_cls_conn *cls_conn,
			  enum iscsi_param param, uint32_t value)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_session *session = conn->session;

	spin_lock_bh(&session->lock);
	if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE &&
	    conn->stop_stage != STOP_CONN_RECOVER) {
		printk(KERN_ERR "iscsi_iser: can not change parameter [%d]\n",
		       param);
		spin_unlock_bh(&session->lock);
		return 0;
	}
	spin_unlock_bh(&session->lock);

	switch (param) {
	case ISCSI_PARAM_MAX_RECV_DLENGTH:
		/* TBD */
		break;
	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
		conn->max_xmit_dlength = value;
		break;
	case ISCSI_PARAM_HDRDGST_EN:
		if (value) {
			printk(KERN_ERR "HeaderDigest wasn't negotiated to None");
			return -EPROTO;
		}
		break;
	case ISCSI_PARAM_DATADGST_EN:
		if (value) {
			printk(KERN_ERR "DataDigest wasn't negotiated to None");
			return -EPROTO;
		}
		break;
	case ISCSI_PARAM_INITIAL_R2T_EN:
		session->initial_r2t_en = value;
		break;
	case ISCSI_PARAM_IMM_DATA_EN:
		session->imm_data_en = value;
		break;
	case ISCSI_PARAM_FIRST_BURST:
		session->first_burst = value;
		break;
	case ISCSI_PARAM_MAX_BURST:
		session->max_burst = value;
		break;
	case ISCSI_PARAM_PDU_INORDER_EN:
		session->pdu_inorder_en = value;
		break;
	case ISCSI_PARAM_DATASEQ_INORDER_EN:
		session->dataseq_inorder_en = value;
		break;
	case ISCSI_PARAM_ERL:
		session->erl = value;
		break;
	case ISCSI_PARAM_IFMARKER_EN:
		if (value) {
			printk(KERN_ERR "IFMarker wasn't negotiated to No");
			return -EPROTO;
		}
		break;
	case ISCSI_PARAM_OFMARKER_EN:
		if (value) {
			printk(KERN_ERR "OFMarker wasn't negotiated to No");
			return -EPROTO;
		}
		break;
	default:
		break;
	}

	return 0;
}

static int
iscsi_iser_session_get_param(struct iscsi_cls_session *cls_session,
			     enum iscsi_param param, uint32_t *value)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);

	switch (param) {
	case ISCSI_PARAM_INITIAL_R2T_EN:
		*value = session->initial_r2t_en;
		break;
	case ISCSI_PARAM_MAX_R2T:
		*value = session->max_r2t;
		break;
	case ISCSI_PARAM_IMM_DATA_EN:
		*value = session->imm_data_en;
		break;
	case ISCSI_PARAM_FIRST_BURST:
		*value = session->first_burst;
		break;
	case ISCSI_PARAM_MAX_BURST:
		*value = session->max_burst;
		break;
	case ISCSI_PARAM_PDU_INORDER_EN:
		*value = session->pdu_inorder_en;
		break;
	case ISCSI_PARAM_DATASEQ_INORDER_EN:
		*value = session->dataseq_inorder_en;
		break;
	case ISCSI_PARAM_ERL:
		*value = session->erl;
		break;
	case ISCSI_PARAM_IFMARKER_EN:
		*value = 0;
		break;
	case ISCSI_PARAM_OFMARKER_EN:
		*value = 0;
		break;
	default:
		return ISCSI_ERR_PARAM_NOT_FOUND;
	}

	return 0;
}

static int
iscsi_iser_conn_get_param(struct iscsi_cls_conn *cls_conn,
			  enum iscsi_param param, uint32_t *value)
{
	struct iscsi_conn *conn = cls_conn->dd_data;

	switch (param) {
	case ISCSI_PARAM_MAX_RECV_DLENGTH:
		*value = conn->max_recv_dlength;
		break;
	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
		*value = conn->max_xmit_dlength;
		break;
	case ISCSI_PARAM_HDRDGST_EN:
		*value = 0;
		break;
	case ISCSI_PARAM_DATADGST_EN:
		*value = 0;
		break;
	/*case ISCSI_PARAM_TARGET_RECV_DLENGTH:
		*value = conn->target_recv_dlength;
		break;
	case ISCSI_PARAM_INITIATOR_RECV_DLENGTH:
		*value = conn->initiator_recv_dlength;
		break;*/
	default:
		return ISCSI_ERR_PARAM_NOT_FOUND;
	}

	return 0;
}


static void
iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
{
	struct iscsi_conn *conn = cls_conn->dd_data;

	stats->txdata_octets = conn->txdata_octets;
	stats->rxdata_octets = conn->rxdata_octets;
	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
	stats->dataout_pdus = conn->dataout_pdus_cnt;
	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
	stats->datain_pdus = conn->datain_pdus_cnt; /* always 0 */
	stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "qp_tx_queue_full");
	stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */
	strcpy(stats->custom[1].desc, "fmr_map_not_avail");
	stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */;
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
}

static int
iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking,
		      __u64 *ep_handle)
{
	int err;
	struct iser_conn *ib_conn;

	err = iser_conn_init(&ib_conn);
	if (err)
		goto out;

	err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, non_blocking);
	if (!err)
		*ep_handle = (__u64)(unsigned long)ib_conn;

out:
	return err;
}

static int
iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms)
{
	struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle);
	int rc;

	if (!ib_conn)
		return -EINVAL;

	rc = wait_event_interruptible_timeout(ib_conn->wait,
					      ib_conn->state == ISER_CONN_UP,
					      msecs_to_jiffies(timeout_ms));

	/* if conn establishment failed, return error code to iscsi */
	if (!rc &&
	    (ib_conn->state == ISER_CONN_TERMINATING ||
	     ib_conn->state == ISER_CONN_DOWN))
		rc = -1;

	iser_err("ib conn %p rc = %d\n", ib_conn, rc);

	if (rc > 0)
		return 1; /* success, this is the equivalent of POLLOUT */
	else if (!rc)
		return 0; /* timeout */
	else
		return rc; /* signal */
}

static void
iscsi_iser_ep_disconnect(__u64 ep_handle)
{
	struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle);

	if (!ib_conn)
		return;

	iser_err("ib conn %p state %d\n", ib_conn, ib_conn->state);

	iser_conn_terminate(ib_conn);
}

static struct scsi_host_template iscsi_iser_sht = {
	.name			= "iSCSI Initiator over iSER, v." DRV_VER,
	.queuecommand		= iscsi_queuecommand,
	.can_queue		= ISCSI_XMIT_CMDS_MAX - 1,
	.sg_tablesize		= ISCSI_ISER_SG_TABLESIZE,
	.cmd_per_lun		= ISCSI_MAX_CMD_PER_LUN,
	.eh_abort_handler	= iscsi_eh_abort,
	.eh_host_reset_handler	= iscsi_eh_host_reset,
	.use_clustering		= DISABLE_CLUSTERING,
	.proc_name		= "iscsi_iser",
	.this_id		= -1,
};

static struct iscsi_transport iscsi_iser_transport = {
	.owner			= THIS_MODULE,
	.name			= "iser",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T,
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN,
	.host_template		= &iscsi_iser_sht,
	.conndata_size		= sizeof(struct iscsi_conn),
	.max_lun		= ISCSI_ISER_MAX_LUN,
	.max_cmd_len		= ISCSI_ISER_MAX_CMD_LEN,
	/* session management */
	.create_session		= iscsi_iser_session_create,
	.destroy_session	= iscsi_session_teardown,
	/* connection management */
	.create_conn		= iscsi_iser_conn_create,
	.bind_conn		= iscsi_iser_conn_bind,
	.destroy_conn		= iscsi_iser_conn_destroy,
	.set_param		= iscsi_iser_conn_set_param,
	.get_conn_param		= iscsi_iser_conn_get_param,
	.get_session_param	= iscsi_iser_session_get_param,
	.start_conn		= iscsi_iser_conn_start,
	.stop_conn		= iscsi_conn_stop,
	/* these are called as part of conn recovery */
	.suspend_conn_recv	= NULL, /* FIXME: is/how is this relevant to iser? */
	.terminate_conn		= iscsi_iser_conn_terminate,
	/* IO */
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_iser_conn_get_stats,
	.init_cmd_task		= iscsi_iser_cmd_init,
	.xmit_cmd_task		= iscsi_iser_ctask_xmit,
	.xmit_mgmt_task		= iscsi_iser_mtask_xmit,
	.cleanup_cmd_task	= iscsi_iser_cleanup_ctask,
	/* recovery */
	.session_recovery_timedout = iscsi_session_recovery_timedout,

	.ep_connect		= iscsi_iser_ep_connect,
	.ep_poll		= iscsi_iser_ep_poll,
	.ep_disconnect		= iscsi_iser_ep_disconnect
};

static int __init iser_init(void)
{
	int err;

	iser_dbg("Starting iSER datamover...\n");

	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "Invalid max_lun value of %u\n", iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_iser_transport.max_lun = iscsi_max_lun;

	memset(&ig, 0, sizeof(struct iser_global));

	ig.desc_cache = kmem_cache_create("iser_descriptors",
					  sizeof(struct iser_desc),
					  0, SLAB_HWCACHE_ALIGN,
					  NULL, NULL);
	if (ig.desc_cache == NULL)
		return -ENOMEM;

	/* device init is called only after the first addr resolution */
	mutex_init(&ig.device_list_mutex);
	INIT_LIST_HEAD(&ig.device_list);
	mutex_init(&ig.connlist_mutex);
	INIT_LIST_HEAD(&ig.connlist);

	if (!iscsi_register_transport(&iscsi_iser_transport)) {
		iser_err("iscsi_register_transport failed\n");
		err = -EINVAL;
		goto register_transport_failure;
	}

	return 0;

register_transport_failure:
	kmem_cache_destroy(ig.desc_cache);

	return err;
}

static void __exit iser_exit(void)
{
	iser_dbg("Removing iSER datamover...\n");
	iscsi_unregister_transport(&iscsi_iser_transport);
	kmem_cache_destroy(ig.desc_cache);
}

module_init(iser_init);
module_exit(iser_exit);
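Connection establishment is split between the RDMA side (ep_connect/ep_poll, driven before login) and the iSCSI side (bind_conn, which attaches the established endpoint to a class connection). A hypothetical call sequence, for illustration only - in practice iscsid drives these entry points through the iscsi transport class rather than calling them directly:

	__u64 ep;
	int rc;

	rc = iscsi_iser_ep_connect(dst_addr, 1 /* non_blocking */, &ep);
	if (rc)
		return rc;

	do {	/* 1 = up (POLLOUT-like), 0 = timeout, < 0 = failure */
		rc = iscsi_iser_ep_poll(ep, 500);
	} while (rc == 0);
	if (rc < 0) {
		iscsi_iser_ep_disconnect(ep);
		return rc;
	}
	/* iSCSI login runs over the new connection; the ep handle then
	 * comes back from user space via iscsi_iser_conn_bind() */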
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
new file mode 100644
index 000000000000..3350ba690cfe
--- /dev/null
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -0,0 +1,354 @@
/*
 * iSER transport for the Open iSCSI Initiator & iSER transport internals
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 Mike Christie
 * based on code maintained by open-iscsi@googlegroups.com
 *
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $
 */
#ifndef __ISCSI_ISER_H__
#define __ISCSI_ISER_H__

#include <linux/types.h>
#include <linux/net.h>
#include <scsi/libiscsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/mutex.h>
#include <linux/mempool.h>
#include <linux/uio.h>

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/in6.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_fmr_pool.h>
#include <rdma/rdma_cm.h>

#define DRV_NAME	"iser"
#define PFX		DRV_NAME ": "
#define DRV_VER		"0.1"
#define DRV_DATE	"May 7th, 2006"

#define iser_dbg(fmt, arg...)				\
	do {						\
		if (iser_debug_level > 0)		\
			printk(KERN_DEBUG PFX "%s:" fmt,\
				__func__ , ## arg);	\
	} while (0)

#define iser_err(fmt, arg...)				\
	do {						\
		printk(KERN_ERR PFX "%s:" fmt,		\
		       __func__ , ## arg);		\
	} while (0)

	/* support up to 512KB in one RDMA */
#define ISCSI_ISER_SG_TABLESIZE	(0x80000 >> PAGE_SHIFT)
#define ISCSI_ISER_MAX_LUN	256
#define ISCSI_ISER_MAX_CMD_LEN	16

/* QP settings */
/* Maximal bounds on received asynchronous PDUs */
#define ISER_MAX_RX_MISC_PDUS	4 /* NOOP_IN(2), ASYNC_EVENT(2) */

#define ISER_MAX_TX_MISC_PDUS	6 /* NOOP_OUT(2), TEXT(1),       *
				   * SCSI_TMFUNC(2), LOGOUT(1)    */

#define ISER_QP_MAX_RECV_DTOS	(ISCSI_XMIT_CMDS_MAX +	\
				 ISER_MAX_RX_MISC_PDUS +	\
				 ISER_MAX_TX_MISC_PDUS)

/* the max TX (send) WR supported by the iSER QP is defined by		       *
 * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect  *
 * to have at max for SCSI command. The tx posting & completion handling code *
 * supports -EAGAIN scheme where tx is suspended till the QP has room for more*
 * send WR. D=8 comes from 64K/8K					       */

#define ISER_INFLIGHT_DATAOUTS	8

#define ISER_QP_MAX_REQ_DTOS	(ISCSI_XMIT_CMDS_MAX *		\
				 (1 + ISER_INFLIGHT_DATAOUTS) +	\
				 ISER_MAX_TX_MISC_PDUS +	\
				 ISER_MAX_RX_MISC_PDUS)
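/* Worked example, illustrative only: assuming ISCSI_XMIT_CMDS_MAX == 128
 * (its value in the libiscsi headers of this era), T = 128,
 * D = ISER_INFLIGHT_DATAOUTS = 8 and C = 6 + 4 misc PDUs, so:
 *   ISER_QP_MAX_RECV_DTOS = 128 + 4 + 6           = 138
 *   ISER_QP_MAX_REQ_DTOS  = 128 * (1 + 8) + 6 + 4 = 1162
 */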

#define ISER_VER	0x10
#define ISER_WSV	0x08
#define ISER_RSV	0x04

struct iser_hdr {
	u8	flags;
	u8	rsvd[3];
	__be32	write_stag; /* write rkey */
	__be64	write_va;
	__be32	read_stag;  /* read rkey */
	__be64	read_va;
} __attribute__((packed));
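/* This 28-byte header prefixes every iSCSI PDU on the wire: flags carries
 * ISER_VER plus the WSV/RSV "STag valid" bits, and the STag/VA pairs
 * advertise the buffers the target may RDMA-write (read data) or
 * RDMA-read (write data). */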


/* Length of an object name string */
#define ISER_OBJECT_NAME_SIZE	64

enum iser_ib_conn_state {
	ISER_CONN_INIT,		/* descriptor allocd, no conn          */
	ISER_CONN_PENDING,	/* in the process of being established */
	ISER_CONN_UP,		/* up and running                      */
	ISER_CONN_TERMINATING,	/* in the process of being terminated  */
	ISER_CONN_DOWN,		/* shut down                           */
	ISER_CONN_STATES_NUM
};

enum iser_task_status {
	ISER_TASK_STATUS_INIT = 0,
	ISER_TASK_STATUS_STARTED,
	ISER_TASK_STATUS_COMPLETED
};

enum iser_data_dir {
	ISER_DIR_IN = 0,	/* to initiator   */
	ISER_DIR_OUT,		/* from initiator */
	ISER_DIRS_NUM
};

struct iser_data_buf {
	void		*buf;		/* pointer to the sg list               */
	unsigned int	size;		/* num entries of this sg               */
	unsigned long	data_len;	/* total data len                       */
	unsigned int	dma_nents;	/* returned by dma_map_sg               */
	char		*copy_buf;	/* allocated copy buf for SGs unaligned *
					 * for rdma which are copied            */
	struct scatterlist sg_single;	/* SG-ified clone of a non SG SC or     *
					 * unaligned SG                         */
};

/* fwd declarations */
struct iser_device;
struct iscsi_iser_conn;
struct iscsi_iser_cmd_task;

struct iser_mem_reg {
	u32	lkey;
	u32	rkey;
	u64	va;
	u64	len;
	void	*mem_h;
};

struct iser_regd_buf {
	struct iser_mem_reg	reg;	    /* memory registration info        */
	void			*virt_addr;
	struct iser_device	*device;    /* device->device for dma_unmap    */
	dma_addr_t		dma_addr;   /* if non zero, addr for dma_unmap */
	enum dma_data_direction	direction;  /* direction for dma_unmap         */
	unsigned int		data_size;
	atomic_t		ref_count;  /* refcount, freed when dec to 0   */
};

#define MAX_REGD_BUF_VECTOR_LEN	2

struct iser_dto {
	struct iscsi_iser_cmd_task	*ctask;
	struct iscsi_iser_conn		*conn;
	int				notify_enable;

	/* vector of registered buffers */
	unsigned int			regd_vector_len;
	struct iser_regd_buf		*regd[MAX_REGD_BUF_VECTOR_LEN];

	/* offset into the registered buffer may be specified */
	unsigned int			offset[MAX_REGD_BUF_VECTOR_LEN];

	/* a smaller size may be specified, if 0, then full size is used */
	unsigned int			used_sz[MAX_REGD_BUF_VECTOR_LEN];
};

enum iser_desc_type {
	ISCSI_RX,
	ISCSI_TX_CONTROL,
	ISCSI_TX_SCSI_COMMAND,
	ISCSI_TX_DATAOUT
};

struct iser_desc {
	struct iser_hdr		iser_header;
	struct iscsi_hdr	iscsi_header;
	struct iser_regd_buf	hdr_regd_buf;
	void			*data;		/* used by RX & TX_CONTROL */
	struct iser_regd_buf	data_regd_buf;	/* used by RX & TX_CONTROL */
	enum iser_desc_type	type;
	struct iser_dto		dto;
};

struct iser_device {
	struct ib_device	*ib_device;
	struct ib_pd		*pd;
	struct ib_cq		*cq;
	struct ib_mr		*mr;
	struct tasklet_struct	cq_tasklet;
	struct list_head	ig_list;	/* entry in ig devices list */
	int			refcount;
};

struct iser_conn {
	struct iscsi_iser_conn	*iser_conn;	/* iser conn for upcalls    */
	enum iser_ib_conn_state	state;		/* rdma connection state    */
	spinlock_t		lock;		/* used for state changes   */
	struct iser_device	*device;	/* device context           */
	struct rdma_cm_id	*cma_id;	/* CMA ID                   */
	struct ib_qp		*qp;		/* QP                       */
	struct ib_fmr_pool	*fmr_pool;	/* pool of IB FMRs          */
	int			disc_evt_flag;	/* disconn event delivered  */
	wait_queue_head_t	wait;		/* waitq for conn/disconn   */
	atomic_t		post_recv_buf_count; /* posted rx count     */
	atomic_t		post_send_buf_count; /* posted tx count     */
	struct work_struct	comperror_work;	/* conn term sleepable ctx  */
	char			name[ISER_OBJECT_NAME_SIZE];
	struct iser_page_vec	*page_vec;	/* represents SG to fmr maps*
						 * maps serialized as tx is */
	struct list_head	conn_list;	/* entry in ig conn list    */
};

struct iscsi_iser_conn {
	struct iscsi_conn	*iscsi_conn;	/* ptr to iscsi conn */
	struct iser_conn	*ib_conn;	/* iSER IB conn      */

	rwlock_t		lock;
};

struct iscsi_iser_cmd_task {
	struct iser_desc	desc;
	struct iscsi_iser_conn	*iser_conn;
	int			rdma_data_count; /* RDMA bytes          */
	enum iser_task_status	status;
	int			command_sent;	/* set if command sent  */
	int			dir[ISER_DIRS_NUM];	/* set if dir use */
	struct iser_regd_buf	rdma_regd[ISER_DIRS_NUM]; /* regd rdma buf */
	struct iser_data_buf	data[ISER_DIRS_NUM];	 /* orig. data des */
	struct iser_data_buf	data_copy[ISER_DIRS_NUM]; /* contig. copy  */
};

struct iser_page_vec {
	u64 *pages;
	int length;
	int offset;
	int data_size;
};

struct iser_global {
	struct mutex	  device_list_mutex;
	struct list_head  device_list;	/* all iSER devices         */
	struct mutex	  connlist_mutex;
	struct list_head  connlist;	/* all iSER IB connections  */

	kmem_cache_t *desc_cache;
};

extern struct iser_global ig;
extern int iser_debug_level;

/* allocate connection resources needed for rdma functionality */
int iser_conn_set_full_featured_mode(struct iscsi_conn *conn);

int iser_send_control(struct iscsi_conn *conn,
		      struct iscsi_mgmt_task *mtask);

int iser_send_command(struct iscsi_conn *conn,
		      struct iscsi_cmd_task *ctask);

int iser_send_data_out(struct iscsi_conn *conn,
		       struct iscsi_cmd_task *ctask,
		       struct iscsi_data *hdr);

void iscsi_iser_recv(struct iscsi_conn *conn,
		     struct iscsi_hdr *hdr,
		     char *rx_data,
		     int rx_data_len);

int  iser_conn_init(struct iser_conn **ib_conn);

void iser_conn_terminate(struct iser_conn *ib_conn);

void iser_conn_release(struct iser_conn *ib_conn);

void iser_rcv_completion(struct iser_desc *desc,
			 unsigned long dto_xfer_len);

void iser_snd_completion(struct iser_desc *desc);

void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask);

void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *ctask);

void iser_dto_buffs_release(struct iser_dto *dto);

int  iser_regd_buff_release(struct iser_regd_buf *regd_buf);

void iser_reg_single(struct iser_device *device,
		     struct iser_regd_buf *regd_buf,
		     enum dma_data_direction direction);

int  iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask,
				  enum iser_data_dir cmd_dir);

void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask,
				     enum iser_data_dir cmd_dir);

int  iser_reg_rdma_mem(struct iscsi_iser_cmd_task *ctask,
		       enum iser_data_dir cmd_dir);

int  iser_connect(struct iser_conn *ib_conn,
		  struct sockaddr_in *src_addr,
		  struct sockaddr_in *dst_addr,
		  int non_blocking);

int  iser_reg_page_vec(struct iser_conn *ib_conn,
		       struct iser_page_vec *page_vec,
		       struct iser_mem_reg *mem_reg);

void iser_unreg_mem(struct iser_mem_reg *mem_reg);

int  iser_post_recv(struct iser_desc *rx_desc);
int  iser_post_send(struct iser_desc *tx_desc);

int iser_conn_state_comp(struct iser_conn *ib_conn,
			 enum iser_ib_conn_state comp);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
new file mode 100644
index 000000000000..ccf56f6f7236
--- /dev/null
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -0,0 +1,738 @@
/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/io.h>
#include <asm/scatterlist.h>
#include <linux/scatterlist.h>
#include <linux/kfifo.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>

#include "iscsi_iser.h"

/* Constant PDU lengths calculations */
#define ISER_TOTAL_HEADERS_LEN	(sizeof(struct iser_hdr) + \
				 sizeof(struct iscsi_hdr))

/* iser_dto_add_regd_buff - increments the reference count for *
 * the registered buffer & adds it to the DTO object           */
static void iser_dto_add_regd_buff(struct iser_dto *dto,
				   struct iser_regd_buf *regd_buf,
				   unsigned long use_offset,
				   unsigned long use_size)
{
	int add_idx;

	atomic_inc(&regd_buf->ref_count);

	add_idx = dto->regd_vector_len;
	dto->regd[add_idx] = regd_buf;
	dto->used_sz[add_idx] = use_size;
	dto->offset[add_idx] = use_offset;

	dto->regd_vector_len++;
}

static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
				  struct iser_data_buf *data,
				  enum iser_data_dir iser_dir,
				  enum dma_data_direction dma_dir)
{
	struct device *dma_device;

	iser_ctask->dir[iser_dir] = 1;
	dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;

	data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir);
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
{
	struct device *dma_device;
	struct iser_data_buf *data;

	dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;

	if (iser_ctask->dir[ISER_DIR_IN]) {
		data = &iser_ctask->data[ISER_DIR_IN];
		dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE);
	}

	if (iser_ctask->dir[ISER_DIR_OUT]) {
		data = &iser_ctask->data[ISER_DIR_OUT];
		dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE);
	}
}

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Total data size is stored in
 *  iser_ctask->data[ISER_DIR_IN].data_len
 */
static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask,
				 unsigned int edtl)

{
	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
	struct iser_regd_buf *regd_buf;
	int err;
	struct iser_hdr *hdr = &iser_ctask->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_ctask->data[ISER_DIR_IN];

	err = iser_dma_map_task_data(iser_ctask,
				     buf_in,
				     ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return err;

	if (edtl > iser_ctask->data[ISER_DIR_IN].data_len) {
		iser_err("Total data length: %ld, less than EDTL: "
			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
			 iser_ctask->data[ISER_DIR_IN].data_len, edtl,
			 ctask->itt, iser_ctask->iser_conn);
		return -EINVAL;
	}

	err = iser_reg_rdma_mem(iser_ctask, ISER_DIR_IN);
	if (err) {
		iser_err("Failed to set up Data-IN RDMA\n");
		return err;
	}
	regd_buf = &iser_ctask->rdma_regd[ISER_DIR_IN];

	hdr->flags    |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
	hdr->read_va   = cpu_to_be64(regd_buf->reg.va);

	iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
		 ctask->itt, regd_buf->reg.rkey,
		 (unsigned long long)regd_buf->reg.va);

	return 0;
}

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Total data size is stored in
 *  ctask->data[ISER_DIR_OUT].data_len
 */
static int
iser_prepare_write_cmd(struct iscsi_cmd_task *ctask,
		       unsigned int imm_sz,
		       unsigned int unsol_sz,
		       unsigned int edtl)
{
	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
	struct iser_regd_buf *regd_buf;
	int err;
	struct iser_dto *send_dto = &iser_ctask->desc.dto;
	struct iser_hdr *hdr = &iser_ctask->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_ctask->data[ISER_DIR_OUT];

	err = iser_dma_map_task_data(iser_ctask,
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return err;

	if (edtl > iser_ctask->data[ISER_DIR_OUT].data_len) {
		iser_err("Total data length: %ld, less than EDTL: %d, "
			 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
			 iser_ctask->data[ISER_DIR_OUT].data_len,
			 edtl, ctask->itt, ctask->conn);
		return -EINVAL;
	}

	err = iser_reg_rdma_mem(iser_ctask, ISER_DIR_OUT);
	if (err != 0) {
		iser_err("Failed to register write cmd RDMA mem\n");
		return err;
	}

	regd_buf = &iser_ctask->rdma_regd[ISER_DIR_OUT];

	if (unsol_sz < edtl) {
		hdr->flags     |= ISER_WSV;
		hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
		hdr->write_va   = cpu_to_be64(regd_buf->reg.va + unsol_sz);

		iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
			 "VA:%#llX + unsol:%d\n",
			 ctask->itt, regd_buf->reg.rkey,
			 (unsigned long long)regd_buf->reg.va, unsol_sz);
	}
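	/* the remaining (edtl - unsol_sz) bytes are solicited: the target
	 * fetches them with RDMA Read from write_va + unsol_sz using the
	 * advertised write_stag, which is why WSV and the write tags are
	 * only set when some data is left to solicit */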

	if (imm_sz > 0) {
		iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
			 ctask->itt, imm_sz);
		iser_dto_add_regd_buff(send_dto,
				       regd_buf,
				       0,
				       imm_sz);
	}

	return 0;
}

/**
 * iser_post_receive_control - allocates, initializes and posts receive DTO.
 */
static int iser_post_receive_control(struct iscsi_conn *conn)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iser_desc *rx_desc;
	struct iser_regd_buf *regd_hdr;
	struct iser_regd_buf *regd_data;
	struct iser_dto *recv_dto = NULL;
	struct iser_device *device = iser_conn->ib_conn->device;
	int rx_data_size, err = 0;

	rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
	if (rx_desc == NULL) {
		iser_err("Failed to alloc desc for post recv\n");
		return -ENOMEM;
	}
	rx_desc->type = ISCSI_RX;
	/* for the login sequence we must support rx of up to 8K; login is done
	 * after conn create/bind (connect) and conn stop/bind (reconnect);
	 * what's common to both schemes is that the connection is not started
	 */
	if (conn->c_stage != ISCSI_CONN_STARTED)
		rx_data_size = DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH;
	else /* FIXME till user space sets conn->max_recv_dlength correctly */
		rx_data_size = 128;

	rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
	if (rx_desc->data == NULL) {
		iser_err("Failed to alloc data buf for post recv\n");
		err = -ENOMEM;
		goto post_rx_kmalloc_failure;
	}

	recv_dto = &rx_desc->dto;
	recv_dto->conn = iser_conn;
	recv_dto->regd_vector_len = 0;

	regd_hdr = &rx_desc->hdr_regd_buf;
	memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
	regd_hdr->device = device;
	regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
	regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;

	iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);

	iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);

	regd_data = &rx_desc->data_regd_buf;
	memset(regd_data, 0, sizeof(struct iser_regd_buf));
	regd_data->device = device;
	regd_data->virt_addr = rx_desc->data;
	regd_data->data_size = rx_data_size;

	iser_reg_single(device, regd_data, DMA_FROM_DEVICE);

	iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);

	err = iser_post_recv(rx_desc);
	if (!err)
		return 0;

	/* iser_post_recv failed */
	iser_dto_buffs_release(recv_dto);
	kfree(rx_desc->data);
post_rx_kmalloc_failure:
	kmem_cache_free(ig.desc_cache, rx_desc);
	return err;
}

/* creates a new tx descriptor and adds header regd buffer */
static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
				  struct iser_desc *tx_desc)
{
	struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf;
	struct iser_dto *send_dto = &tx_desc->dto;

	memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
	regd_hdr->device = iser_conn->ib_conn->device;
	regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */
	regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;

	send_dto->conn = iser_conn;
	send_dto->notify_enable = 1;
	send_dto->regd_vector_len = 0;

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0);
}

/**
 * iser_conn_set_full_featured_mode - (iSER API)
 */
int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;

	int i;
	/* no need to keep this in a variable: we are past login, so if it
	 * were negotiated, by now the result would be available here */
319 int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
320
321 iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
322
323 /* Check that there is no posted recv or send buffers left - */
324 /* they must be consumed during the login phase */
325 BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0);
326 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
327
328 /* Initial post receive buffers */
329 for (i = 0; i < initial_post_recv_bufs_num; i++) {
330 if (iser_post_receive_control(conn) != 0) {
331 iser_err("Failed to post recv bufs at:%d conn:0x%p\n",
332 i, conn);
333 return -ENOMEM;
334 }
335 }
336 iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn);
337 return 0;
338}
339
340static int
341iser_check_xmit(struct iscsi_conn *conn, void *task)
342{
343 int rc = 0;
344 struct iscsi_iser_conn *iser_conn = conn->dd_data;
345
346 write_lock_bh(conn->recv_lock);
347 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
348 ISER_QP_MAX_REQ_DTOS) {
349 iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task);
350 set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
351 rc = -EAGAIN;
352 }
353 write_unlock_bh(conn->recv_lock);
354 return rc;
355}
356
357
358/**
359 * iser_send_command - send command PDU
360 */
361int iser_send_command(struct iscsi_conn *conn,
362 struct iscsi_cmd_task *ctask)
363{
364 struct iscsi_iser_conn *iser_conn = conn->dd_data;
365 struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
366 struct iser_dto *send_dto = NULL;
367 unsigned long edtl;
368 int err = 0;
369 struct iser_data_buf *data_buf;
370
371 struct iscsi_cmd *hdr = ctask->hdr;
372 struct scsi_cmnd *sc = ctask->sc;
373
374 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
375 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
376 return -EPERM;
377 }
378 if (iser_check_xmit(conn, ctask))
379 return -EAGAIN;
380
381 edtl = ntohl(hdr->data_length);
382
383 /* build the tx desc regd header and add it to the tx desc dto */
384 iser_ctask->desc.type = ISCSI_TX_SCSI_COMMAND;
385 send_dto = &iser_ctask->desc.dto;
386 send_dto->ctask = iser_ctask;
387 iser_create_send_desc(iser_conn, &iser_ctask->desc);
388
389 if (hdr->flags & ISCSI_FLAG_CMD_READ)
390 data_buf = &iser_ctask->data[ISER_DIR_IN];
391 else
392 data_buf = &iser_ctask->data[ISER_DIR_OUT];
393
394 if (sc->use_sg) { /* using a scatter list */
395 data_buf->buf = sc->request_buffer;
396 data_buf->size = sc->use_sg;
397 } else if (sc->request_bufflen) {
398 /* using a single buffer - convert it into one entry SG */
399 sg_init_one(&data_buf->sg_single,
400 sc->request_buffer, sc->request_bufflen);
401 data_buf->buf = &data_buf->sg_single;
402 data_buf->size = 1;
403 }
404
405 data_buf->data_len = sc->request_bufflen;
406
407 if (hdr->flags & ISCSI_FLAG_CMD_READ) {
408 err = iser_prepare_read_cmd(ctask, edtl);
409 if (err)
410 goto send_command_error;
411 }
412 if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
413 err = iser_prepare_write_cmd(ctask,
414 ctask->imm_count,
415 ctask->imm_count +
416 ctask->unsol_count,
417 edtl);
418 if (err)
419 goto send_command_error;
420 }
421
422 iser_reg_single(iser_conn->ib_conn->device,
423 send_dto->regd[0], DMA_TO_DEVICE);
424
425 if (iser_post_receive_control(conn) != 0) {
426 iser_err("post_recv failed!\n");
427 err = -ENOMEM;
428 goto send_command_error;
429 }
430
431 iser_ctask->status = ISER_TASK_STATUS_STARTED;
432
433 err = iser_post_send(&iser_ctask->desc);
434 if (!err)
435 return 0;
436
437send_command_error:
438 iser_dto_buffs_release(send_dto);
439 iser_err("conn %p failed ctask->itt %d err %d\n",conn, ctask->itt, err);
440 return err;
441}
442
443/**
444 * iser_send_data_out - send data out PDU
445 */
446int iser_send_data_out(struct iscsi_conn *conn,
447 struct iscsi_cmd_task *ctask,
448 struct iscsi_data *hdr)
449{
450 struct iscsi_iser_conn *iser_conn = conn->dd_data;
451 struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
452 struct iser_desc *tx_desc = NULL;
453 struct iser_dto *send_dto = NULL;
454 unsigned long buf_offset;
455 unsigned long data_seg_len;
456 unsigned int itt;
457 int err = 0;
458
459 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
460 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
461 return -EPERM;
462 }
463
464 if (iser_check_xmit(conn, ctask))
465 return -EAGAIN;
466
467 itt = ntohl(hdr->itt);
468 data_seg_len = ntoh24(hdr->dlength);
469 buf_offset = ntohl(hdr->offset);
470
471 iser_dbg("%s itt %d dseg_len %d offset %d\n",
472 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);
473
474 tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
475 if (tx_desc == NULL) {
476 iser_err("Failed to alloc desc for post dataout\n");
477 return -ENOMEM;
478 }
479
480 tx_desc->type = ISCSI_TX_DATAOUT;
481 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
482
483 /* build the tx desc regd header and add it to the tx desc dto */
484 send_dto = &tx_desc->dto;
485 send_dto->ctask = iser_ctask;
486 iser_create_send_desc(iser_conn, tx_desc);
487
488 iser_reg_single(iser_conn->ib_conn->device,
489 send_dto->regd[0], DMA_TO_DEVICE);
490
491 /* all data was registered for RDMA, we can use the lkey */
492 iser_dto_add_regd_buff(send_dto,
493 &iser_ctask->rdma_regd[ISER_DIR_OUT],
494 buf_offset,
495 data_seg_len);
496
497 if (buf_offset + data_seg_len > iser_ctask->data[ISER_DIR_OUT].data_len) {
498 iser_err("Offset:%ld & DSL:%ld in Data-Out "
499 "inconsistent with total len:%ld, itt:%d\n",
500 buf_offset, data_seg_len,
501 iser_ctask->data[ISER_DIR_OUT].data_len, itt);
502 err = -EINVAL;
503 goto send_data_out_error;
504 }
505 iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
506 itt, buf_offset, data_seg_len);
507
508
509 err = iser_post_send(tx_desc);
510 if (!err)
511 return 0;
512
513send_data_out_error:
514 iser_dto_buffs_release(send_dto);
515 kmem_cache_free(ig.desc_cache, tx_desc);
516 iser_err("conn %p failed err %d\n",conn, err);
517 return err;
518}
519
520int iser_send_control(struct iscsi_conn *conn,
521 struct iscsi_mgmt_task *mtask)
522{
523 struct iscsi_iser_conn *iser_conn = conn->dd_data;
524 struct iser_desc *mdesc = mtask->dd_data;
525 struct iser_dto *send_dto = NULL;
526 unsigned int itt;
527 unsigned long data_seg_len;
528 int err = 0;
529 unsigned char opcode;
530 struct iser_regd_buf *regd_buf;
531 struct iser_device *device;
532
533 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
534 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
535 return -EPERM;
536 }
537
538 if (iser_check_xmit(conn,mtask))
539 return -EAGAIN;
540
541 /* build the tx desc regd header and add it to the tx desc dto */
542 mdesc->type = ISCSI_TX_CONTROL;
543 send_dto = &mdesc->dto;
544 send_dto->ctask = NULL;
545 iser_create_send_desc(iser_conn, mdesc);
546
547 device = iser_conn->ib_conn->device;
548
549 iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE);
550
551 itt = ntohl(mtask->hdr->itt);
552 opcode = mtask->hdr->opcode & ISCSI_OPCODE_MASK;
553 data_seg_len = ntoh24(mtask->hdr->dlength);
554
555 if (data_seg_len > 0) {
556 regd_buf = &mdesc->data_regd_buf;
557 memset(regd_buf, 0, sizeof(struct iser_regd_buf));
558 regd_buf->device = device;
559 regd_buf->virt_addr = mtask->data;
560 regd_buf->data_size = mtask->data_count;
561 iser_reg_single(device, regd_buf,
562 DMA_TO_DEVICE);
563 iser_dto_add_regd_buff(send_dto, regd_buf,
564 0,
565 data_seg_len);
566 }
567
568 if (iser_post_receive_control(conn) != 0) {
569 iser_err("post_rcv_buff failed!\n");
570 err = -ENOMEM;
571 goto send_control_error;
572 }
573
574 err = iser_post_send(mdesc);
575 if (!err)
576 return 0;
577
578send_control_error:
579 iser_dto_buffs_release(send_dto);
580 iser_err("conn %p failed err %d\n",conn, err);
581 return err;
582}
583
584/**
 585 * iser_rcv_completion - recv DTO completion
586 */
587void iser_rcv_completion(struct iser_desc *rx_desc,
588 unsigned long dto_xfer_len)
589{
590 struct iser_dto *dto = &rx_desc->dto;
591 struct iscsi_iser_conn *conn = dto->conn;
592 struct iscsi_session *session = conn->iscsi_conn->session;
593 struct iscsi_cmd_task *ctask;
594 struct iscsi_iser_cmd_task *iser_ctask;
595 struct iscsi_hdr *hdr;
596 char *rx_data = NULL;
597 int rx_data_len = 0;
598 unsigned int itt;
599 unsigned char opcode;
600
601 hdr = &rx_desc->iscsi_header;
602
603 iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt);
604
605 if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */
606 rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
607 rx_data = dto->regd[1]->virt_addr;
608 rx_data += dto->offset[1];
609 }
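	/* descriptive note: the rx DTO was posted with two regd bufs,
	 * regd[0] for the iSER + iSCSI headers and regd[1] for payload,
	 * so anything beyond ISER_TOTAL_HEADERS_LEN is PDU data */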
610
611 opcode = hdr->opcode & ISCSI_OPCODE_MASK;
612
613 if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
614 itt = hdr->itt & ISCSI_ITT_MASK; /* mask out cid and age bits */
 615 if (itt >= session->cmds_max)
 616 iser_err("itt can't be matched to task!!! "
 617 "conn %p opcode %d cmds_max %d itt %d\n",
 618 conn->iscsi_conn, opcode, session->cmds_max, itt);
619 /* use the mapping given with the cmds array indexed by itt */
620 ctask = (struct iscsi_cmd_task *)session->cmds[itt];
621 iser_ctask = ctask->dd_data;
622 iser_dbg("itt %d ctask %p\n",itt,ctask);
623 iser_ctask->status = ISER_TASK_STATUS_COMPLETED;
624 iser_ctask_rdma_finalize(iser_ctask);
625 }
626
627 iser_dto_buffs_release(dto);
628
629 iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len);
630
631 kfree(rx_desc->data);
632 kmem_cache_free(ig.desc_cache, rx_desc);
633
634 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
 635 * task eliminates the need to worry about tasks which are completed in *
636 * parallel to the execution of iser_conn_term. So the code that waits *
637 * for the posted rx bufs refcount to become zero handles everything */
638 atomic_dec(&conn->ib_conn->post_recv_buf_count);
639}
640
641void iser_snd_completion(struct iser_desc *tx_desc)
642{
643 struct iser_dto *dto = &tx_desc->dto;
644 struct iscsi_iser_conn *iser_conn = dto->conn;
645 struct iscsi_conn *conn = iser_conn->iscsi_conn;
646 struct iscsi_mgmt_task *mtask;
647
648 iser_dbg("Initiator, Data sent dto=0x%p\n", dto);
649
650 iser_dto_buffs_release(dto);
651
652 if (tx_desc->type == ISCSI_TX_DATAOUT)
653 kmem_cache_free(ig.desc_cache, tx_desc);
654
655 atomic_dec(&iser_conn->ib_conn->post_send_buf_count);
656
657 write_lock(conn->recv_lock);
658 if (conn->suspend_tx) {
659 iser_dbg("%ld resuming tx\n",jiffies);
660 clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
661 scsi_queue_work(conn->session->host, &conn->xmitwork);
662 }
663 write_unlock(conn->recv_lock);
664
665 if (tx_desc->type == ISCSI_TX_CONTROL) {
 666 /* this arithmetic is legal: libiscsi allocates dd_data right after the mgmt task struct */
667 mtask = (void *) ((long)(void *)tx_desc -
668 sizeof(struct iscsi_mgmt_task));
669 if (mtask->hdr->itt == cpu_to_be32(ISCSI_RESERVED_TAG)) {
670 struct iscsi_session *session = conn->session;
671
672 spin_lock(&conn->session->lock);
673 list_del(&mtask->running);
674 __kfifo_put(session->mgmtpool.queue, (void*)&mtask,
675 sizeof(void*));
676 spin_unlock(&session->lock);
677 }
678 }
679}
680
681void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask)
682
683{
684 iser_ctask->status = ISER_TASK_STATUS_INIT;
685
686 iser_ctask->dir[ISER_DIR_IN] = 0;
687 iser_ctask->dir[ISER_DIR_OUT] = 0;
688
689 iser_ctask->data[ISER_DIR_IN].data_len = 0;
690 iser_ctask->data[ISER_DIR_OUT].data_len = 0;
691
692 memset(&iser_ctask->rdma_regd[ISER_DIR_IN], 0,
693 sizeof(struct iser_regd_buf));
694 memset(&iser_ctask->rdma_regd[ISER_DIR_OUT], 0,
695 sizeof(struct iser_regd_buf));
696}
697
698void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
699{
700 int deferred;
701
702 /* if we were reading, copy back to unaligned sglist,
703 * anyway dma_unmap and free the copy
704 */
705 if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL)
706 iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN);
707 if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL)
708 iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT);
709
710 if (iser_ctask->dir[ISER_DIR_IN]) {
711 deferred = iser_regd_buff_release
712 (&iser_ctask->rdma_regd[ISER_DIR_IN]);
713 if (deferred) {
714 iser_err("References remain for BUF-IN rdma reg\n");
715 BUG();
716 }
717 }
718
719 if (iser_ctask->dir[ISER_DIR_OUT]) {
720 deferred = iser_regd_buff_release
721 (&iser_ctask->rdma_regd[ISER_DIR_OUT]);
722 if (deferred) {
723 iser_err("References remain for BUF-OUT rdma reg\n");
724 BUG();
725 }
726 }
727
728 iser_dma_unmap_task_data(iser_ctask);
729}
730
731void iser_dto_buffs_release(struct iser_dto *dto)
732{
733 int i;
734
735 for (i = 0; i < dto->regd_vector_len; i++)
736 iser_regd_buff_release(dto->regd[i]);
737}
738
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
new file mode 100644
index 000000000000..31950a522a1c
--- /dev/null
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -0,0 +1,401 @@
1/*
2 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $
33 */
34#include <linux/module.h>
35#include <linux/kernel.h>
36#include <linux/slab.h>
37#include <linux/mm.h>
38#include <asm/io.h>
39#include <asm/scatterlist.h>
40#include <linux/scatterlist.h>
41
42#include "iscsi_iser.h"
43
44#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
45/**
 46 * iser_regd_buff_release - decrements the reference count for the
 47 * registered buffer & releases it
48 *
49 * returns 0 if released, 1 if deferred
50 */
51int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
52{
53 struct device *dma_device;
54
55 if ((atomic_read(&regd_buf->ref_count) == 0) ||
56 atomic_dec_and_test(&regd_buf->ref_count)) {
57 /* if we used the dma mr, unreg is just NOP */
58 if (regd_buf->reg.rkey != 0)
59 iser_unreg_mem(&regd_buf->reg);
60
61 if (regd_buf->dma_addr) {
62 dma_device = regd_buf->device->ib_device->dma_device;
63 dma_unmap_single(dma_device,
64 regd_buf->dma_addr,
65 regd_buf->data_size,
66 regd_buf->direction);
67 }
 68 /* else this regd buf is associated with a task which we */
69 /* dma_unmap_single/sg later */
70 return 0;
71 } else {
72 iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf);
73 return 1;
74 }
75}
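/*
 * Usage sketch (illustrative, not part of the original source): the RDMA
 * registration path takes an extra reference so that a release triggered by
 * a send completion is deferred, and the buffer survives until the SCSI
 * response has been processed:
 *
 *	atomic_inc(&regd_buf->ref_count);	 in iser_reg_rdma_mem()
 *	...
 *	iser_regd_buff_release(regd_buf);	 returns 1, release deferred
 *	iser_regd_buff_release(regd_buf);	 last ref, releases; made from
 *						 iser_ctask_rdma_finalize()
 */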
76
77/**
78 * iser_reg_single - fills registered buffer descriptor with
79 * registration information
80 */
81void iser_reg_single(struct iser_device *device,
82 struct iser_regd_buf *regd_buf,
83 enum dma_data_direction direction)
84{
85 dma_addr_t dma_addr;
86
87 dma_addr = dma_map_single(device->ib_device->dma_device,
88 regd_buf->virt_addr,
89 regd_buf->data_size, direction);
90 BUG_ON(dma_mapping_error(dma_addr));
91
92 regd_buf->reg.lkey = device->mr->lkey;
93 regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */
94 regd_buf->reg.len = regd_buf->data_size;
95 regd_buf->reg.va = dma_addr;
96
97 regd_buf->dma_addr = dma_addr;
98 regd_buf->direction = direction;
99}
100
101/**
 102 * iser_start_rdma_unaligned_sg - allocate a bounce buffer for an RDMA-unaligned SG, copy WRITE data into it and DMA-map it
103 */
104int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
105 enum iser_data_dir cmd_dir)
106{
107 int dma_nents;
108 struct device *dma_device;
109 char *mem = NULL;
110 struct iser_data_buf *data = &iser_ctask->data[cmd_dir];
111 unsigned long cmd_data_len = data->data_len;
112
113 if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
114 mem = (void *)__get_free_pages(GFP_NOIO,
115 long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
116 else
117 mem = kmalloc(cmd_data_len, GFP_NOIO);
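	/* illustrative arithmetic, assuming 4K pages: a cmd_data_len of 200K
	 * rounds up to 256K = 2^18, so the order passed to __get_free_pages
	 * above is long_log2(2^18) - PAGE_SHIFT = 18 - 12 = 6, i.e. 64 pages */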
118
119 if (mem == NULL) {
 120 iser_err("Failed to allocate mem (sg size %d, data len %d) for copying sglist\n",
 121 data->size, (int)cmd_data_len);
122 return -ENOMEM;
123 }
124
125 if (cmd_dir == ISER_DIR_OUT) {
 126 /* copy the unaligned sg into the buffer which is used for RDMA */
127 struct scatterlist *sg = (struct scatterlist *)data->buf;
128 int i;
129 char *p, *from;
130
131 for (p = mem, i = 0; i < data->size; i++) {
132 from = kmap_atomic(sg[i].page, KM_USER0);
133 memcpy(p,
134 from + sg[i].offset,
135 sg[i].length);
136 kunmap_atomic(from, KM_USER0);
137 p += sg[i].length;
138 }
139 }
140
141 sg_init_one(&iser_ctask->data_copy[cmd_dir].sg_single, mem, cmd_data_len);
142 iser_ctask->data_copy[cmd_dir].buf =
143 &iser_ctask->data_copy[cmd_dir].sg_single;
144 iser_ctask->data_copy[cmd_dir].size = 1;
145
146 iser_ctask->data_copy[cmd_dir].copy_buf = mem;
147
148 dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
149
150 if (cmd_dir == ISER_DIR_OUT)
151 dma_nents = dma_map_sg(dma_device,
152 &iser_ctask->data_copy[cmd_dir].sg_single,
153 1, DMA_TO_DEVICE);
154 else
155 dma_nents = dma_map_sg(dma_device,
156 &iser_ctask->data_copy[cmd_dir].sg_single,
157 1, DMA_FROM_DEVICE);
158
159 BUG_ON(dma_nents == 0);
160
161 iser_ctask->data_copy[cmd_dir].dma_nents = dma_nents;
162 return 0;
163}
164
165/**
 166 * iser_finalize_rdma_unaligned_sg - DMA-unmap the bounce buffer and, for READs, copy it back to the unaligned SG before freeing it
167 */
168void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
169 enum iser_data_dir cmd_dir)
170{
171 struct device *dma_device;
172 struct iser_data_buf *mem_copy;
173 unsigned long cmd_data_len;
174
175 dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
176 mem_copy = &iser_ctask->data_copy[cmd_dir];
177
178 if (cmd_dir == ISER_DIR_OUT)
179 dma_unmap_sg(dma_device, &mem_copy->sg_single, 1,
180 DMA_TO_DEVICE);
181 else
182 dma_unmap_sg(dma_device, &mem_copy->sg_single, 1,
183 DMA_FROM_DEVICE);
184
185 if (cmd_dir == ISER_DIR_IN) {
186 char *mem;
187 struct scatterlist *sg;
188 unsigned char *p, *to;
189 unsigned int sg_size;
190 int i;
191
192 /* copy back read RDMA to unaligned sg */
193 mem = mem_copy->copy_buf;
194
195 sg = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf;
196 sg_size = iser_ctask->data[ISER_DIR_IN].size;
197
198 for (p = mem, i = 0; i < sg_size; i++){
199 to = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
200 memcpy(to + sg[i].offset,
201 p,
202 sg[i].length);
203 kunmap_atomic(to, KM_SOFTIRQ0);
204 p += sg[i].length;
205 }
206 }
207
208 cmd_data_len = iser_ctask->data[cmd_dir].data_len;
209
210 if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
211 free_pages((unsigned long)mem_copy->copy_buf,
212 long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
213 else
214 kfree(mem_copy->copy_buf);
215
216 mem_copy->copy_buf = NULL;
217}
218
219/**
220 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
221 * and returns the length of resulting physical address array (may be less than
222 * the original due to possible compaction).
223 *
224 * we build a "page vec" under the assumption that the SG meets the RDMA
 225 * alignment requirements. Other than the first and last SG elements, all
 226 * the "internal" elements can be compacted into a list whose elements are
 227 * dma addresses of physical pages. The code also supports the weird case
 228 * where a few fragments of the same page are present in the SG as
 229 * consecutive elements, and it handles a one-entry SG as well.
230 */
231static int iser_sg_to_page_vec(struct iser_data_buf *data,
232 struct iser_page_vec *page_vec)
233{
234 struct scatterlist *sg = (struct scatterlist *)data->buf;
235 dma_addr_t first_addr, last_addr, page;
236 int start_aligned, end_aligned;
237 unsigned int cur_page = 0;
238 unsigned long total_sz = 0;
239 int i;
240
241 /* compute the offset of first element */
242 page_vec->offset = (u64) sg[0].offset;
243
244 for (i = 0; i < data->dma_nents; i++) {
245 total_sz += sg_dma_len(&sg[i]);
246
247 first_addr = sg_dma_address(&sg[i]);
248 last_addr = first_addr + sg_dma_len(&sg[i]);
249
250 start_aligned = !(first_addr & ~PAGE_MASK);
251 end_aligned = !(last_addr & ~PAGE_MASK);
252
253 /* continue to collect page fragments till aligned or SG ends */
254 while (!end_aligned && (i + 1 < data->dma_nents)) {
255 i++;
256 total_sz += sg_dma_len(&sg[i]);
257 last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]);
258 end_aligned = !(last_addr & ~PAGE_MASK);
259 }
260
261 first_addr = first_addr & PAGE_MASK;
262
263 for (page = first_addr; page < last_addr; page += PAGE_SIZE)
264 page_vec->pages[cur_page++] = page;
265
266 }
267 page_vec->data_size = total_sz;
268 iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
269 return cur_page;
270}
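/*
 * Worked example (illustrative only, assuming 4K pages): a dma-mapped SG of
 * three elements
 *	{ 0x10800 len 0x800, 0x11000 len 0x1000, 0x12000 len 0x400 }
 * starts and ends mid-page but is contiguous, so it compacts into
 * page_vec->offset = 0x800, pages = { 0x10000, 0x11000, 0x12000 } and
 * page_vec->data_size = 0x1C00, with the function returning cur_page = 3.
 */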
271
272#define MASK_4K ((1UL << 12) - 1) /* 0xFFF */
273#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & MASK_4K) == 0)
274
275/**
 276 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 277 * scatter-gather list of memory buffers which is correctly aligned for RDMA,
 278 * and returns the number of correctly aligned entries. Supports the case
 279 * where consecutive SG elements are actually fragments of the same physical page.
280 */
281static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data)
282{
283 struct scatterlist *sg;
284 dma_addr_t end_addr, next_addr;
285 int i, cnt;
286 unsigned int ret_len = 0;
287
288 sg = (struct scatterlist *)data->buf;
289
290 for (cnt = 0, i = 0; i < data->dma_nents; i++, cnt++) {
291 /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
292 "offset: %ld sz: %ld\n", i,
293 (unsigned long)page_to_phys(sg[i].page),
294 (unsigned long)sg[i].offset,
295 (unsigned long)sg[i].length); */
296 end_addr = sg_dma_address(&sg[i]) +
297 sg_dma_len(&sg[i]);
298 /* iser_dbg("Checking sg iobuf end address "
299 "0x%08lX\n", end_addr); */
300 if (i + 1 < data->dma_nents) {
301 next_addr = sg_dma_address(&sg[i+1]);
302 /* are i, i+1 fragments of the same page? */
303 if (end_addr == next_addr)
304 continue;
305 else if (!IS_4K_ALIGNED(end_addr)) {
306 ret_len = cnt + 1;
307 break;
308 }
309 }
310 }
311 if (i == data->dma_nents)
312 ret_len = cnt; /* loop ended */
313 iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
314 ret_len, data->dma_nents, data);
315 return ret_len;
316}
317
318static void iser_data_buf_dump(struct iser_data_buf *data)
319{
320 struct scatterlist *sg = (struct scatterlist *)data->buf;
321 int i;
322
323 for (i = 0; i < data->size; i++)
324 iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
325 "off:%d sz:%d dma_len:%d\n",
326 i, (unsigned long)sg_dma_address(&sg[i]),
327 sg[i].page, sg[i].offset,
328 sg[i].length,sg_dma_len(&sg[i]));
329}
330
331static void iser_dump_page_vec(struct iser_page_vec *page_vec)
332{
333 int i;
334
335 iser_err("page vec length %d data size %d\n",
336 page_vec->length, page_vec->data_size);
337 for (i = 0; i < page_vec->length; i++)
338 iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
339}
340
341static void iser_page_vec_build(struct iser_data_buf *data,
342 struct iser_page_vec *page_vec)
343{
344 int page_vec_len = 0;
345
346 page_vec->length = 0;
347 page_vec->offset = 0;
348
349 iser_dbg("Translating sg sz: %d\n", data->dma_nents);
350 page_vec_len = iser_sg_to_page_vec(data,page_vec);
351 iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len);
352
353 page_vec->length = page_vec_len;
354
355 if (page_vec_len * PAGE_SIZE < page_vec->data_size) {
356 iser_err("page_vec too short to hold this SG\n");
357 iser_data_buf_dump(data);
358 iser_dump_page_vec(page_vec);
359 BUG();
360 }
361}
362
363/**
364 * iser_reg_rdma_mem - Registers memory intended for RDMA,
365 * obtaining rkey and va
366 *
367 * returns 0 on success, errno code on failure
368 */
369int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
370 enum iser_data_dir cmd_dir)
371{
372 struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn;
373 struct iser_data_buf *mem = &iser_ctask->data[cmd_dir];
374 struct iser_regd_buf *regd_buf;
375 int aligned_len;
376 int err;
377
378 regd_buf = &iser_ctask->rdma_regd[cmd_dir];
379
380 aligned_len = iser_data_buf_aligned_len(mem);
381 if (aligned_len != mem->size) {
382 iser_err("rdma alignment violation %d/%d aligned\n",
383 aligned_len, mem->size);
384 iser_data_buf_dump(mem);
385 /* allocate copy buf, if we are writing, copy the */
386 /* unaligned scatterlist, dma map the copy */
387 if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0)
388 return -ENOMEM;
389 mem = &iser_ctask->data_copy[cmd_dir];
390 }
391
392 iser_page_vec_build(mem, ib_conn->page_vec);
393 err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
394 if (err)
395 return err;
396
397 /* take a reference on this regd buf such that it will not be released *
398 * (eg in send dto completion) before we get the scsi response */
399 atomic_inc(&regd_buf->ref_count);
400 return 0;
401}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
new file mode 100644
index 000000000000..ff117bbf81b4
--- /dev/null
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -0,0 +1,827 @@
1/*
2 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $
34 */
35#include <asm/io.h>
36#include <linux/kernel.h>
37#include <linux/module.h>
38#include <linux/smp_lock.h>
39#include <linux/delay.h>
40#include <linux/version.h>
41
42#include "iscsi_iser.h"
43
44#define ISCSI_ISER_MAX_CONN 8
45#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \
46 ISER_QP_MAX_REQ_DTOS) * \
47 ISCSI_ISER_MAX_CONN)
48
49static void iser_cq_tasklet_fn(unsigned long data);
50static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
51static void iser_comp_error_worker(void *data);
52
53static void iser_cq_event_callback(struct ib_event *cause, void *context)
54{
 55 iser_err("got cq event %d\n", cause->event);
56}
57
58static void iser_qp_event_callback(struct ib_event *cause, void *context)
59{
60 iser_err("got qp event %d\n",cause->event);
61}
62
63/**
64 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
65 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 66 * the adapter.
67 *
68 * returns 0 on success, -1 on failure
69 */
70static int iser_create_device_ib_res(struct iser_device *device)
71{
72 device->pd = ib_alloc_pd(device->ib_device);
73 if (IS_ERR(device->pd))
74 goto pd_err;
75
76 device->cq = ib_create_cq(device->ib_device,
77 iser_cq_callback,
78 iser_cq_event_callback,
79 (void *)device,
80 ISER_MAX_CQ_LEN);
81 if (IS_ERR(device->cq))
82 goto cq_err;
83
84 if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP))
85 goto cq_arm_err;
86
87 tasklet_init(&device->cq_tasklet,
88 iser_cq_tasklet_fn,
89 (unsigned long)device);
90
91 device->mr = ib_get_dma_mr(device->pd,
92 IB_ACCESS_LOCAL_WRITE);
93 if (IS_ERR(device->mr))
94 goto dma_mr_err;
95
96 return 0;
97
98dma_mr_err:
99 tasklet_kill(&device->cq_tasklet);
100cq_arm_err:
101 ib_destroy_cq(device->cq);
102cq_err:
103 ib_dealloc_pd(device->pd);
104pd_err:
105 iser_err("failed to allocate an IB resource\n");
106 return -1;
107}
108
109/**
 110 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 111 * CQ and PD created with the device associated with the adapter.
112 */
113static void iser_free_device_ib_res(struct iser_device *device)
114{
115 BUG_ON(device->mr == NULL);
116
117 tasklet_kill(&device->cq_tasklet);
118
119 (void)ib_dereg_mr(device->mr);
120 (void)ib_destroy_cq(device->cq);
121 (void)ib_dealloc_pd(device->pd);
122
123 device->mr = NULL;
124 device->cq = NULL;
125 device->pd = NULL;
126}
127
128/**
129 * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
130 *
131 * returns 0 on success, -1 on failure
132 */
133static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
134{
135 struct iser_device *device;
136 struct ib_qp_init_attr init_attr;
137 int ret;
138 struct ib_fmr_pool_param params;
139
140 BUG_ON(ib_conn->device == NULL);
141
142 device = ib_conn->device;
143
144 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
145 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
146 GFP_KERNEL);
147 if (!ib_conn->page_vec) {
148 ret = -ENOMEM;
149 goto alloc_err;
150 }
151 ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
152
153 params.page_shift = PAGE_SHIFT;
 154 /* when the first/last SG element is not start/end *
 155 * page aligned, the map would be of N+1 pages */
156 params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
157 /* make the pool size twice the max number of SCSI commands *
158 * the ML is expected to queue, watermark for unmap at 50% */
159 params.pool_size = ISCSI_XMIT_CMDS_MAX * 2;
160 params.dirty_watermark = ISCSI_XMIT_CMDS_MAX;
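	/* illustrative numbers, assuming the era's ISCSI_XMIT_CMDS_MAX of 128:
	 * a pool of 256 FMRs, with an unmap flush once 128 become dirty */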
161 params.cache = 0;
162 params.flush_function = NULL;
163 params.access = (IB_ACCESS_LOCAL_WRITE |
164 IB_ACCESS_REMOTE_WRITE |
165 IB_ACCESS_REMOTE_READ);
166
167 ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
168 if (IS_ERR(ib_conn->fmr_pool)) {
169 ret = PTR_ERR(ib_conn->fmr_pool);
170 goto fmr_pool_err;
171 }
172
173 memset(&init_attr, 0, sizeof init_attr);
174
175 init_attr.event_handler = iser_qp_event_callback;
176 init_attr.qp_context = (void *)ib_conn;
177 init_attr.send_cq = device->cq;
178 init_attr.recv_cq = device->cq;
179 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
180 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
181 init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN;
182 init_attr.cap.max_recv_sge = 2;
183 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
184 init_attr.qp_type = IB_QPT_RC;
185
186 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
187 if (ret)
188 goto qp_err;
189
190 ib_conn->qp = ib_conn->cma_id->qp;
191 iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
192 ib_conn, ib_conn->cma_id,
193 ib_conn->fmr_pool, ib_conn->cma_id->qp);
194 return ret;
195
196qp_err:
197 (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
198fmr_pool_err:
199 kfree(ib_conn->page_vec);
200alloc_err:
201 iser_err("unable to alloc mem or create resource, err %d\n", ret);
202 return ret;
203}
204
205/**
 206 * iser_free_ib_conn_res - releases the FMR pool, QP and CMA ID objects;
 207 * returns 0 on success, -1 on failure
208 */
209static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
210{
211 BUG_ON(ib_conn == NULL);
212
213 iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
214 ib_conn, ib_conn->cma_id,
215 ib_conn->fmr_pool, ib_conn->qp);
216
217 /* qp is created only once both addr & route are resolved */
218 if (ib_conn->fmr_pool != NULL)
219 ib_destroy_fmr_pool(ib_conn->fmr_pool);
220
221 if (ib_conn->qp != NULL)
222 rdma_destroy_qp(ib_conn->cma_id);
223
224 if (ib_conn->cma_id != NULL)
225 rdma_destroy_id(ib_conn->cma_id);
226
227 ib_conn->fmr_pool = NULL;
228 ib_conn->qp = NULL;
229 ib_conn->cma_id = NULL;
230 kfree(ib_conn->page_vec);
231
232 return 0;
233}
234
235/**
 236 * based on the resolved device node GUID, see if a device has already been
 237 * allocated for this device node. If there's no such device, create one.
238 */
239static
240struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
241{
242 struct list_head *p_list;
243 struct iser_device *device = NULL;
244
245 mutex_lock(&ig.device_list_mutex);
246
 247 for (p_list = ig.device_list.next; p_list != &ig.device_list;
 248      p_list = p_list->next, device = NULL /* no match so far */) {
 249 device = list_entry(p_list, struct iser_device, ig_list);
 250 /* find if there's a match using the node GUID */
 251 if (device->ib_device->node_guid == cma_id->device->node_guid)
 252 break;
 253 }
254
255 if (device == NULL) {
256 device = kzalloc(sizeof *device, GFP_KERNEL);
257 if (device == NULL)
258 goto out;
 259 /* attach the IB device to the newly allocated iser device */
260 device->ib_device = cma_id->device;
261 /* init the device and link it into ig device list */
262 if (iser_create_device_ib_res(device)) {
263 kfree(device);
264 device = NULL;
265 goto out;
266 }
267 list_add(&device->ig_list, &ig.device_list);
268 }
269out:
270 BUG_ON(device == NULL);
271 device->refcount++;
272 mutex_unlock(&ig.device_list_mutex);
273 return device;
274}
275
276/* if there's no demand for this device, release it */
277static void iser_device_try_release(struct iser_device *device)
278{
279 mutex_lock(&ig.device_list_mutex);
280 device->refcount--;
281 iser_err("device %p refcount %d\n",device,device->refcount);
282 if (!device->refcount) {
283 iser_free_device_ib_res(device);
284 list_del(&device->ig_list);
285 kfree(device);
286 }
287 mutex_unlock(&ig.device_list_mutex);
288}
289
290int iser_conn_state_comp(struct iser_conn *ib_conn,
291 enum iser_ib_conn_state comp)
292{
293 int ret;
294
295 spin_lock_bh(&ib_conn->lock);
296 ret = (ib_conn->state == comp);
297 spin_unlock_bh(&ib_conn->lock);
298 return ret;
299}
300
301static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
302 enum iser_ib_conn_state comp,
303 enum iser_ib_conn_state exch)
304{
305 int ret;
306
307 spin_lock_bh(&ib_conn->lock);
308 if ((ret = (ib_conn->state == comp)))
309 ib_conn->state = exch;
310 spin_unlock_bh(&ib_conn->lock);
311 return ret;
312}
313
314/**
 315 * iser_conn_terminate - triggers the disconnect procedures and waits for them to complete
316 */
317void iser_conn_terminate(struct iser_conn *ib_conn)
318{
319 int err = 0;
320
321 /* change the ib conn state only if the conn is UP, however always call
322 * rdma_disconnect since this is the only way to cause the CMA to change
323 * the QP state to ERROR
324 */
325
326 iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
327 err = rdma_disconnect(ib_conn->cma_id);
328 if (err)
329 iser_err("Failed to disconnect, conn: 0x%p err %d\n",
330 ib_conn,err);
331
332 wait_event_interruptible(ib_conn->wait,
333 ib_conn->state == ISER_CONN_DOWN);
334
335 iser_conn_release(ib_conn);
336}
337
338static void iser_connect_error(struct rdma_cm_id *cma_id)
339{
340 struct iser_conn *ib_conn;
341 ib_conn = (struct iser_conn *)cma_id->context;
342
343 ib_conn->state = ISER_CONN_DOWN;
344 wake_up_interruptible(&ib_conn->wait);
345}
346
347static void iser_addr_handler(struct rdma_cm_id *cma_id)
348{
349 struct iser_device *device;
350 struct iser_conn *ib_conn;
351 int ret;
352
353 device = iser_device_find_by_ib_device(cma_id);
354 ib_conn = (struct iser_conn *)cma_id->context;
355 ib_conn->device = device;
356
357 ret = rdma_resolve_route(cma_id, 1000);
358 if (ret) {
359 iser_err("resolve route failed: %d\n", ret);
360 iser_connect_error(cma_id);
361 }
362 return;
363}
364
365static void iser_route_handler(struct rdma_cm_id *cma_id)
366{
367 struct rdma_conn_param conn_param;
368 int ret;
369
370 ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
371 if (ret)
372 goto failure;
373
374 iser_dbg("path.mtu is %d setting it to %d\n",
375 cma_id->route.path_rec->mtu, IB_MTU_1024);
376
377 /* we must set the MTU to 1024 as this is what the target is assuming */
378 if (cma_id->route.path_rec->mtu > IB_MTU_1024)
379 cma_id->route.path_rec->mtu = IB_MTU_1024;
380
381 memset(&conn_param, 0, sizeof conn_param);
382 conn_param.responder_resources = 4;
383 conn_param.initiator_depth = 1;
384 conn_param.retry_count = 7;
385 conn_param.rnr_retry_count = 6;
386
387 ret = rdma_connect(cma_id, &conn_param);
388 if (ret) {
389 iser_err("failure connecting: %d\n", ret);
390 goto failure;
391 }
392
393 return;
394failure:
395 iser_connect_error(cma_id);
396}
397
398static void iser_connected_handler(struct rdma_cm_id *cma_id)
399{
400 struct iser_conn *ib_conn;
401
402 ib_conn = (struct iser_conn *)cma_id->context;
403 ib_conn->state = ISER_CONN_UP;
404 wake_up_interruptible(&ib_conn->wait);
405}
406
407static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
408{
409 struct iser_conn *ib_conn;
410
411 ib_conn = (struct iser_conn *)cma_id->context;
412 ib_conn->disc_evt_flag = 1;
413
414 /* getting here when the state is UP means that the conn is being *
415 * terminated asynchronously from the iSCSI layer's perspective. */
416 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
417 ISER_CONN_TERMINATING))
418 iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
419 ISCSI_ERR_CONN_FAILED);
420
421 /* Complete the termination process if no posts are pending */
422 if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) &&
423 (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
424 ib_conn->state = ISER_CONN_DOWN;
425 wake_up_interruptible(&ib_conn->wait);
426 }
427}
428
429static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
430{
431 int ret = 0;
432
433 iser_err("event %d conn %p id %p\n",event->event,cma_id->context,cma_id);
434
435 switch (event->event) {
436 case RDMA_CM_EVENT_ADDR_RESOLVED:
437 iser_addr_handler(cma_id);
438 break;
439 case RDMA_CM_EVENT_ROUTE_RESOLVED:
440 iser_route_handler(cma_id);
441 break;
442 case RDMA_CM_EVENT_ESTABLISHED:
443 iser_connected_handler(cma_id);
444 break;
445 case RDMA_CM_EVENT_ADDR_ERROR:
446 case RDMA_CM_EVENT_ROUTE_ERROR:
447 case RDMA_CM_EVENT_CONNECT_ERROR:
448 case RDMA_CM_EVENT_UNREACHABLE:
449 case RDMA_CM_EVENT_REJECTED:
450 iser_err("event: %d, error: %d\n", event->event, event->status);
451 iser_connect_error(cma_id);
452 break;
453 case RDMA_CM_EVENT_DISCONNECTED:
454 iser_disconnected_handler(cma_id);
455 break;
456 case RDMA_CM_EVENT_DEVICE_REMOVAL:
457 BUG();
458 break;
459 case RDMA_CM_EVENT_CONNECT_RESPONSE:
460 BUG();
461 break;
462 case RDMA_CM_EVENT_CONNECT_REQUEST:
463 default:
464 break;
465 }
466 return ret;
467}
468
469int iser_conn_init(struct iser_conn **ibconn)
470{
471 struct iser_conn *ib_conn;
472
473 ib_conn = kzalloc(sizeof *ib_conn, GFP_KERNEL);
474 if (!ib_conn) {
475 iser_err("can't alloc memory for struct iser_conn\n");
476 return -ENOMEM;
477 }
478 ib_conn->state = ISER_CONN_INIT;
479 init_waitqueue_head(&ib_conn->wait);
480 atomic_set(&ib_conn->post_recv_buf_count, 0);
481 atomic_set(&ib_conn->post_send_buf_count, 0);
482 INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker,
483 ib_conn);
484 INIT_LIST_HEAD(&ib_conn->conn_list);
485 spin_lock_init(&ib_conn->lock);
486
487 *ibconn = ib_conn;
488 return 0;
489}
490
491 /**
 492 * iser_connect - starts the process of connecting to the target;
 493 * unless non_blocking is set, sleeps until the connection is established or rejected
494 */
495int iser_connect(struct iser_conn *ib_conn,
496 struct sockaddr_in *src_addr,
497 struct sockaddr_in *dst_addr,
498 int non_blocking)
499{
500 struct sockaddr *src, *dst;
501 int err = 0;
502
 503 sprintf(ib_conn->name, "%d.%d.%d.%d:%d",
 504 NIPQUAD(dst_addr->sin_addr.s_addr), ntohs(dst_addr->sin_port));
505
506 /* the device is known only --after-- address resolution */
507 ib_conn->device = NULL;
508
 509 iser_err("connecting to: %d.%d.%d.%d, port %d\n",
 510 NIPQUAD(dst_addr->sin_addr), ntohs(dst_addr->sin_port));
511
512 ib_conn->state = ISER_CONN_PENDING;
513
514 ib_conn->cma_id = rdma_create_id(iser_cma_handler,
515 (void *)ib_conn,
516 RDMA_PS_TCP);
517 if (IS_ERR(ib_conn->cma_id)) {
518 err = PTR_ERR(ib_conn->cma_id);
519 iser_err("rdma_create_id failed: %d\n", err);
520 goto id_failure;
521 }
522
523 src = (struct sockaddr *)src_addr;
524 dst = (struct sockaddr *)dst_addr;
525 err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
526 if (err) {
527 iser_err("rdma_resolve_addr failed: %d\n", err);
528 goto addr_failure;
529 }
530
531 if (!non_blocking) {
532 wait_event_interruptible(ib_conn->wait,
533 (ib_conn->state != ISER_CONN_PENDING));
534
535 if (ib_conn->state != ISER_CONN_UP) {
536 err = -EIO;
537 goto connect_failure;
538 }
539 }
540
541 mutex_lock(&ig.connlist_mutex);
542 list_add(&ib_conn->conn_list, &ig.connlist);
543 mutex_unlock(&ig.connlist_mutex);
544 return 0;
545
546id_failure:
547 ib_conn->cma_id = NULL;
548addr_failure:
549 ib_conn->state = ISER_CONN_DOWN;
550connect_failure:
551 iser_conn_release(ib_conn);
552 return err;
553}
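/*
 * Illustrative event flow for a successful blocking connect, as handled by
 * iser_cma_handler() above: rdma_resolve_addr() -> ADDR_RESOLVED ->
 * iser_addr_handler() -> rdma_resolve_route() -> ROUTE_RESOLVED ->
 * iser_route_handler() (creates the FMR pool + QP, calls rdma_connect()) ->
 * ESTABLISHED -> iser_connected_handler() sets ISER_CONN_UP and wakes the
 * waiter in iser_connect() above.
 */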
554
555/**
 556 * iser_conn_release - frees all conn objects and deallocs the conn descriptor
557 */
558void iser_conn_release(struct iser_conn *ib_conn)
559{
560 struct iser_device *device = ib_conn->device;
561
562 BUG_ON(ib_conn->state != ISER_CONN_DOWN);
563
564 mutex_lock(&ig.connlist_mutex);
565 list_del(&ib_conn->conn_list);
566 mutex_unlock(&ig.connlist_mutex);
567
568 iser_free_ib_conn_res(ib_conn);
569 ib_conn->device = NULL;
570 /* on EVENT_ADDR_ERROR there's no device yet for this conn */
571 if (device != NULL)
572 iser_device_try_release(device);
573 kfree(ib_conn);
574}
575
576
577/**
578 * iser_reg_page_vec - Register physical memory
579 *
580 * returns: 0 on success, errno code on failure
581 */
582int iser_reg_page_vec(struct iser_conn *ib_conn,
583 struct iser_page_vec *page_vec,
584 struct iser_mem_reg *mem_reg)
585{
586 struct ib_pool_fmr *mem;
587 u64 io_addr;
588 u64 *page_list;
589 int status;
590
591 page_list = page_vec->pages;
592 io_addr = page_list[0];
593
594 mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
595 page_list,
596 page_vec->length,
597 &io_addr);
598
599 if (IS_ERR(mem)) {
600 status = (int)PTR_ERR(mem);
601 iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
602 return status;
603 }
604
605 mem_reg->lkey = mem->fmr->lkey;
606 mem_reg->rkey = mem->fmr->rkey;
607 mem_reg->len = page_vec->length * PAGE_SIZE;
608 mem_reg->va = io_addr;
609 mem_reg->mem_h = (void *)mem;
610
611 mem_reg->va += page_vec->offset;
612 mem_reg->len = page_vec->data_size;
613
614 iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
615 "entry[0]: (0x%08lx,%ld)] -> "
616 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
617 page_vec, page_vec->length,
618 (unsigned long)page_vec->pages[0],
619 (unsigned long)page_vec->data_size,
620 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
621 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
622 return 0;
623}
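/*
 * Illustrative example (matching the sketch in iser_memory.c): for a
 * page_vec with pages { 0x10000, 0x11000, 0x12000 }, offset 0x800 and
 * data_size 0x1C00, the FMR is mapped at io_addr = 0x10000 and the exposed
 * region becomes va = 0x10800, len = 0x1C00, the exact original byte range.
 */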
624
625/**
 626 * iser_unreg_mem - unregister (previously registered) memory.
627 */
628void iser_unreg_mem(struct iser_mem_reg *reg)
629{
630 int ret;
631
632 iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
633
634 ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
635 if (ret)
636 iser_err("ib_fmr_pool_unmap failed %d\n", ret);
637
638 reg->mem_h = NULL;
639}
640
641/**
642 * iser_dto_to_iov - builds IOV from a dto descriptor
643 */
644static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len)
645{
646 int i;
647 struct ib_sge *sge;
648 struct iser_regd_buf *regd_buf;
649
650 if (dto->regd_vector_len > iov_len) {
651 iser_err("iov size %d too small for posting dto of len %d\n",
652 iov_len, dto->regd_vector_len);
653 BUG();
654 }
655
656 for (i = 0; i < dto->regd_vector_len; i++) {
657 sge = &iov[i];
658 regd_buf = dto->regd[i];
659
660 sge->addr = regd_buf->reg.va;
661 sge->length = regd_buf->reg.len;
662 sge->lkey = regd_buf->reg.lkey;
663
664 if (dto->used_sz[i] > 0) /* Adjust size */
665 sge->length = dto->used_sz[i];
666
667 /* offset and length should not exceed the regd buf length */
668 if (sge->length + dto->offset[i] > regd_buf->reg.len) {
669 iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:"
670 "%ld in dto:0x%p [%d], va:0x%08lX\n",
671 (unsigned long)sge->length, dto->offset[i],
672 (unsigned long)regd_buf->reg.len, dto, i,
673 (unsigned long)sge->addr);
674 BUG();
675 }
676
677 sge->addr += dto->offset[i]; /* Adjust offset */
678 }
679}
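/*
 * Illustrative mapping (not from the original source): a send DTO whose
 * regd[0] holds the iSER/iSCSI headers and whose regd[1] holds a 512 byte
 * payload becomes iov[0] = { header va, header len, lkey } and
 * iov[1] = { data va, 512, lkey }, with used_sz[i] and offset[i] trimming
 * an SGE when only part of a regd buf is being sent.
 */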
680
681/**
682 * iser_post_recv - Posts a receive buffer.
683 *
684 * returns 0 on success, -1 on failure
685 */
686int iser_post_recv(struct iser_desc *rx_desc)
687{
688 int ib_ret, ret_val = 0;
689 struct ib_recv_wr recv_wr, *recv_wr_failed;
690 struct ib_sge iov[2];
691 struct iser_conn *ib_conn;
692 struct iser_dto *recv_dto = &rx_desc->dto;
693
694 /* Retrieve conn */
695 ib_conn = recv_dto->conn->ib_conn;
696
697 iser_dto_to_iov(recv_dto, iov, 2);
698
699 recv_wr.next = NULL;
700 recv_wr.sg_list = iov;
701 recv_wr.num_sge = recv_dto->regd_vector_len;
702 recv_wr.wr_id = (unsigned long)rx_desc;
703
704 atomic_inc(&ib_conn->post_recv_buf_count);
705 ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed);
706 if (ib_ret) {
707 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
708 atomic_dec(&ib_conn->post_recv_buf_count);
709 ret_val = -1;
710 }
711
712 return ret_val;
713}
714
715/**
 716 * iser_post_send - Initiate a Send DTO operation
717 *
718 * returns 0 on success, -1 on failure
719 */
720int iser_post_send(struct iser_desc *tx_desc)
721{
722 int ib_ret, ret_val = 0;
723 struct ib_send_wr send_wr, *send_wr_failed;
724 struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN];
725 struct iser_conn *ib_conn;
726 struct iser_dto *dto = &tx_desc->dto;
727
728 ib_conn = dto->conn->ib_conn;
729
730 iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
731
732 send_wr.next = NULL;
733 send_wr.wr_id = (unsigned long)tx_desc;
734 send_wr.sg_list = iov;
735 send_wr.num_sge = dto->regd_vector_len;
736 send_wr.opcode = IB_WR_SEND;
737 send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0;
738
739 atomic_inc(&ib_conn->post_send_buf_count);
740
741 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
742 if (ib_ret) {
743 iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n",
744 dto, dto->regd_vector_len);
745 iser_err("ib_post_send failed, ret:%d\n", ib_ret);
746 atomic_dec(&ib_conn->post_send_buf_count);
747 ret_val = -1;
748 }
749
750 return ret_val;
751}
752
753static void iser_comp_error_worker(void *data)
754{
755 struct iser_conn *ib_conn = data;
756
757 /* getting here when the state is UP means that the conn is being *
758 * terminated asynchronously from the iSCSI layer's perspective. */
759 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
760 ISER_CONN_TERMINATING))
761 iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
762 ISCSI_ERR_CONN_FAILED);
763
764 /* complete the termination process if disconnect event was delivered *
765 * note there are no more non completed posts to the QP */
766 if (ib_conn->disc_evt_flag) {
767 ib_conn->state = ISER_CONN_DOWN;
768 wake_up_interruptible(&ib_conn->wait);
769 }
770}
771
772static void iser_handle_comp_error(struct iser_desc *desc)
773{
774 struct iser_dto *dto = &desc->dto;
775 struct iser_conn *ib_conn = dto->conn->ib_conn;
776
777 iser_dto_buffs_release(dto);
778
779 if (desc->type == ISCSI_RX) {
780 kfree(desc->data);
781 kmem_cache_free(ig.desc_cache, desc);
782 atomic_dec(&ib_conn->post_recv_buf_count);
783 } else { /* type is TX control/command/dataout */
784 if (desc->type == ISCSI_TX_DATAOUT)
785 kmem_cache_free(ig.desc_cache, desc);
786 atomic_dec(&ib_conn->post_send_buf_count);
787 }
788
789 if (atomic_read(&ib_conn->post_recv_buf_count) == 0 &&
790 atomic_read(&ib_conn->post_send_buf_count) == 0)
791 schedule_work(&ib_conn->comperror_work);
792}
793
794static void iser_cq_tasklet_fn(unsigned long data)
795{
796 struct iser_device *device = (struct iser_device *)data;
797 struct ib_cq *cq = device->cq;
798 struct ib_wc wc;
799 struct iser_desc *desc;
800 unsigned long xfer_len;
801
802 while (ib_poll_cq(cq, 1, &wc) == 1) {
803 desc = (struct iser_desc *) (unsigned long) wc.wr_id;
804 BUG_ON(desc == NULL);
805
806 if (wc.status == IB_WC_SUCCESS) {
807 if (desc->type == ISCSI_RX) {
808 xfer_len = (unsigned long)wc.byte_len;
809 iser_rcv_completion(desc, xfer_len);
810 } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */
811 iser_snd_completion(desc);
812 } else {
813 iser_err("comp w. error op %d status %d\n",desc->type,wc.status);
814 iser_handle_comp_error(desc);
815 }
816 }
 817 /* it is assumed here that arming the CQ only once it is empty *
 818 * would not cause interrupts to be missed */
819 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
820}
821
822static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
823{
824 struct iser_device *device = (struct iser_device *)cq_context;
825
826 tasklet_schedule(&device->cq_tasklet);
827}