aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds/iw.c
diff options
context:
space:
mode:
author: Andy Grover <andy.grover@oracle.com> 2009-02-24 10:30:36 -0500
committer: David S. Miller <davem@davemloft.net> 2009-02-27 02:39:33 -0500
commit: fcd8b7c0ecf792dd824b2edcd63cb2c08563c340 (patch)
tree: 9ba10fe626cc9129f9138bc81b1fa61c74097e4f /net/rds/iw.c
parent: e6babe4cc4ce48577d743cc0de19a214f2527956 (diff)
RDS: Add iWARP support
Support for iWARP NICs is implemented as a separate RDS transport from IB. The code, however, is very similar to IB (it was forked, basically), so let's keep it in one changeset. The reason for this duplication is that, despite its similarity to IB, there are a number of places where it has different semantics. iWARP zcopy support is still under development, and giving it its own sandbox ensures that IB code isn't disrupted while iWARP changes. Over time these transports will re-converge. Signed-off-by: Andy Grover <andy.grover@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/iw.c')
-rw-r--r-- net/rds/iw.c 333
1 files changed, 333 insertions, 0 deletions
diff --git a/net/rds/iw.c b/net/rds/iw.c
new file mode 100644
index 000000000000..1b56905c4c08
--- /dev/null
+++ b/net/rds/iw.c
@@ -0,0 +1,333 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/if.h>
36#include <linux/netdevice.h>
37#include <linux/inetdevice.h>
38#include <linux/if_arp.h>
39#include <linux/delay.h>
40
41#include "rds.h"
42#include "iw.h"
43
44unsigned int fastreg_pool_size = RDS_FASTREG_POOL_SIZE;
45unsigned int fastreg_message_size = RDS_FASTREG_SIZE + 1; /* +1 allows for unaligned MRs */
46
47module_param(fastreg_pool_size, int, 0444);
48MODULE_PARM_DESC(fastreg_pool_size, " Max number of fastreg MRs per device");
49module_param(fastreg_message_size, int, 0444);
50MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MRs)");
51
52struct list_head rds_iw_devices;
53
54DEFINE_SPINLOCK(iw_nodev_conns_lock);
55LIST_HEAD(iw_nodev_conns);
56
57void rds_iw_add_one(struct ib_device *device)
58{
59 struct rds_iw_device *rds_iwdev;
60 struct ib_device_attr *dev_attr;
61
62 /* Only handle iwarp devices */
63 if (device->node_type != RDMA_NODE_RNIC)
64 return;
65
66 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
67 if (!dev_attr)
68 return;
69
70 if (ib_query_device(device, dev_attr)) {
71 rdsdebug("Query device failed for %s\n", device->name);
72 goto free_attr;
73 }
74
75 rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
76 if (!rds_iwdev)
77 goto free_attr;
78
79 spin_lock_init(&rds_iwdev->spinlock);
80
81 rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
82 rds_iwdev->max_wrs = dev_attr->max_qp_wr;
83 rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
84
85 rds_iwdev->page_shift = max(PAGE_SHIFT, ffs(dev_attr->page_size_cap) - 1);
86
87 rds_iwdev->dev = device;
88 rds_iwdev->pd = ib_alloc_pd(device);
89 if (IS_ERR(rds_iwdev->pd))
90 goto free_dev;
91
92 if (!rds_iwdev->dma_local_lkey) {
93 if (device->node_type != RDMA_NODE_RNIC) {
94 rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd,
95 IB_ACCESS_LOCAL_WRITE);
96 } else {
97 rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd,
98 IB_ACCESS_REMOTE_READ |
99 IB_ACCESS_REMOTE_WRITE |
100 IB_ACCESS_LOCAL_WRITE);
101 }
102 if (IS_ERR(rds_iwdev->mr))
103 goto err_pd;
104 } else
105 rds_iwdev->mr = NULL;
106
107 rds_iwdev->mr_pool = rds_iw_create_mr_pool(rds_iwdev);
108 if (IS_ERR(rds_iwdev->mr_pool)) {
109 rds_iwdev->mr_pool = NULL;
110 goto err_mr;
111 }
112
113 INIT_LIST_HEAD(&rds_iwdev->cm_id_list);
114 INIT_LIST_HEAD(&rds_iwdev->conn_list);
115 list_add_tail(&rds_iwdev->list, &rds_iw_devices);
116
117 ib_set_client_data(device, &rds_iw_client, rds_iwdev);
118
119 goto free_attr;
120
121err_mr:
122 if (rds_iwdev->mr)
123 ib_dereg_mr(rds_iwdev->mr);
124err_pd:
125 ib_dealloc_pd(rds_iwdev->pd);
126free_dev:
127 kfree(rds_iwdev);
128free_attr:
129 kfree(dev_attr);
130}
131
132void rds_iw_remove_one(struct ib_device *device)
133{
134 struct rds_iw_device *rds_iwdev;
135 struct rds_iw_cm_id *i_cm_id, *next;
136
137 rds_iwdev = ib_get_client_data(device, &rds_iw_client);
138 if (!rds_iwdev)
139 return;
140
141 spin_lock_irq(&rds_iwdev->spinlock);
142 list_for_each_entry_safe(i_cm_id, next, &rds_iwdev->cm_id_list, list) {
143 list_del(&i_cm_id->list);
144 kfree(i_cm_id);
145 }
146 spin_unlock_irq(&rds_iwdev->spinlock);
147
148 rds_iw_remove_conns(rds_iwdev);
149
150 if (rds_iwdev->mr_pool)
151 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
152
153 if (rds_iwdev->mr)
154 ib_dereg_mr(rds_iwdev->mr);
155
156 while (ib_dealloc_pd(rds_iwdev->pd)) {
157 rdsdebug("Failed to dealloc pd %p\n", rds_iwdev->pd);
158 msleep(1);
159 }
160
161 list_del(&rds_iwdev->list);
162 kfree(rds_iwdev);
163}
164
165struct ib_client rds_iw_client = {
166 .name = "rds_iw",
167 .add = rds_iw_add_one,
168 .remove = rds_iw_remove_one
169};
170
171static int rds_iw_conn_info_visitor(struct rds_connection *conn,
172 void *buffer)
173{
174 struct rds_info_rdma_connection *iinfo = buffer;
175 struct rds_iw_connection *ic;
176
177 /* We will only ever look at IB transports */
178 if (conn->c_trans != &rds_iw_transport)
179 return 0;
180
181 iinfo->src_addr = conn->c_laddr;
182 iinfo->dst_addr = conn->c_faddr;
183
184 memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
185 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
186 if (rds_conn_state(conn) == RDS_CONN_UP) {
187 struct rds_iw_device *rds_iwdev;
188 struct rdma_dev_addr *dev_addr;
189
190 ic = conn->c_transport_data;
191 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
192
193 ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
194 ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
195
196 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
197 iinfo->max_send_wr = ic->i_send_ring.w_nr;
198 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
199 iinfo->max_send_sge = rds_iwdev->max_sge;
200 rds_iw_get_mr_info(rds_iwdev, iinfo);
201 }
202 return 1;
203}
204
205static void rds_iw_ic_info(struct socket *sock, unsigned int len,
206 struct rds_info_iterator *iter,
207 struct rds_info_lengths *lens)
208{
209 rds_for_each_conn_info(sock, len, iter, lens,
210 rds_iw_conn_info_visitor,
211 sizeof(struct rds_info_rdma_connection));
212}
213
214
215/*
216 * Early RDS/IB was built to only bind to an address if there is an IPoIB
217 * device with that address set.
218 *
219 * If it were me, I'd advocate for something more flexible. Sending and
220 * receiving should be device-agnostic. Transports would try and maintain
221 * connections between peers who have messages queued. Userspace would be
222 * allowed to influence which paths have priority. We could call userspace
223 * asserting this policy "routing".
224 */
225static int rds_iw_laddr_check(__be32 addr)
226{
227 int ret;
228 struct rdma_cm_id *cm_id;
229 struct sockaddr_in sin;
230
231 /* Create a CMA ID and try to bind it. This catches both
232 * IB and iWARP capable NICs.
233 */
234 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
235 if (!cm_id)
236 return -EADDRNOTAVAIL;
237
238 memset(&sin, 0, sizeof(sin));
239 sin.sin_family = AF_INET;
240 sin.sin_addr.s_addr = addr;
241
242 /* rdma_bind_addr will only succeed for IB & iWARP devices */
243 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
244 /* due to this, we will claim to support IB devices unless we
245 check node_type. */
246 if (ret || cm_id->device->node_type != RDMA_NODE_RNIC)
247 ret = -EADDRNOTAVAIL;
248
249 rdsdebug("addr %pI4 ret %d node type %d\n",
250 &addr, ret,
251 cm_id->device ? cm_id->device->node_type : -1);
252
253 rdma_destroy_id(cm_id);
254
255 return ret;
256}
257
258void rds_iw_exit(void)
259{
260 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
261 rds_iw_remove_nodev_conns();
262 ib_unregister_client(&rds_iw_client);
263 rds_iw_sysctl_exit();
264 rds_iw_recv_exit();
265 rds_trans_unregister(&rds_iw_transport);
266}
267
268struct rds_transport rds_iw_transport = {
269 .laddr_check = rds_iw_laddr_check,
270 .xmit_complete = rds_iw_xmit_complete,
271 .xmit = rds_iw_xmit,
272 .xmit_cong_map = NULL,
273 .xmit_rdma = rds_iw_xmit_rdma,
274 .recv = rds_iw_recv,
275 .conn_alloc = rds_iw_conn_alloc,
276 .conn_free = rds_iw_conn_free,
277 .conn_connect = rds_iw_conn_connect,
278 .conn_shutdown = rds_iw_conn_shutdown,
279 .inc_copy_to_user = rds_iw_inc_copy_to_user,
280 .inc_purge = rds_iw_inc_purge,
281 .inc_free = rds_iw_inc_free,
282 .cm_initiate_connect = rds_iw_cm_initiate_connect,
283 .cm_handle_connect = rds_iw_cm_handle_connect,
284 .cm_connect_complete = rds_iw_cm_connect_complete,
285 .stats_info_copy = rds_iw_stats_info_copy,
286 .exit = rds_iw_exit,
287 .get_mr = rds_iw_get_mr,
288 .sync_mr = rds_iw_sync_mr,
289 .free_mr = rds_iw_free_mr,
290 .flush_mrs = rds_iw_flush_mrs,
291 .t_owner = THIS_MODULE,
292 .t_name = "iwarp",
293 .t_prefer_loopback = 1,
294};
295
296int __init rds_iw_init(void)
297{
298 int ret;
299
300 INIT_LIST_HEAD(&rds_iw_devices);
301
302 ret = ib_register_client(&rds_iw_client);
303 if (ret)
304 goto out;
305
306 ret = rds_iw_sysctl_init();
307 if (ret)
308 goto out_ibreg;
309
310 ret = rds_iw_recv_init();
311 if (ret)
312 goto out_sysctl;
313
314 ret = rds_trans_register(&rds_iw_transport);
315 if (ret)
316 goto out_recv;
317
318 rds_info_register_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
319
320 goto out;
321
322out_recv:
323 rds_iw_recv_exit();
324out_sysctl:
325 rds_iw_sysctl_exit();
326out_ibreg:
327 ib_unregister_client(&rds_iw_client);
328out:
329 return ret;
330}
331
332MODULE_LICENSE("GPL");
333