aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSean Hefty <sean.hefty@intel.com>2006-06-17 23:37:28 -0400
committerRoland Dreier <rolandd@cisco.com>2006-06-17 23:37:28 -0400
commit7025fcd36bd62af2c6ca0ea3490c00b216c4d168 (patch)
treee2fda3944176fcafb24876680d6eb8b6be394fe6
parenta1e8733e557bb390e13aa00ef044a6022c8d0bb2 (diff)
IB: address translation to map IP toIB addresses (GIDs)
Add an address translation service that maps IP addresses to InfiniBand GID addresses using IPoIB. Signed-off-by: Sean Hefty <sean.hefty@intel.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/Kconfig5
-rw-r--r--drivers/infiniband/core/Makefile6
-rw-r--r--drivers/infiniband/core/addr.c367
-rw-r--r--include/rdma/ib_addr.h114
4 files changed, 491 insertions, 1 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index afc612b8577d..ba2d6505e9a4 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
29 libibverbs, libibcm and a hardware driver library from 29 libibverbs, libibcm and a hardware driver library from
30 <http://www.openib.org>. 30 <http://www.openib.org>.
31 31
32config INFINIBAND_ADDR_TRANS
33 bool
34 depends on INFINIBAND && INET
35 default y
36
32source "drivers/infiniband/hw/mthca/Kconfig" 37source "drivers/infiniband/hw/mthca/Kconfig"
33source "drivers/infiniband/hw/ipath/Kconfig" 38source "drivers/infiniband/hw/ipath/Kconfig"
34 39
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index a56bf9c994f6..05095919d168 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,5 +1,7 @@
1addr_trans-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o
2
1obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ 3obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
2 ib_cm.o 4 ib_cm.o $(addr_trans-y)
3obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o 5obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
4obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o 6obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o
5 7
@@ -12,6 +14,8 @@ ib_sa-y := sa_query.o
12 14
13ib_cm-y := cm.o 15ib_cm-y := cm.o
14 16
17ib_addr-y := addr.o
18
15ib_umad-y := user_mad.o 19ib_umad-y := user_mad.o
16 20
17ib_ucm-y := ucm.o 21ib_ucm-y := ucm.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
new file mode 100644
index 000000000000..d294bbc42f09
--- /dev/null
+++ b/drivers/infiniband/core/addr.c
@@ -0,0 +1,367 @@
1/*
2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation. All rights reserved.
6 *
7 * This Software is licensed under one of the following licenses:
8 *
9 * 1) under the terms of the "Common Public License 1.0" a copy of which is
10 * available from the Open Source Initiative, see
11 * http://www.opensource.org/licenses/cpl.php.
12 *
13 * 2) under the terms of the "The BSD License" a copy of which is
14 * available from the Open Source Initiative, see
15 * http://www.opensource.org/licenses/bsd-license.php.
16 *
17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18 * copy of which is available from the Open Source Initiative, see
19 * http://www.opensource.org/licenses/gpl-license.php.
20 *
21 * Licensee has the right to choose one of the above licenses.
22 *
23 * Redistributions of source code must retain the above copyright
24 * notice and one of the license notices.
25 *
26 * Redistributions in binary form must reproduce both the above copyright
27 * notice, one of the license notices in the documentation
28 * and/or other materials provided with the distribution.
29 */
30
31#include <linux/mutex.h>
32#include <linux/inetdevice.h>
33#include <linux/workqueue.h>
34#include <linux/if_arp.h>
35#include <net/arp.h>
36#include <net/neighbour.h>
37#include <net/route.h>
38#include <rdma/ib_addr.h>
39
40MODULE_AUTHOR("Sean Hefty");
41MODULE_DESCRIPTION("IB Address Translation");
42MODULE_LICENSE("Dual BSD/GPL");
43
44struct addr_req {
45 struct list_head list;
46 struct sockaddr src_addr;
47 struct sockaddr dst_addr;
48 struct rdma_dev_addr *addr;
49 void *context;
50 void (*callback)(int status, struct sockaddr *src_addr,
51 struct rdma_dev_addr *addr, void *context);
52 unsigned long timeout;
53 int status;
54};
55
56static void process_req(void *data);
57
58static DEFINE_MUTEX(lock);
59static LIST_HEAD(req_list);
60static DECLARE_WORK(work, process_req, NULL);
61static struct workqueue_struct *addr_wq;
62
63static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
64 unsigned char *dst_dev_addr)
65{
66 switch (dev->type) {
67 case ARPHRD_INFINIBAND:
68 dev_addr->dev_type = IB_NODE_CA;
69 break;
70 default:
71 return -EADDRNOTAVAIL;
72 }
73
74 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
75 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
76 if (dst_dev_addr)
77 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
78 return 0;
79}
80
81int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
82{
83 struct net_device *dev;
84 u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
85 int ret;
86
87 dev = ip_dev_find(ip);
88 if (!dev)
89 return -EADDRNOTAVAIL;
90
91 ret = copy_addr(dev_addr, dev, NULL);
92 dev_put(dev);
93 return ret;
94}
95EXPORT_SYMBOL(rdma_translate_ip);
96
97static void set_timeout(unsigned long time)
98{
99 unsigned long delay;
100
101 cancel_delayed_work(&work);
102
103 delay = time - jiffies;
104 if ((long)delay <= 0)
105 delay = 1;
106
107 queue_delayed_work(addr_wq, &work, delay);
108}
109
110static void queue_req(struct addr_req *req)
111{
112 struct addr_req *temp_req;
113
114 mutex_lock(&lock);
115 list_for_each_entry_reverse(temp_req, &req_list, list) {
116 if (time_after(req->timeout, temp_req->timeout))
117 break;
118 }
119
120 list_add(&req->list, &temp_req->list);
121
122 if (req_list.next == &req->list)
123 set_timeout(req->timeout);
124 mutex_unlock(&lock);
125}
126
127static void addr_send_arp(struct sockaddr_in *dst_in)
128{
129 struct rtable *rt;
130 struct flowi fl;
131 u32 dst_ip = dst_in->sin_addr.s_addr;
132
133 memset(&fl, 0, sizeof fl);
134 fl.nl_u.ip4_u.daddr = dst_ip;
135 if (ip_route_output_key(&rt, &fl))
136 return;
137
138 arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
139 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
140 ip_rt_put(rt);
141}
142
143static int addr_resolve_remote(struct sockaddr_in *src_in,
144 struct sockaddr_in *dst_in,
145 struct rdma_dev_addr *addr)
146{
147 u32 src_ip = src_in->sin_addr.s_addr;
148 u32 dst_ip = dst_in->sin_addr.s_addr;
149 struct flowi fl;
150 struct rtable *rt;
151 struct neighbour *neigh;
152 int ret;
153
154 memset(&fl, 0, sizeof fl);
155 fl.nl_u.ip4_u.daddr = dst_ip;
156 fl.nl_u.ip4_u.saddr = src_ip;
157 ret = ip_route_output_key(&rt, &fl);
158 if (ret)
159 goto out;
160
161 /* If the device does ARP internally, return 'done' */
162 if (rt->idev->dev->flags & IFF_NOARP) {
163 copy_addr(addr, rt->idev->dev, NULL);
164 goto put;
165 }
166
167 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
168 if (!neigh) {
169 ret = -ENODATA;
170 goto put;
171 }
172
173 if (!(neigh->nud_state & NUD_VALID)) {
174 ret = -ENODATA;
175 goto release;
176 }
177
178 if (!src_ip) {
179 src_in->sin_family = dst_in->sin_family;
180 src_in->sin_addr.s_addr = rt->rt_src;
181 }
182
183 ret = copy_addr(addr, neigh->dev, neigh->ha);
184release:
185 neigh_release(neigh);
186put:
187 ip_rt_put(rt);
188out:
189 return ret;
190}
191
192static void process_req(void *data)
193{
194 struct addr_req *req, *temp_req;
195 struct sockaddr_in *src_in, *dst_in;
196 struct list_head done_list;
197
198 INIT_LIST_HEAD(&done_list);
199
200 mutex_lock(&lock);
201 list_for_each_entry_safe(req, temp_req, &req_list, list) {
202 if (req->status) {
203 src_in = (struct sockaddr_in *) &req->src_addr;
204 dst_in = (struct sockaddr_in *) &req->dst_addr;
205 req->status = addr_resolve_remote(src_in, dst_in,
206 req->addr);
207 }
208 if (req->status && time_after(jiffies, req->timeout))
209 req->status = -ETIMEDOUT;
210 else if (req->status == -ENODATA)
211 continue;
212
213 list_del(&req->list);
214 list_add_tail(&req->list, &done_list);
215 }
216
217 if (!list_empty(&req_list)) {
218 req = list_entry(req_list.next, struct addr_req, list);
219 set_timeout(req->timeout);
220 }
221 mutex_unlock(&lock);
222
223 list_for_each_entry_safe(req, temp_req, &done_list, list) {
224 list_del(&req->list);
225 req->callback(req->status, &req->src_addr, req->addr,
226 req->context);
227 kfree(req);
228 }
229}
230
231static int addr_resolve_local(struct sockaddr_in *src_in,
232 struct sockaddr_in *dst_in,
233 struct rdma_dev_addr *addr)
234{
235 struct net_device *dev;
236 u32 src_ip = src_in->sin_addr.s_addr;
237 u32 dst_ip = dst_in->sin_addr.s_addr;
238 int ret;
239
240 dev = ip_dev_find(dst_ip);
241 if (!dev)
242 return -EADDRNOTAVAIL;
243
244 if (ZERONET(src_ip)) {
245 src_in->sin_family = dst_in->sin_family;
246 src_in->sin_addr.s_addr = dst_ip;
247 ret = copy_addr(addr, dev, dev->dev_addr);
248 } else if (LOOPBACK(src_ip)) {
249 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
250 if (!ret)
251 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
252 } else {
253 ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
254 if (!ret)
255 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
256 }
257
258 dev_put(dev);
259 return ret;
260}
261
262int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
263 struct rdma_dev_addr *addr, int timeout_ms,
264 void (*callback)(int status, struct sockaddr *src_addr,
265 struct rdma_dev_addr *addr, void *context),
266 void *context)
267{
268 struct sockaddr_in *src_in, *dst_in;
269 struct addr_req *req;
270 int ret = 0;
271
272 req = kmalloc(sizeof *req, GFP_KERNEL);
273 if (!req)
274 return -ENOMEM;
275 memset(req, 0, sizeof *req);
276
277 if (src_addr)
278 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
279 memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
280 req->addr = addr;
281 req->callback = callback;
282 req->context = context;
283
284 src_in = (struct sockaddr_in *) &req->src_addr;
285 dst_in = (struct sockaddr_in *) &req->dst_addr;
286
287 req->status = addr_resolve_local(src_in, dst_in, addr);
288 if (req->status == -EADDRNOTAVAIL)
289 req->status = addr_resolve_remote(src_in, dst_in, addr);
290
291 switch (req->status) {
292 case 0:
293 req->timeout = jiffies;
294 queue_req(req);
295 break;
296 case -ENODATA:
297 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
298 queue_req(req);
299 addr_send_arp(dst_in);
300 break;
301 default:
302 ret = req->status;
303 kfree(req);
304 break;
305 }
306 return ret;
307}
308EXPORT_SYMBOL(rdma_resolve_ip);
309
310void rdma_addr_cancel(struct rdma_dev_addr *addr)
311{
312 struct addr_req *req, *temp_req;
313
314 mutex_lock(&lock);
315 list_for_each_entry_safe(req, temp_req, &req_list, list) {
316 if (req->addr == addr) {
317 req->status = -ECANCELED;
318 req->timeout = jiffies;
319 list_del(&req->list);
320 list_add(&req->list, &req_list);
321 set_timeout(req->timeout);
322 break;
323 }
324 }
325 mutex_unlock(&lock);
326}
327EXPORT_SYMBOL(rdma_addr_cancel);
328
329static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev,
330 struct packet_type *pkt, struct net_device *orig_dev)
331{
332 struct arphdr *arp_hdr;
333
334 arp_hdr = (struct arphdr *) skb->nh.raw;
335
336 if (arp_hdr->ar_op == htons(ARPOP_REQUEST) ||
337 arp_hdr->ar_op == htons(ARPOP_REPLY))
338 set_timeout(jiffies);
339
340 kfree_skb(skb);
341 return 0;
342}
343
344static struct packet_type addr_arp = {
345 .type = __constant_htons(ETH_P_ARP),
346 .func = addr_arp_recv,
347 .af_packet_priv = (void*) 1,
348};
349
350static int addr_init(void)
351{
352 addr_wq = create_singlethread_workqueue("ib_addr_wq");
353 if (!addr_wq)
354 return -ENOMEM;
355
356 dev_add_pack(&addr_arp);
357 return 0;
358}
359
360static void addr_cleanup(void)
361{
362 dev_remove_pack(&addr_arp);
363 destroy_workqueue(addr_wq);
364}
365
366module_init(addr_init);
367module_exit(addr_cleanup);
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
new file mode 100644
index 000000000000..fcb5ba87dcc5
--- /dev/null
+++ b/include/rdma/ib_addr.h
@@ -0,0 +1,114 @@
1/*
2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 *
5 * This Software is licensed under one of the following licenses:
6 *
7 * 1) under the terms of the "Common Public License 1.0" a copy of which is
8 * available from the Open Source Initiative, see
9 * http://www.opensource.org/licenses/cpl.php.
10 *
11 * 2) under the terms of the "The BSD License" a copy of which is
12 * available from the Open Source Initiative, see
13 * http://www.opensource.org/licenses/bsd-license.php.
14 *
15 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
16 * copy of which is available from the Open Source Initiative, see
17 * http://www.opensource.org/licenses/gpl-license.php.
18 *
19 * Licensee has the right to choose one of the above licenses.
20 *
21 * Redistributions of source code must retain the above copyright
22 * notice and one of the license notices.
23 *
24 * Redistributions in binary form must reproduce both the above copyright
25 * notice, one of the license notices in the documentation
26 * and/or other materials provided with the distribution.
27 *
28 */
29
30#if !defined(IB_ADDR_H)
31#define IB_ADDR_H
32
33#include <linux/in.h>
34#include <linux/in6.h>
35#include <linux/netdevice.h>
36#include <linux/socket.h>
37#include <rdma/ib_verbs.h>
38
39struct rdma_dev_addr {
40 unsigned char src_dev_addr[MAX_ADDR_LEN];
41 unsigned char dst_dev_addr[MAX_ADDR_LEN];
42 unsigned char broadcast[MAX_ADDR_LEN];
43 enum ib_node_type dev_type;
44};
45
46/**
47 * rdma_translate_ip - Translate a local IP address to an RDMA hardware
48 * address.
49 */
50int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr);
51
52/**
53 * rdma_resolve_ip - Resolve source and destination IP addresses to
54 * RDMA hardware addresses.
55 * @src_addr: An optional source address to use in the resolution. If a
56 * source address is not provided, a usable address will be returned via
57 * the callback.
58 * @dst_addr: The destination address to resolve.
59 * @addr: A reference to a data location that will receive the resolved
60 * addresses. The data location must remain valid until the callback has
61 * been invoked.
62 * @timeout_ms: Amount of time to wait for the address resolution to complete.
63 * @callback: Call invoked once address resolution has completed, timed out,
64 * or been canceled. A status of 0 indicates success.
65 * @context: User-specified context associated with the call.
66 */
67int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
68 struct rdma_dev_addr *addr, int timeout_ms,
69 void (*callback)(int status, struct sockaddr *src_addr,
70 struct rdma_dev_addr *addr, void *context),
71 void *context);
72
73void rdma_addr_cancel(struct rdma_dev_addr *addr);
74
75static inline int ip_addr_size(struct sockaddr *addr)
76{
77 return addr->sa_family == AF_INET6 ?
78 sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
79}
80
81static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
82{
83 return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9];
84}
85
86static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey)
87{
88 dev_addr->broadcast[8] = pkey >> 8;
89 dev_addr->broadcast[9] = (unsigned char) pkey;
90}
91
92static inline union ib_gid *ib_addr_get_sgid(struct rdma_dev_addr *dev_addr)
93{
94 return (union ib_gid *) (dev_addr->src_dev_addr + 4);
95}
96
97static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr,
98 union ib_gid *gid)
99{
100 memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid);
101}
102
103static inline union ib_gid *ib_addr_get_dgid(struct rdma_dev_addr *dev_addr)
104{
105 return (union ib_gid *) (dev_addr->dst_dev_addr + 4);
106}
107
108static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
109 union ib_gid *gid)
110{
111 memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
112}
113
114#endif /* IB_ADDR_H */