aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
authorSean Hefty <sean.hefty@intel.com>2006-06-17 23:37:29 -0400
committerRoland Dreier <rolandd@cisco.com>2006-06-17 23:37:29 -0400
commite51060f08a61965c4dd91516d82fe90617152590 (patch)
tree19a8a0cc519e215e12ce460258a356cdac842f6e /drivers/infiniband/core
parent7025fcd36bd62af2c6ca0ea3490c00b216c4d168 (diff)
IB: IP address based RDMA connection manager
Kernel connection management agent over InfiniBand that connects based on IP addresses. The agent defines a generic RDMA connection abstraction to support clients wanting to connect over different RDMA devices. The agent also handles RDMA device hotplug events on behalf of clients. Signed-off-by: Sean Hefty <sean.hefty@intel.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile6
-rw-r--r--drivers/infiniband/core/cma.c1927
2 files changed, 1931 insertions, 2 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 05095919d168..68e73ec2d1f8 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,7 +1,7 @@
1addr_trans-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o 1infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
2 2
3obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ 3obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
4 ib_cm.o $(addr_trans-y) 4 ib_cm.o $(infiniband-y)
5obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o 5obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
6obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o 6obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o
7 7
@@ -14,6 +14,8 @@ ib_sa-y := sa_query.o
14 14
15ib_cm-y := cm.o 15ib_cm-y := cm.o
16 16
17rdma_cm-y := cma.o
18
17ib_addr-y := addr.o 19ib_addr-y := addr.o
18 20
19ib_umad-y := user_mad.o 21ib_umad-y := user_mad.o
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
new file mode 100644
index 000000000000..a76834edf608
--- /dev/null
+++ b/drivers/infiniband/core/cma.c
@@ -0,0 +1,1927 @@
1/*
2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
6 *
7 * This Software is licensed under one of the following licenses:
8 *
9 * 1) under the terms of the "Common Public License 1.0" a copy of which is
10 * available from the Open Source Initiative, see
11 * http://www.opensource.org/licenses/cpl.php.
12 *
13 * 2) under the terms of the "The BSD License" a copy of which is
14 * available from the Open Source Initiative, see
15 * http://www.opensource.org/licenses/bsd-license.php.
16 *
17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18 * copy of which is available from the Open Source Initiative, see
19 * http://www.opensource.org/licenses/gpl-license.php.
20 *
21 * Licensee has the right to choose one of the above licenses.
22 *
23 * Redistributions of source code must retain the above copyright
24 * notice and one of the license notices.
25 *
26 * Redistributions in binary form must reproduce both the above copyright
27 * notice, one of the license notices in the documentation
28 * and/or other materials provided with the distribution.
29 *
30 */
31
32#include <linux/completion.h>
33#include <linux/in.h>
34#include <linux/in6.h>
35#include <linux/mutex.h>
36#include <linux/random.h>
37#include <linux/idr.h>
38
39#include <net/tcp.h>
40
41#include <rdma/rdma_cm.h>
42#include <rdma/rdma_cm_ib.h>
43#include <rdma/ib_cache.h>
44#include <rdma/ib_cm.h>
45#include <rdma/ib_sa.h>
46
47MODULE_AUTHOR("Sean Hefty");
48MODULE_DESCRIPTION("Generic RDMA CM Agent");
49MODULE_LICENSE("Dual BSD/GPL");
50
51#define CMA_CM_RESPONSE_TIMEOUT 20
52#define CMA_MAX_CM_RETRIES 3
53
54static void cma_add_one(struct ib_device *device);
55static void cma_remove_one(struct ib_device *device);
56
57static struct ib_client cma_client = {
58 .name = "cma",
59 .add = cma_add_one,
60 .remove = cma_remove_one
61};
62
63static LIST_HEAD(dev_list);
64static LIST_HEAD(listen_any_list);
65static DEFINE_MUTEX(lock);
66static struct workqueue_struct *cma_wq;
67static DEFINE_IDR(sdp_ps);
68static DEFINE_IDR(tcp_ps);
69
70struct cma_device {
71 struct list_head list;
72 struct ib_device *device;
73 __be64 node_guid;
74 struct completion comp;
75 atomic_t refcount;
76 struct list_head id_list;
77};
78
79enum cma_state {
80 CMA_IDLE,
81 CMA_ADDR_QUERY,
82 CMA_ADDR_RESOLVED,
83 CMA_ROUTE_QUERY,
84 CMA_ROUTE_RESOLVED,
85 CMA_CONNECT,
86 CMA_DISCONNECT,
87 CMA_ADDR_BOUND,
88 CMA_LISTEN,
89 CMA_DEVICE_REMOVAL,
90 CMA_DESTROYING
91};
92
93struct rdma_bind_list {
94 struct idr *ps;
95 struct hlist_head owners;
96 unsigned short port;
97};
98
99/*
100 * Device removal can occur at anytime, so we need extra handling to
101 * serialize notifying the user of device removal with other callbacks.
102 * We do this by disabling removal notification while a callback is in process,
103 * and reporting it after the callback completes.
104 */
105struct rdma_id_private {
106 struct rdma_cm_id id;
107
108 struct rdma_bind_list *bind_list;
109 struct hlist_node node;
110 struct list_head list;
111 struct list_head listen_list;
112 struct cma_device *cma_dev;
113
114 enum cma_state state;
115 spinlock_t lock;
116 struct completion comp;
117 atomic_t refcount;
118 wait_queue_head_t wait_remove;
119 atomic_t dev_remove;
120
121 int backlog;
122 int timeout_ms;
123 struct ib_sa_query *query;
124 int query_id;
125 union {
126 struct ib_cm_id *ib;
127 } cm_id;
128
129 u32 seq_num;
130 u32 qp_num;
131 enum ib_qp_type qp_type;
132 u8 srq;
133};
134
135struct cma_work {
136 struct work_struct work;
137 struct rdma_id_private *id;
138 enum cma_state old_state;
139 enum cma_state new_state;
140 struct rdma_cm_event event;
141};
142
143union cma_ip_addr {
144 struct in6_addr ip6;
145 struct {
146 __u32 pad[3];
147 __u32 addr;
148 } ip4;
149};
150
151struct cma_hdr {
152 u8 cma_version;
153 u8 ip_version; /* IP version: 7:4 */
154 __u16 port;
155 union cma_ip_addr src_addr;
156 union cma_ip_addr dst_addr;
157};
158
159struct sdp_hh {
160 u8 bsdh[16];
161 u8 sdp_version; /* Major version: 7:4 */
162 u8 ip_version; /* IP version: 7:4 */
163 u8 sdp_specific1[10];
164 __u16 port;
165 __u16 sdp_specific2;
166 union cma_ip_addr src_addr;
167 union cma_ip_addr dst_addr;
168};
169
170struct sdp_hah {
171 u8 bsdh[16];
172 u8 sdp_version;
173};
174
175#define CMA_VERSION 0x00
176#define SDP_MAJ_VERSION 0x2
177
178static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
179{
180 unsigned long flags;
181 int ret;
182
183 spin_lock_irqsave(&id_priv->lock, flags);
184 ret = (id_priv->state == comp);
185 spin_unlock_irqrestore(&id_priv->lock, flags);
186 return ret;
187}
188
189static int cma_comp_exch(struct rdma_id_private *id_priv,
190 enum cma_state comp, enum cma_state exch)
191{
192 unsigned long flags;
193 int ret;
194
195 spin_lock_irqsave(&id_priv->lock, flags);
196 if ((ret = (id_priv->state == comp)))
197 id_priv->state = exch;
198 spin_unlock_irqrestore(&id_priv->lock, flags);
199 return ret;
200}
201
202static enum cma_state cma_exch(struct rdma_id_private *id_priv,
203 enum cma_state exch)
204{
205 unsigned long flags;
206 enum cma_state old;
207
208 spin_lock_irqsave(&id_priv->lock, flags);
209 old = id_priv->state;
210 id_priv->state = exch;
211 spin_unlock_irqrestore(&id_priv->lock, flags);
212 return old;
213}
214
215static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
216{
217 return hdr->ip_version >> 4;
218}
219
220static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
221{
222 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
223}
224
225static inline u8 sdp_get_majv(u8 sdp_version)
226{
227 return sdp_version >> 4;
228}
229
230static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
231{
232 return hh->ip_version >> 4;
233}
234
235static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
236{
237 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
238}
239
240static void cma_attach_to_dev(struct rdma_id_private *id_priv,
241 struct cma_device *cma_dev)
242{
243 atomic_inc(&cma_dev->refcount);
244 id_priv->cma_dev = cma_dev;
245 id_priv->id.device = cma_dev->device;
246 list_add_tail(&id_priv->list, &cma_dev->id_list);
247}
248
249static inline void cma_deref_dev(struct cma_device *cma_dev)
250{
251 if (atomic_dec_and_test(&cma_dev->refcount))
252 complete(&cma_dev->comp);
253}
254
255static void cma_detach_from_dev(struct rdma_id_private *id_priv)
256{
257 list_del(&id_priv->list);
258 cma_deref_dev(id_priv->cma_dev);
259 id_priv->cma_dev = NULL;
260}
261
262static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
263{
264 struct cma_device *cma_dev;
265 union ib_gid *gid;
266 int ret = -ENODEV;
267
268 gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr);
269
270 mutex_lock(&lock);
271 list_for_each_entry(cma_dev, &dev_list, list) {
272 ret = ib_find_cached_gid(cma_dev->device, gid,
273 &id_priv->id.port_num, NULL);
274 if (!ret) {
275 cma_attach_to_dev(id_priv, cma_dev);
276 break;
277 }
278 }
279 mutex_unlock(&lock);
280 return ret;
281}
282
283static int cma_acquire_dev(struct rdma_id_private *id_priv)
284{
285 switch (id_priv->id.route.addr.dev_addr.dev_type) {
286 case IB_NODE_CA:
287 return cma_acquire_ib_dev(id_priv);
288 default:
289 return -ENODEV;
290 }
291}
292
293static void cma_deref_id(struct rdma_id_private *id_priv)
294{
295 if (atomic_dec_and_test(&id_priv->refcount))
296 complete(&id_priv->comp);
297}
298
299static void cma_release_remove(struct rdma_id_private *id_priv)
300{
301 if (atomic_dec_and_test(&id_priv->dev_remove))
302 wake_up(&id_priv->wait_remove);
303}
304
305struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
306 void *context, enum rdma_port_space ps)
307{
308 struct rdma_id_private *id_priv;
309
310 id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
311 if (!id_priv)
312 return ERR_PTR(-ENOMEM);
313
314 id_priv->state = CMA_IDLE;
315 id_priv->id.context = context;
316 id_priv->id.event_handler = event_handler;
317 id_priv->id.ps = ps;
318 spin_lock_init(&id_priv->lock);
319 init_completion(&id_priv->comp);
320 atomic_set(&id_priv->refcount, 1);
321 init_waitqueue_head(&id_priv->wait_remove);
322 atomic_set(&id_priv->dev_remove, 0);
323 INIT_LIST_HEAD(&id_priv->listen_list);
324 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
325
326 return &id_priv->id;
327}
328EXPORT_SYMBOL(rdma_create_id);
329
330static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
331{
332 struct ib_qp_attr qp_attr;
333 struct rdma_dev_addr *dev_addr;
334 int ret;
335
336 dev_addr = &id_priv->id.route.addr.dev_addr;
337 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
338 ib_addr_get_pkey(dev_addr),
339 &qp_attr.pkey_index);
340 if (ret)
341 return ret;
342
343 qp_attr.qp_state = IB_QPS_INIT;
344 qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
345 qp_attr.port_num = id_priv->id.port_num;
346 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
347 IB_QP_PKEY_INDEX | IB_QP_PORT);
348}
349
350int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
351 struct ib_qp_init_attr *qp_init_attr)
352{
353 struct rdma_id_private *id_priv;
354 struct ib_qp *qp;
355 int ret;
356
357 id_priv = container_of(id, struct rdma_id_private, id);
358 if (id->device != pd->device)
359 return -EINVAL;
360
361 qp = ib_create_qp(pd, qp_init_attr);
362 if (IS_ERR(qp))
363 return PTR_ERR(qp);
364
365 switch (id->device->node_type) {
366 case IB_NODE_CA:
367 ret = cma_init_ib_qp(id_priv, qp);
368 break;
369 default:
370 ret = -ENOSYS;
371 break;
372 }
373
374 if (ret)
375 goto err;
376
377 id->qp = qp;
378 id_priv->qp_num = qp->qp_num;
379 id_priv->qp_type = qp->qp_type;
380 id_priv->srq = (qp->srq != NULL);
381 return 0;
382err:
383 ib_destroy_qp(qp);
384 return ret;
385}
386EXPORT_SYMBOL(rdma_create_qp);
387
388void rdma_destroy_qp(struct rdma_cm_id *id)
389{
390 ib_destroy_qp(id->qp);
391}
392EXPORT_SYMBOL(rdma_destroy_qp);
393
394static int cma_modify_qp_rtr(struct rdma_cm_id *id)
395{
396 struct ib_qp_attr qp_attr;
397 int qp_attr_mask, ret;
398
399 if (!id->qp)
400 return 0;
401
402 /* Need to update QP attributes from default values. */
403 qp_attr.qp_state = IB_QPS_INIT;
404 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
405 if (ret)
406 return ret;
407
408 ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
409 if (ret)
410 return ret;
411
412 qp_attr.qp_state = IB_QPS_RTR;
413 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
414 if (ret)
415 return ret;
416
417 return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
418}
419
420static int cma_modify_qp_rts(struct rdma_cm_id *id)
421{
422 struct ib_qp_attr qp_attr;
423 int qp_attr_mask, ret;
424
425 if (!id->qp)
426 return 0;
427
428 qp_attr.qp_state = IB_QPS_RTS;
429 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
430 if (ret)
431 return ret;
432
433 return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
434}
435
436static int cma_modify_qp_err(struct rdma_cm_id *id)
437{
438 struct ib_qp_attr qp_attr;
439
440 if (!id->qp)
441 return 0;
442
443 qp_attr.qp_state = IB_QPS_ERR;
444 return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
445}
446
447int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
448 int *qp_attr_mask)
449{
450 struct rdma_id_private *id_priv;
451 int ret;
452
453 id_priv = container_of(id, struct rdma_id_private, id);
454 switch (id_priv->id.device->node_type) {
455 case IB_NODE_CA:
456 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
457 qp_attr_mask);
458 if (qp_attr->qp_state == IB_QPS_RTR)
459 qp_attr->rq_psn = id_priv->seq_num;
460 break;
461 default:
462 ret = -ENOSYS;
463 break;
464 }
465
466 return ret;
467}
468EXPORT_SYMBOL(rdma_init_qp_attr);
469
470static inline int cma_zero_addr(struct sockaddr *addr)
471{
472 struct in6_addr *ip6;
473
474 if (addr->sa_family == AF_INET)
475 return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
476 else {
477 ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
478 return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
479 ip6->s6_addr32[3] | ip6->s6_addr32[4]) == 0;
480 }
481}
482
483static inline int cma_loopback_addr(struct sockaddr *addr)
484{
485 return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
486}
487
488static inline int cma_any_addr(struct sockaddr *addr)
489{
490 return cma_zero_addr(addr) || cma_loopback_addr(addr);
491}
492
493static inline int cma_any_port(struct sockaddr *addr)
494{
495 return !((struct sockaddr_in *) addr)->sin_port;
496}
497
498static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
499 u8 *ip_ver, __u16 *port,
500 union cma_ip_addr **src, union cma_ip_addr **dst)
501{
502 switch (ps) {
503 case RDMA_PS_SDP:
504 if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
505 SDP_MAJ_VERSION)
506 return -EINVAL;
507
508 *ip_ver = sdp_get_ip_ver(hdr);
509 *port = ((struct sdp_hh *) hdr)->port;
510 *src = &((struct sdp_hh *) hdr)->src_addr;
511 *dst = &((struct sdp_hh *) hdr)->dst_addr;
512 break;
513 default:
514 if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
515 return -EINVAL;
516
517 *ip_ver = cma_get_ip_ver(hdr);
518 *port = ((struct cma_hdr *) hdr)->port;
519 *src = &((struct cma_hdr *) hdr)->src_addr;
520 *dst = &((struct cma_hdr *) hdr)->dst_addr;
521 break;
522 }
523
524 if (*ip_ver != 4 && *ip_ver != 6)
525 return -EINVAL;
526 return 0;
527}
528
529static void cma_save_net_info(struct rdma_addr *addr,
530 struct rdma_addr *listen_addr,
531 u8 ip_ver, __u16 port,
532 union cma_ip_addr *src, union cma_ip_addr *dst)
533{
534 struct sockaddr_in *listen4, *ip4;
535 struct sockaddr_in6 *listen6, *ip6;
536
537 switch (ip_ver) {
538 case 4:
539 listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
540 ip4 = (struct sockaddr_in *) &addr->src_addr;
541 ip4->sin_family = listen4->sin_family;
542 ip4->sin_addr.s_addr = dst->ip4.addr;
543 ip4->sin_port = listen4->sin_port;
544
545 ip4 = (struct sockaddr_in *) &addr->dst_addr;
546 ip4->sin_family = listen4->sin_family;
547 ip4->sin_addr.s_addr = src->ip4.addr;
548 ip4->sin_port = port;
549 break;
550 case 6:
551 listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
552 ip6 = (struct sockaddr_in6 *) &addr->src_addr;
553 ip6->sin6_family = listen6->sin6_family;
554 ip6->sin6_addr = dst->ip6;
555 ip6->sin6_port = listen6->sin6_port;
556
557 ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
558 ip6->sin6_family = listen6->sin6_family;
559 ip6->sin6_addr = src->ip6;
560 ip6->sin6_port = port;
561 break;
562 default:
563 break;
564 }
565}
566
567static inline int cma_user_data_offset(enum rdma_port_space ps)
568{
569 switch (ps) {
570 case RDMA_PS_SDP:
571 return 0;
572 default:
573 return sizeof(struct cma_hdr);
574 }
575}
576
577static int cma_notify_user(struct rdma_id_private *id_priv,
578 enum rdma_cm_event_type type, int status,
579 void *data, u8 data_len)
580{
581 struct rdma_cm_event event;
582
583 event.event = type;
584 event.status = status;
585 event.private_data = data;
586 event.private_data_len = data_len;
587
588 return id_priv->id.event_handler(&id_priv->id, &event);
589}
590
591static void cma_cancel_route(struct rdma_id_private *id_priv)
592{
593 switch (id_priv->id.device->node_type) {
594 case IB_NODE_CA:
595 if (id_priv->query)
596 ib_sa_cancel_query(id_priv->query_id, id_priv->query);
597 break;
598 default:
599 break;
600 }
601}
602
603static inline int cma_internal_listen(struct rdma_id_private *id_priv)
604{
605 return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
606 cma_any_addr(&id_priv->id.route.addr.src_addr);
607}
608
609static void cma_destroy_listen(struct rdma_id_private *id_priv)
610{
611 cma_exch(id_priv, CMA_DESTROYING);
612
613 if (id_priv->cma_dev) {
614 switch (id_priv->id.device->node_type) {
615 case IB_NODE_CA:
616 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
617 ib_destroy_cm_id(id_priv->cm_id.ib);
618 break;
619 default:
620 break;
621 }
622 cma_detach_from_dev(id_priv);
623 }
624 list_del(&id_priv->listen_list);
625
626 cma_deref_id(id_priv);
627 wait_for_completion(&id_priv->comp);
628
629 kfree(id_priv);
630}
631
632static void cma_cancel_listens(struct rdma_id_private *id_priv)
633{
634 struct rdma_id_private *dev_id_priv;
635
636 mutex_lock(&lock);
637 list_del(&id_priv->list);
638
639 while (!list_empty(&id_priv->listen_list)) {
640 dev_id_priv = list_entry(id_priv->listen_list.next,
641 struct rdma_id_private, listen_list);
642 cma_destroy_listen(dev_id_priv);
643 }
644 mutex_unlock(&lock);
645}
646
647static void cma_cancel_operation(struct rdma_id_private *id_priv,
648 enum cma_state state)
649{
650 switch (state) {
651 case CMA_ADDR_QUERY:
652 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
653 break;
654 case CMA_ROUTE_QUERY:
655 cma_cancel_route(id_priv);
656 break;
657 case CMA_LISTEN:
658 if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
659 !id_priv->cma_dev)
660 cma_cancel_listens(id_priv);
661 break;
662 default:
663 break;
664 }
665}
666
667static void cma_release_port(struct rdma_id_private *id_priv)
668{
669 struct rdma_bind_list *bind_list = id_priv->bind_list;
670
671 if (!bind_list)
672 return;
673
674 mutex_lock(&lock);
675 hlist_del(&id_priv->node);
676 if (hlist_empty(&bind_list->owners)) {
677 idr_remove(bind_list->ps, bind_list->port);
678 kfree(bind_list);
679 }
680 mutex_unlock(&lock);
681}
682
683void rdma_destroy_id(struct rdma_cm_id *id)
684{
685 struct rdma_id_private *id_priv;
686 enum cma_state state;
687
688 id_priv = container_of(id, struct rdma_id_private, id);
689 state = cma_exch(id_priv, CMA_DESTROYING);
690 cma_cancel_operation(id_priv, state);
691
692 if (id_priv->cma_dev) {
693 switch (id->device->node_type) {
694 case IB_NODE_CA:
695 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
696 ib_destroy_cm_id(id_priv->cm_id.ib);
697 break;
698 default:
699 break;
700 }
701 mutex_lock(&lock);
702 cma_detach_from_dev(id_priv);
703 mutex_unlock(&lock);
704 }
705
706 cma_release_port(id_priv);
707 cma_deref_id(id_priv);
708 wait_for_completion(&id_priv->comp);
709
710 kfree(id_priv->id.route.path_rec);
711 kfree(id_priv);
712}
713EXPORT_SYMBOL(rdma_destroy_id);
714
715static int cma_rep_recv(struct rdma_id_private *id_priv)
716{
717 int ret;
718
719 ret = cma_modify_qp_rtr(&id_priv->id);
720 if (ret)
721 goto reject;
722
723 ret = cma_modify_qp_rts(&id_priv->id);
724 if (ret)
725 goto reject;
726
727 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
728 if (ret)
729 goto reject;
730
731 return 0;
732reject:
733 cma_modify_qp_err(&id_priv->id);
734 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
735 NULL, 0, NULL, 0);
736 return ret;
737}
738
739static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
740{
741 if (id_priv->id.ps == RDMA_PS_SDP &&
742 sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
743 SDP_MAJ_VERSION)
744 return -EINVAL;
745
746 return 0;
747}
748
749static int cma_rtu_recv(struct rdma_id_private *id_priv)
750{
751 int ret;
752
753 ret = cma_modify_qp_rts(&id_priv->id);
754 if (ret)
755 goto reject;
756
757 return 0;
758reject:
759 cma_modify_qp_err(&id_priv->id);
760 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
761 NULL, 0, NULL, 0);
762 return ret;
763}
764
765static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
766{
767 struct rdma_id_private *id_priv = cm_id->context;
768 enum rdma_cm_event_type event;
769 u8 private_data_len = 0;
770 int ret = 0, status = 0;
771
772 atomic_inc(&id_priv->dev_remove);
773 if (!cma_comp(id_priv, CMA_CONNECT))
774 goto out;
775
776 switch (ib_event->event) {
777 case IB_CM_REQ_ERROR:
778 case IB_CM_REP_ERROR:
779 event = RDMA_CM_EVENT_UNREACHABLE;
780 status = -ETIMEDOUT;
781 break;
782 case IB_CM_REP_RECEIVED:
783 status = cma_verify_rep(id_priv, ib_event->private_data);
784 if (status)
785 event = RDMA_CM_EVENT_CONNECT_ERROR;
786 else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
787 status = cma_rep_recv(id_priv);
788 event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
789 RDMA_CM_EVENT_ESTABLISHED;
790 } else
791 event = RDMA_CM_EVENT_CONNECT_RESPONSE;
792 private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
793 break;
794 case IB_CM_RTU_RECEIVED:
795 status = cma_rtu_recv(id_priv);
796 event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
797 RDMA_CM_EVENT_ESTABLISHED;
798 break;
799 case IB_CM_DREQ_ERROR:
800 status = -ETIMEDOUT; /* fall through */
801 case IB_CM_DREQ_RECEIVED:
802 case IB_CM_DREP_RECEIVED:
803 if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
804 goto out;
805 event = RDMA_CM_EVENT_DISCONNECTED;
806 break;
807 case IB_CM_TIMEWAIT_EXIT:
808 case IB_CM_MRA_RECEIVED:
809 /* ignore event */
810 goto out;
811 case IB_CM_REJ_RECEIVED:
812 cma_modify_qp_err(&id_priv->id);
813 status = ib_event->param.rej_rcvd.reason;
814 event = RDMA_CM_EVENT_REJECTED;
815 break;
816 default:
817 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
818 ib_event->event);
819 goto out;
820 }
821
822 ret = cma_notify_user(id_priv, event, status, ib_event->private_data,
823 private_data_len);
824 if (ret) {
825 /* Destroy the CM ID by returning a non-zero value. */
826 id_priv->cm_id.ib = NULL;
827 cma_exch(id_priv, CMA_DESTROYING);
828 cma_release_remove(id_priv);
829 rdma_destroy_id(&id_priv->id);
830 return ret;
831 }
832out:
833 cma_release_remove(id_priv);
834 return ret;
835}
836
837static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
838 struct ib_cm_event *ib_event)
839{
840 struct rdma_id_private *id_priv;
841 struct rdma_cm_id *id;
842 struct rdma_route *rt;
843 union cma_ip_addr *src, *dst;
844 __u16 port;
845 u8 ip_ver;
846
847 id = rdma_create_id(listen_id->event_handler, listen_id->context,
848 listen_id->ps);
849 if (IS_ERR(id))
850 return NULL;
851
852 rt = &id->route;
853 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
854 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL);
855 if (!rt->path_rec)
856 goto err;
857
858 if (cma_get_net_info(ib_event->private_data, listen_id->ps,
859 &ip_ver, &port, &src, &dst))
860 goto err;
861
862 cma_save_net_info(&id->route.addr, &listen_id->route.addr,
863 ip_ver, port, src, dst);
864 rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
865 if (rt->num_paths == 2)
866 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
867
868 ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
869 ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
870 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
871 rt->addr.dev_addr.dev_type = IB_NODE_CA;
872
873 id_priv = container_of(id, struct rdma_id_private, id);
874 id_priv->state = CMA_CONNECT;
875 return id_priv;
876err:
877 rdma_destroy_id(id);
878 return NULL;
879}
880
881static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
882{
883 struct rdma_id_private *listen_id, *conn_id;
884 int offset, ret;
885
886 listen_id = cm_id->context;
887 atomic_inc(&listen_id->dev_remove);
888 if (!cma_comp(listen_id, CMA_LISTEN)) {
889 ret = -ECONNABORTED;
890 goto out;
891 }
892
893 conn_id = cma_new_id(&listen_id->id, ib_event);
894 if (!conn_id) {
895 ret = -ENOMEM;
896 goto out;
897 }
898
899 atomic_inc(&conn_id->dev_remove);
900 ret = cma_acquire_ib_dev(conn_id);
901 if (ret) {
902 ret = -ENODEV;
903 cma_release_remove(conn_id);
904 rdma_destroy_id(&conn_id->id);
905 goto out;
906 }
907
908 conn_id->cm_id.ib = cm_id;
909 cm_id->context = conn_id;
910 cm_id->cm_handler = cma_ib_handler;
911
912 offset = cma_user_data_offset(listen_id->id.ps);
913 ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
914 ib_event->private_data + offset,
915 IB_CM_REQ_PRIVATE_DATA_SIZE - offset);
916 if (ret) {
917 /* Destroy the CM ID by returning a non-zero value. */
918 conn_id->cm_id.ib = NULL;
919 cma_exch(conn_id, CMA_DESTROYING);
920 cma_release_remove(conn_id);
921 rdma_destroy_id(&conn_id->id);
922 }
923out:
924 cma_release_remove(listen_id);
925 return ret;
926}
927
928static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
929{
930 return cpu_to_be64(((u64)ps << 16) +
931 be16_to_cpu(((struct sockaddr_in *) addr)->sin_port));
932}
933
934static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
935 struct ib_cm_compare_data *compare)
936{
937 struct cma_hdr *cma_data, *cma_mask;
938 struct sdp_hh *sdp_data, *sdp_mask;
939 __u32 ip4_addr;
940 struct in6_addr ip6_addr;
941
942 memset(compare, 0, sizeof *compare);
943 cma_data = (void *) compare->data;
944 cma_mask = (void *) compare->mask;
945 sdp_data = (void *) compare->data;
946 sdp_mask = (void *) compare->mask;
947
948 switch (addr->sa_family) {
949 case AF_INET:
950 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
951 if (ps == RDMA_PS_SDP) {
952 sdp_set_ip_ver(sdp_data, 4);
953 sdp_set_ip_ver(sdp_mask, 0xF);
954 sdp_data->dst_addr.ip4.addr = ip4_addr;
955 sdp_mask->dst_addr.ip4.addr = ~0;
956 } else {
957 cma_set_ip_ver(cma_data, 4);
958 cma_set_ip_ver(cma_mask, 0xF);
959 cma_data->dst_addr.ip4.addr = ip4_addr;
960 cma_mask->dst_addr.ip4.addr = ~0;
961 }
962 break;
963 case AF_INET6:
964 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
965 if (ps == RDMA_PS_SDP) {
966 sdp_set_ip_ver(sdp_data, 6);
967 sdp_set_ip_ver(sdp_mask, 0xF);
968 sdp_data->dst_addr.ip6 = ip6_addr;
969 memset(&sdp_mask->dst_addr.ip6, 0xFF,
970 sizeof sdp_mask->dst_addr.ip6);
971 } else {
972 cma_set_ip_ver(cma_data, 6);
973 cma_set_ip_ver(cma_mask, 0xF);
974 cma_data->dst_addr.ip6 = ip6_addr;
975 memset(&cma_mask->dst_addr.ip6, 0xFF,
976 sizeof cma_mask->dst_addr.ip6);
977 }
978 break;
979 default:
980 break;
981 }
982}
983
984static int cma_ib_listen(struct rdma_id_private *id_priv)
985{
986 struct ib_cm_compare_data compare_data;
987 struct sockaddr *addr;
988 __be64 svc_id;
989 int ret;
990
991 id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
992 id_priv);
993 if (IS_ERR(id_priv->cm_id.ib))
994 return PTR_ERR(id_priv->cm_id.ib);
995
996 addr = &id_priv->id.route.addr.src_addr;
997 svc_id = cma_get_service_id(id_priv->id.ps, addr);
998 if (cma_any_addr(addr))
999 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1000 else {
1001 cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1002 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1003 }
1004
1005 if (ret) {
1006 ib_destroy_cm_id(id_priv->cm_id.ib);
1007 id_priv->cm_id.ib = NULL;
1008 }
1009
1010 return ret;
1011}
1012
1013static int cma_listen_handler(struct rdma_cm_id *id,
1014 struct rdma_cm_event *event)
1015{
1016 struct rdma_id_private *id_priv = id->context;
1017
1018 id->context = id_priv->id.context;
1019 id->event_handler = id_priv->id.event_handler;
1020 return id_priv->id.event_handler(id, event);
1021}
1022
1023static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1024 struct cma_device *cma_dev)
1025{
1026 struct rdma_id_private *dev_id_priv;
1027 struct rdma_cm_id *id;
1028 int ret;
1029
1030 id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1031 if (IS_ERR(id))
1032 return;
1033
1034 dev_id_priv = container_of(id, struct rdma_id_private, id);
1035
1036 dev_id_priv->state = CMA_ADDR_BOUND;
1037 memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1038 ip_addr_size(&id_priv->id.route.addr.src_addr));
1039
1040 cma_attach_to_dev(dev_id_priv, cma_dev);
1041 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1042
1043 ret = rdma_listen(id, id_priv->backlog);
1044 if (ret)
1045 goto err;
1046
1047 return;
1048err:
1049 cma_destroy_listen(dev_id_priv);
1050}
1051
1052static void cma_listen_on_all(struct rdma_id_private *id_priv)
1053{
1054 struct cma_device *cma_dev;
1055
1056 mutex_lock(&lock);
1057 list_add_tail(&id_priv->list, &listen_any_list);
1058 list_for_each_entry(cma_dev, &dev_list, list)
1059 cma_listen_on_dev(id_priv, cma_dev);
1060 mutex_unlock(&lock);
1061}
1062
1063static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1064{
1065 struct sockaddr_in addr_in;
1066
1067 memset(&addr_in, 0, sizeof addr_in);
1068 addr_in.sin_family = af;
1069 return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1070}
1071
1072int rdma_listen(struct rdma_cm_id *id, int backlog)
1073{
1074 struct rdma_id_private *id_priv;
1075 int ret;
1076
1077 id_priv = container_of(id, struct rdma_id_private, id);
1078 if (id_priv->state == CMA_IDLE) {
1079 ret = cma_bind_any(id, AF_INET);
1080 if (ret)
1081 return ret;
1082 }
1083
1084 if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1085 return -EINVAL;
1086
1087 id_priv->backlog = backlog;
1088 if (id->device) {
1089 switch (id->device->node_type) {
1090 case IB_NODE_CA:
1091 ret = cma_ib_listen(id_priv);
1092 if (ret)
1093 goto err;
1094 break;
1095 default:
1096 ret = -ENOSYS;
1097 goto err;
1098 }
1099 } else
1100 cma_listen_on_all(id_priv);
1101
1102 return 0;
1103err:
1104 id_priv->backlog = 0;
1105 cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1106 return ret;
1107}
1108EXPORT_SYMBOL(rdma_listen);
1109
1110static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1111 void *context)
1112{
1113 struct cma_work *work = context;
1114 struct rdma_route *route;
1115
1116 route = &work->id->id.route;
1117
1118 if (!status) {
1119 route->num_paths = 1;
1120 *route->path_rec = *path_rec;
1121 } else {
1122 work->old_state = CMA_ROUTE_QUERY;
1123 work->new_state = CMA_ADDR_RESOLVED;
1124 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1125 }
1126
1127 queue_work(cma_wq, &work->work);
1128}
1129
1130static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1131 struct cma_work *work)
1132{
1133 struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
1134 struct ib_sa_path_rec path_rec;
1135
1136 memset(&path_rec, 0, sizeof path_rec);
1137 path_rec.sgid = *ib_addr_get_sgid(addr);
1138 path_rec.dgid = *ib_addr_get_dgid(addr);
1139 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
1140 path_rec.numb_path = 1;
1141
1142 id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
1143 id_priv->id.port_num, &path_rec,
1144 IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1145 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
1146 timeout_ms, GFP_KERNEL,
1147 cma_query_handler, work, &id_priv->query);
1148
1149 return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1150}
1151
1152static void cma_work_handler(void *data)
1153{
1154 struct cma_work *work = data;
1155 struct rdma_id_private *id_priv = work->id;
1156 int destroy = 0;
1157
1158 atomic_inc(&id_priv->dev_remove);
1159 if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1160 goto out;
1161
1162 if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1163 cma_exch(id_priv, CMA_DESTROYING);
1164 destroy = 1;
1165 }
1166out:
1167 cma_release_remove(id_priv);
1168 cma_deref_id(id_priv);
1169 if (destroy)
1170 rdma_destroy_id(&id_priv->id);
1171 kfree(work);
1172}
1173
1174static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1175{
1176 struct rdma_route *route = &id_priv->id.route;
1177 struct cma_work *work;
1178 int ret;
1179
1180 work = kzalloc(sizeof *work, GFP_KERNEL);
1181 if (!work)
1182 return -ENOMEM;
1183
1184 work->id = id_priv;
1185 INIT_WORK(&work->work, cma_work_handler, work);
1186 work->old_state = CMA_ROUTE_QUERY;
1187 work->new_state = CMA_ROUTE_RESOLVED;
1188 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1189
1190 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1191 if (!route->path_rec) {
1192 ret = -ENOMEM;
1193 goto err1;
1194 }
1195
1196 ret = cma_query_ib_route(id_priv, timeout_ms, work);
1197 if (ret)
1198 goto err2;
1199
1200 return 0;
1201err2:
1202 kfree(route->path_rec);
1203 route->path_rec = NULL;
1204err1:
1205 kfree(work);
1206 return ret;
1207}
1208
1209int rdma_set_ib_paths(struct rdma_cm_id *id,
1210 struct ib_sa_path_rec *path_rec, int num_paths)
1211{
1212 struct rdma_id_private *id_priv;
1213 int ret;
1214
1215 id_priv = container_of(id, struct rdma_id_private, id);
1216 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1217 return -EINVAL;
1218
1219 id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1220 if (!id->route.path_rec) {
1221 ret = -ENOMEM;
1222 goto err;
1223 }
1224
1225 memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1226 return 0;
1227err:
1228 cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1229 return ret;
1230}
1231EXPORT_SYMBOL(rdma_set_ib_paths);
1232
1233int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1234{
1235 struct rdma_id_private *id_priv;
1236 int ret;
1237
1238 id_priv = container_of(id, struct rdma_id_private, id);
1239 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1240 return -EINVAL;
1241
1242 atomic_inc(&id_priv->refcount);
1243 switch (id->device->node_type) {
1244 case IB_NODE_CA:
1245 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1246 break;
1247 default:
1248 ret = -ENOSYS;
1249 break;
1250 }
1251 if (ret)
1252 goto err;
1253
1254 return 0;
1255err:
1256 cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1257 cma_deref_id(id_priv);
1258 return ret;
1259}
1260EXPORT_SYMBOL(rdma_resolve_route);
1261
1262static int cma_bind_loopback(struct rdma_id_private *id_priv)
1263{
1264 struct cma_device *cma_dev;
1265 struct ib_port_attr port_attr;
1266 union ib_gid *gid;
1267 u16 pkey;
1268 int ret;
1269 u8 p;
1270
1271 mutex_lock(&lock);
1272 list_for_each_entry(cma_dev, &dev_list, list)
1273 for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1274 if (!ib_query_port (cma_dev->device, p, &port_attr) &&
1275 port_attr.state == IB_PORT_ACTIVE)
1276 goto port_found;
1277
1278 if (!list_empty(&dev_list)) {
1279 p = 1;
1280 cma_dev = list_entry(dev_list.next, struct cma_device, list);
1281 } else {
1282 ret = -ENODEV;
1283 goto out;
1284 }
1285
1286port_found:
1287 gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr);
1288 ret = ib_get_cached_gid(cma_dev->device, p, 0, gid);
1289 if (ret)
1290 goto out;
1291
1292 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1293 if (ret)
1294 goto out;
1295
1296 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1297 id_priv->id.port_num = p;
1298 cma_attach_to_dev(id_priv, cma_dev);
1299out:
1300 mutex_unlock(&lock);
1301 return ret;
1302}
1303
1304static void addr_handler(int status, struct sockaddr *src_addr,
1305 struct rdma_dev_addr *dev_addr, void *context)
1306{
1307 struct rdma_id_private *id_priv = context;
1308 enum rdma_cm_event_type event;
1309
1310 atomic_inc(&id_priv->dev_remove);
1311 if (!id_priv->cma_dev && !status)
1312 status = cma_acquire_dev(id_priv);
1313
1314 if (status) {
1315 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
1316 goto out;
1317 event = RDMA_CM_EVENT_ADDR_ERROR;
1318 } else {
1319 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
1320 goto out;
1321 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1322 ip_addr_size(src_addr));
1323 event = RDMA_CM_EVENT_ADDR_RESOLVED;
1324 }
1325
1326 if (cma_notify_user(id_priv, event, status, NULL, 0)) {
1327 cma_exch(id_priv, CMA_DESTROYING);
1328 cma_release_remove(id_priv);
1329 cma_deref_id(id_priv);
1330 rdma_destroy_id(&id_priv->id);
1331 return;
1332 }
1333out:
1334 cma_release_remove(id_priv);
1335 cma_deref_id(id_priv);
1336}
1337
1338static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1339{
1340 struct cma_work *work;
1341 struct sockaddr_in *src_in, *dst_in;
1342 int ret;
1343
1344 work = kzalloc(sizeof *work, GFP_KERNEL);
1345 if (!work)
1346 return -ENOMEM;
1347
1348 if (!id_priv->cma_dev) {
1349 ret = cma_bind_loopback(id_priv);
1350 if (ret)
1351 goto err;
1352 }
1353
1354 ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr,
1355 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr));
1356
1357 if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1358 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1359 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1360 src_in->sin_family = dst_in->sin_family;
1361 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1362 }
1363
1364 work->id = id_priv;
1365 INIT_WORK(&work->work, cma_work_handler, work);
1366 work->old_state = CMA_ADDR_QUERY;
1367 work->new_state = CMA_ADDR_RESOLVED;
1368 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1369 queue_work(cma_wq, &work->work);
1370 return 0;
1371err:
1372 kfree(work);
1373 return ret;
1374}
1375
1376static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1377 struct sockaddr *dst_addr)
1378{
1379 if (src_addr && src_addr->sa_family)
1380 return rdma_bind_addr(id, src_addr);
1381 else
1382 return cma_bind_any(id, dst_addr->sa_family);
1383}
1384
1385int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1386 struct sockaddr *dst_addr, int timeout_ms)
1387{
1388 struct rdma_id_private *id_priv;
1389 int ret;
1390
1391 id_priv = container_of(id, struct rdma_id_private, id);
1392 if (id_priv->state == CMA_IDLE) {
1393 ret = cma_bind_addr(id, src_addr, dst_addr);
1394 if (ret)
1395 return ret;
1396 }
1397
1398 if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1399 return -EINVAL;
1400
1401 atomic_inc(&id_priv->refcount);
1402 memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1403 if (cma_any_addr(dst_addr))
1404 ret = cma_resolve_loopback(id_priv);
1405 else
1406 ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr,
1407 &id->route.addr.dev_addr,
1408 timeout_ms, addr_handler, id_priv);
1409 if (ret)
1410 goto err;
1411
1412 return 0;
1413err:
1414 cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1415 cma_deref_id(id_priv);
1416 return ret;
1417}
1418EXPORT_SYMBOL(rdma_resolve_addr);
1419
1420static void cma_bind_port(struct rdma_bind_list *bind_list,
1421 struct rdma_id_private *id_priv)
1422{
1423 struct sockaddr_in *sin;
1424
1425 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1426 sin->sin_port = htons(bind_list->port);
1427 id_priv->bind_list = bind_list;
1428 hlist_add_head(&id_priv->node, &bind_list->owners);
1429}
1430
1431static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1432 unsigned short snum)
1433{
1434 struct rdma_bind_list *bind_list;
1435 int port, start, ret;
1436
1437 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1438 if (!bind_list)
1439 return -ENOMEM;
1440
1441 start = snum ? snum : sysctl_local_port_range[0];
1442
1443 do {
1444 ret = idr_get_new_above(ps, bind_list, start, &port);
1445 } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1446
1447 if (ret)
1448 goto err;
1449
1450 if ((snum && port != snum) ||
1451 (!snum && port > sysctl_local_port_range[1])) {
1452 idr_remove(ps, port);
1453 ret = -EADDRNOTAVAIL;
1454 goto err;
1455 }
1456
1457 bind_list->ps = ps;
1458 bind_list->port = (unsigned short) port;
1459 cma_bind_port(bind_list, id_priv);
1460 return 0;
1461err:
1462 kfree(bind_list);
1463 return ret;
1464}
1465
1466static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1467{
1468 struct rdma_id_private *cur_id;
1469 struct sockaddr_in *sin, *cur_sin;
1470 struct rdma_bind_list *bind_list;
1471 struct hlist_node *node;
1472 unsigned short snum;
1473
1474 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1475 snum = ntohs(sin->sin_port);
1476 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1477 return -EACCES;
1478
1479 bind_list = idr_find(ps, snum);
1480 if (!bind_list)
1481 return cma_alloc_port(ps, id_priv, snum);
1482
1483 /*
1484 * We don't support binding to any address if anyone is bound to
1485 * a specific address on the same port.
1486 */
1487 if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1488 return -EADDRNOTAVAIL;
1489
1490 hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1491 if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1492 return -EADDRNOTAVAIL;
1493
1494 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1495 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1496 return -EADDRINUSE;
1497 }
1498
1499 cma_bind_port(bind_list, id_priv);
1500 return 0;
1501}
1502
1503static int cma_get_port(struct rdma_id_private *id_priv)
1504{
1505 struct idr *ps;
1506 int ret;
1507
1508 switch (id_priv->id.ps) {
1509 case RDMA_PS_SDP:
1510 ps = &sdp_ps;
1511 break;
1512 case RDMA_PS_TCP:
1513 ps = &tcp_ps;
1514 break;
1515 default:
1516 return -EPROTONOSUPPORT;
1517 }
1518
1519 mutex_lock(&lock);
1520 if (cma_any_port(&id_priv->id.route.addr.src_addr))
1521 ret = cma_alloc_port(ps, id_priv, 0);
1522 else
1523 ret = cma_use_port(ps, id_priv);
1524 mutex_unlock(&lock);
1525
1526 return ret;
1527}
1528
1529int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
1530{
1531 struct rdma_id_private *id_priv;
1532 int ret;
1533
1534 if (addr->sa_family != AF_INET)
1535 return -EAFNOSUPPORT;
1536
1537 id_priv = container_of(id, struct rdma_id_private, id);
1538 if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
1539 return -EINVAL;
1540
1541 if (!cma_any_addr(addr)) {
1542 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
1543 if (!ret)
1544 ret = cma_acquire_dev(id_priv);
1545 if (ret)
1546 goto err;
1547 }
1548
1549 memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
1550 ret = cma_get_port(id_priv);
1551 if (ret)
1552 goto err;
1553
1554 return 0;
1555err:
1556 cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
1557 return ret;
1558}
1559EXPORT_SYMBOL(rdma_bind_addr);
1560
1561static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
1562 struct rdma_route *route)
1563{
1564 struct sockaddr_in *src4, *dst4;
1565 struct cma_hdr *cma_hdr;
1566 struct sdp_hh *sdp_hdr;
1567
1568 src4 = (struct sockaddr_in *) &route->addr.src_addr;
1569 dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
1570
1571 switch (ps) {
1572 case RDMA_PS_SDP:
1573 sdp_hdr = hdr;
1574 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
1575 return -EINVAL;
1576 sdp_set_ip_ver(sdp_hdr, 4);
1577 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1578 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1579 sdp_hdr->port = src4->sin_port;
1580 break;
1581 default:
1582 cma_hdr = hdr;
1583 cma_hdr->cma_version = CMA_VERSION;
1584 cma_set_ip_ver(cma_hdr, 4);
1585 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1586 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1587 cma_hdr->port = src4->sin_port;
1588 break;
1589 }
1590 return 0;
1591}
1592
1593static int cma_connect_ib(struct rdma_id_private *id_priv,
1594 struct rdma_conn_param *conn_param)
1595{
1596 struct ib_cm_req_param req;
1597 struct rdma_route *route;
1598 void *private_data;
1599 int offset, ret;
1600
1601 memset(&req, 0, sizeof req);
1602 offset = cma_user_data_offset(id_priv->id.ps);
1603 req.private_data_len = offset + conn_param->private_data_len;
1604 private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
1605 if (!private_data)
1606 return -ENOMEM;
1607
1608 if (conn_param->private_data && conn_param->private_data_len)
1609 memcpy(private_data + offset, conn_param->private_data,
1610 conn_param->private_data_len);
1611
1612 id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
1613 id_priv);
1614 if (IS_ERR(id_priv->cm_id.ib)) {
1615 ret = PTR_ERR(id_priv->cm_id.ib);
1616 goto out;
1617 }
1618
1619 route = &id_priv->id.route;
1620 ret = cma_format_hdr(private_data, id_priv->id.ps, route);
1621 if (ret)
1622 goto out;
1623 req.private_data = private_data;
1624
1625 req.primary_path = &route->path_rec[0];
1626 if (route->num_paths == 2)
1627 req.alternate_path = &route->path_rec[1];
1628
1629 req.service_id = cma_get_service_id(id_priv->id.ps,
1630 &route->addr.dst_addr);
1631 req.qp_num = id_priv->qp_num;
1632 req.qp_type = id_priv->qp_type;
1633 req.starting_psn = id_priv->seq_num;
1634 req.responder_resources = conn_param->responder_resources;
1635 req.initiator_depth = conn_param->initiator_depth;
1636 req.flow_control = conn_param->flow_control;
1637 req.retry_count = conn_param->retry_count;
1638 req.rnr_retry_count = conn_param->rnr_retry_count;
1639 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
1640 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
1641 req.max_cm_retries = CMA_MAX_CM_RETRIES;
1642 req.srq = id_priv->srq ? 1 : 0;
1643
1644 ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
1645out:
1646 kfree(private_data);
1647 return ret;
1648}
1649
1650int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1651{
1652 struct rdma_id_private *id_priv;
1653 int ret;
1654
1655 id_priv = container_of(id, struct rdma_id_private, id);
1656 if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
1657 return -EINVAL;
1658
1659 if (!id->qp) {
1660 id_priv->qp_num = conn_param->qp_num;
1661 id_priv->qp_type = conn_param->qp_type;
1662 id_priv->srq = conn_param->srq;
1663 }
1664
1665 switch (id->device->node_type) {
1666 case IB_NODE_CA:
1667 ret = cma_connect_ib(id_priv, conn_param);
1668 break;
1669 default:
1670 ret = -ENOSYS;
1671 break;
1672 }
1673 if (ret)
1674 goto err;
1675
1676 return 0;
1677err:
1678 cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
1679 return ret;
1680}
1681EXPORT_SYMBOL(rdma_connect);
1682
1683static int cma_accept_ib(struct rdma_id_private *id_priv,
1684 struct rdma_conn_param *conn_param)
1685{
1686 struct ib_cm_rep_param rep;
1687 int ret;
1688
1689 ret = cma_modify_qp_rtr(&id_priv->id);
1690 if (ret)
1691 return ret;
1692
1693 memset(&rep, 0, sizeof rep);
1694 rep.qp_num = id_priv->qp_num;
1695 rep.starting_psn = id_priv->seq_num;
1696 rep.private_data = conn_param->private_data;
1697 rep.private_data_len = conn_param->private_data_len;
1698 rep.responder_resources = conn_param->responder_resources;
1699 rep.initiator_depth = conn_param->initiator_depth;
1700 rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
1701 rep.failover_accepted = 0;
1702 rep.flow_control = conn_param->flow_control;
1703 rep.rnr_retry_count = conn_param->rnr_retry_count;
1704 rep.srq = id_priv->srq ? 1 : 0;
1705
1706 return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
1707}
1708
1709int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1710{
1711 struct rdma_id_private *id_priv;
1712 int ret;
1713
1714 id_priv = container_of(id, struct rdma_id_private, id);
1715 if (!cma_comp(id_priv, CMA_CONNECT))
1716 return -EINVAL;
1717
1718 if (!id->qp && conn_param) {
1719 id_priv->qp_num = conn_param->qp_num;
1720 id_priv->qp_type = conn_param->qp_type;
1721 id_priv->srq = conn_param->srq;
1722 }
1723
1724 switch (id->device->node_type) {
1725 case IB_NODE_CA:
1726 if (conn_param)
1727 ret = cma_accept_ib(id_priv, conn_param);
1728 else
1729 ret = cma_rep_recv(id_priv);
1730 break;
1731 default:
1732 ret = -ENOSYS;
1733 break;
1734 }
1735
1736 if (ret)
1737 goto reject;
1738
1739 return 0;
1740reject:
1741 cma_modify_qp_err(id);
1742 rdma_reject(id, NULL, 0);
1743 return ret;
1744}
1745EXPORT_SYMBOL(rdma_accept);
1746
1747int rdma_reject(struct rdma_cm_id *id, const void *private_data,
1748 u8 private_data_len)
1749{
1750 struct rdma_id_private *id_priv;
1751 int ret;
1752
1753 id_priv = container_of(id, struct rdma_id_private, id);
1754 if (!cma_comp(id_priv, CMA_CONNECT))
1755 return -EINVAL;
1756
1757 switch (id->device->node_type) {
1758 case IB_NODE_CA:
1759 ret = ib_send_cm_rej(id_priv->cm_id.ib,
1760 IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1761 private_data, private_data_len);
1762 break;
1763 default:
1764 ret = -ENOSYS;
1765 break;
1766 }
1767 return ret;
1768}
1769EXPORT_SYMBOL(rdma_reject);
1770
1771int rdma_disconnect(struct rdma_cm_id *id)
1772{
1773 struct rdma_id_private *id_priv;
1774 int ret;
1775
1776 id_priv = container_of(id, struct rdma_id_private, id);
1777 if (!cma_comp(id_priv, CMA_CONNECT) &&
1778 !cma_comp(id_priv, CMA_DISCONNECT))
1779 return -EINVAL;
1780
1781 ret = cma_modify_qp_err(id);
1782 if (ret)
1783 goto out;
1784
1785 switch (id->device->node_type) {
1786 case IB_NODE_CA:
1787 /* Initiate or respond to a disconnect. */
1788 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
1789 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
1790 break;
1791 default:
1792 break;
1793 }
1794out:
1795 return ret;
1796}
1797EXPORT_SYMBOL(rdma_disconnect);
1798
1799static void cma_add_one(struct ib_device *device)
1800{
1801 struct cma_device *cma_dev;
1802 struct rdma_id_private *id_priv;
1803
1804 cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
1805 if (!cma_dev)
1806 return;
1807
1808 cma_dev->device = device;
1809 cma_dev->node_guid = device->node_guid;
1810 if (!cma_dev->node_guid)
1811 goto err;
1812
1813 init_completion(&cma_dev->comp);
1814 atomic_set(&cma_dev->refcount, 1);
1815 INIT_LIST_HEAD(&cma_dev->id_list);
1816 ib_set_client_data(device, &cma_client, cma_dev);
1817
1818 mutex_lock(&lock);
1819 list_add_tail(&cma_dev->list, &dev_list);
1820 list_for_each_entry(id_priv, &listen_any_list, list)
1821 cma_listen_on_dev(id_priv, cma_dev);
1822 mutex_unlock(&lock);
1823 return;
1824err:
1825 kfree(cma_dev);
1826}
1827
1828static int cma_remove_id_dev(struct rdma_id_private *id_priv)
1829{
1830 enum cma_state state;
1831
1832 /* Record that we want to remove the device */
1833 state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
1834 if (state == CMA_DESTROYING)
1835 return 0;
1836
1837 cma_cancel_operation(id_priv, state);
1838 wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
1839
1840 /* Check for destruction from another callback. */
1841 if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
1842 return 0;
1843
1844 return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL,
1845 0, NULL, 0);
1846}
1847
1848static void cma_process_remove(struct cma_device *cma_dev)
1849{
1850 struct list_head remove_list;
1851 struct rdma_id_private *id_priv;
1852 int ret;
1853
1854 INIT_LIST_HEAD(&remove_list);
1855
1856 mutex_lock(&lock);
1857 while (!list_empty(&cma_dev->id_list)) {
1858 id_priv = list_entry(cma_dev->id_list.next,
1859 struct rdma_id_private, list);
1860
1861 if (cma_internal_listen(id_priv)) {
1862 cma_destroy_listen(id_priv);
1863 continue;
1864 }
1865
1866 list_del(&id_priv->list);
1867 list_add_tail(&id_priv->list, &remove_list);
1868 atomic_inc(&id_priv->refcount);
1869 mutex_unlock(&lock);
1870
1871 ret = cma_remove_id_dev(id_priv);
1872 cma_deref_id(id_priv);
1873 if (ret)
1874 rdma_destroy_id(&id_priv->id);
1875
1876 mutex_lock(&lock);
1877 }
1878 mutex_unlock(&lock);
1879
1880 cma_deref_dev(cma_dev);
1881 wait_for_completion(&cma_dev->comp);
1882}
1883
1884static void cma_remove_one(struct ib_device *device)
1885{
1886 struct cma_device *cma_dev;
1887
1888 cma_dev = ib_get_client_data(device, &cma_client);
1889 if (!cma_dev)
1890 return;
1891
1892 mutex_lock(&lock);
1893 list_del(&cma_dev->list);
1894 mutex_unlock(&lock);
1895
1896 cma_process_remove(cma_dev);
1897 kfree(cma_dev);
1898}
1899
1900static int cma_init(void)
1901{
1902 int ret;
1903
1904 cma_wq = create_singlethread_workqueue("rdma_cm_wq");
1905 if (!cma_wq)
1906 return -ENOMEM;
1907
1908 ret = ib_register_client(&cma_client);
1909 if (ret)
1910 goto err;
1911 return 0;
1912
1913err:
1914 destroy_workqueue(cma_wq);
1915 return ret;
1916}
1917
1918static void cma_cleanup(void)
1919{
1920 ib_unregister_client(&cma_client);
1921 destroy_workqueue(cma_wq);
1922 idr_destroy(&sdp_ps);
1923 idr_destroy(&tcp_ps);
1924}
1925
1926module_init(cma_init);
1927module_exit(cma_cleanup);