about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
author: Sean Hefty <sean.hefty@intel.com> 2007-02-15 20:00:18 -0500
committer: Roland Dreier <rolandd@cisco.com> 2007-02-16 17:29:07 -0500
commit: c8f6a362bf3eb28ade6027b49bb160a336dd51c0 (patch)
tree: b9b19599c9fddea313725a359283bc624a86c1ec /drivers/infiniband/core
parent: faec2f7b96b555055d0aa6cc6b83a537270bed52 (diff)
RDMA/cma: Add multicast communication support
Extend rdma_cm to support multicast communication. Multicast support is added to the existing RDMA_PS_UDP port space, as well as a new RDMA_PS_IPOIB port space. The latter port space allows joining the multicast groups used by IPoIB, which enables offloading IPoIB traffic to a separate QP. The port space determines the signature used in the MGID when joining the group. The newly added RDMA_PS_IPOIB also allows for unicast operations, similar to RDMA_PS_UDP.

Supporting the RDMA_PS_IPOIB port space requires changing how UD QPs are initialized, since we can no longer assume that the qkey is constant. This requires saving the Q_Key to use when attaching to a device, so that it is available when creating the QP. The Q_Key information is exported to the user through the existing rdma_init_qp_attr() interface.

Multicast support is also exported to userspace through the rdma_ucm.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- drivers/infiniband/core/cma.c 359
-rw-r--r-- drivers/infiniband/core/ucma.c 204
2 files changed, 516 insertions, 47 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index db88e609bf42..f8d69b3fa307 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -71,6 +71,7 @@ static struct workqueue_struct *cma_wq;
71static DEFINE_IDR(sdp_ps); 71static DEFINE_IDR(sdp_ps);
72static DEFINE_IDR(tcp_ps); 72static DEFINE_IDR(tcp_ps);
73static DEFINE_IDR(udp_ps); 73static DEFINE_IDR(udp_ps);
74static DEFINE_IDR(ipoib_ps);
74static int next_port; 75static int next_port;
75 76
76struct cma_device { 77struct cma_device {
@@ -116,6 +117,7 @@ struct rdma_id_private {
116 struct list_head list; 117 struct list_head list;
117 struct list_head listen_list; 118 struct list_head listen_list;
118 struct cma_device *cma_dev; 119 struct cma_device *cma_dev;
120 struct list_head mc_list;
119 121
120 enum cma_state state; 122 enum cma_state state;
121 spinlock_t lock; 123 spinlock_t lock;
@@ -134,10 +136,23 @@ struct rdma_id_private {
134 } cm_id; 136 } cm_id;
135 137
136 u32 seq_num; 138 u32 seq_num;
139 u32 qkey;
137 u32 qp_num; 140 u32 qp_num;
138 u8 srq; 141 u8 srq;
139}; 142};
140 143
144struct cma_multicast {
145 struct rdma_id_private *id_priv;
146 union {
147 struct ib_sa_multicast *ib;
148 } multicast;
149 struct list_head list;
150 void *context;
151 struct sockaddr addr;
152 u8 pad[sizeof(struct sockaddr_in6) -
153 sizeof(struct sockaddr)];
154};
155
141struct cma_work { 156struct cma_work {
142 struct work_struct work; 157 struct work_struct work;
143 struct rdma_id_private *id; 158 struct rdma_id_private *id;
@@ -243,6 +258,11 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
243 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); 258 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
244} 259}
245 260
261static inline int cma_is_ud_ps(enum rdma_port_space ps)
262{
263 return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
264}
265
246static void cma_attach_to_dev(struct rdma_id_private *id_priv, 266static void cma_attach_to_dev(struct rdma_id_private *id_priv,
247 struct cma_device *cma_dev) 267 struct cma_device *cma_dev)
248{ 268{
@@ -265,19 +285,41 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
265 id_priv->cma_dev = NULL; 285 id_priv->cma_dev = NULL;
266} 286}
267 287
288static int cma_set_qkey(struct ib_device *device, u8 port_num,
289 enum rdma_port_space ps,
290 struct rdma_dev_addr *dev_addr, u32 *qkey)
291{
292 struct ib_sa_mcmember_rec rec;
293 int ret = 0;
294
295 switch (ps) {
296 case RDMA_PS_UDP:
297 *qkey = RDMA_UDP_QKEY;
298 break;
299 case RDMA_PS_IPOIB:
300 ib_addr_get_mgid(dev_addr, &rec.mgid);
301 ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
302 *qkey = be32_to_cpu(rec.qkey);
303 break;
304 default:
305 break;
306 }
307 return ret;
308}
309
268static int cma_acquire_dev(struct rdma_id_private *id_priv) 310static int cma_acquire_dev(struct rdma_id_private *id_priv)
269{ 311{
270 enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type; 312 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
271 struct cma_device *cma_dev; 313 struct cma_device *cma_dev;
272 union ib_gid gid; 314 union ib_gid gid;
273 int ret = -ENODEV; 315 int ret = -ENODEV;
274 316
275 switch (rdma_node_get_transport(dev_type)) { 317 switch (rdma_node_get_transport(dev_addr->dev_type)) {
276 case RDMA_TRANSPORT_IB: 318 case RDMA_TRANSPORT_IB:
277 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 319 ib_addr_get_sgid(dev_addr, &gid);
278 break; 320 break;
279 case RDMA_TRANSPORT_IWARP: 321 case RDMA_TRANSPORT_IWARP:
280 iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 322 iw_addr_get_sgid(dev_addr, &gid);
281 break; 323 break;
282 default: 324 default:
283 return -ENODEV; 325 return -ENODEV;
@@ -287,7 +329,12 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
287 ret = ib_find_cached_gid(cma_dev->device, &gid, 329 ret = ib_find_cached_gid(cma_dev->device, &gid,
288 &id_priv->id.port_num, NULL); 330 &id_priv->id.port_num, NULL);
289 if (!ret) { 331 if (!ret) {
290 cma_attach_to_dev(id_priv, cma_dev); 332 ret = cma_set_qkey(cma_dev->device,
333 id_priv->id.port_num,
334 id_priv->id.ps, dev_addr,
335 &id_priv->qkey);
336 if (!ret)
337 cma_attach_to_dev(id_priv, cma_dev);
291 break; 338 break;
292 } 339 }
293 } 340 }
@@ -325,40 +372,50 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
325 init_waitqueue_head(&id_priv->wait_remove); 372 init_waitqueue_head(&id_priv->wait_remove);
326 atomic_set(&id_priv->dev_remove, 0); 373 atomic_set(&id_priv->dev_remove, 0);
327 INIT_LIST_HEAD(&id_priv->listen_list); 374 INIT_LIST_HEAD(&id_priv->listen_list);
375 INIT_LIST_HEAD(&id_priv->mc_list);
328 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 376 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
329 377
330 return &id_priv->id; 378 return &id_priv->id;
331} 379}
332EXPORT_SYMBOL(rdma_create_id); 380EXPORT_SYMBOL(rdma_create_id);
333 381
334static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 382static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
335{ 383{
336 struct ib_qp_attr qp_attr; 384 struct ib_qp_attr qp_attr;
337 struct rdma_dev_addr *dev_addr; 385 int qp_attr_mask, ret;
338 int ret;
339 386
340 dev_addr = &id_priv->id.route.addr.dev_addr; 387 qp_attr.qp_state = IB_QPS_INIT;
341 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 388 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
342 ib_addr_get_pkey(dev_addr),
343 &qp_attr.pkey_index);
344 if (ret) 389 if (ret)
345 return ret; 390 return ret;
346 391
347 qp_attr.qp_state = IB_QPS_INIT; 392 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
348 qp_attr.qp_access_flags = 0; 393 if (ret)
349 qp_attr.port_num = id_priv->id.port_num; 394 return ret;
350 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS | 395
351 IB_QP_PKEY_INDEX | IB_QP_PORT); 396 qp_attr.qp_state = IB_QPS_RTR;
397 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
398 if (ret)
399 return ret;
400
401 qp_attr.qp_state = IB_QPS_RTS;
402 qp_attr.sq_psn = 0;
403 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
404
405 return ret;
352} 406}
353 407
354static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 408static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
355{ 409{
356 struct ib_qp_attr qp_attr; 410 struct ib_qp_attr qp_attr;
411 int qp_attr_mask, ret;
357 412
358 qp_attr.qp_state = IB_QPS_INIT; 413 qp_attr.qp_state = IB_QPS_INIT;
359 qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; 414 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
415 if (ret)
416 return ret;
360 417
361 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); 418 return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
362} 419}
363 420
364int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, 421int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
@@ -376,18 +433,10 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
376 if (IS_ERR(qp)) 433 if (IS_ERR(qp))
377 return PTR_ERR(qp); 434 return PTR_ERR(qp);
378 435
379 switch (rdma_node_get_transport(id->device->node_type)) { 436 if (cma_is_ud_ps(id_priv->id.ps))
380 case RDMA_TRANSPORT_IB: 437 ret = cma_init_ud_qp(id_priv, qp);
381 ret = cma_init_ib_qp(id_priv, qp); 438 else
382 break; 439 ret = cma_init_conn_qp(id_priv, qp);
383 case RDMA_TRANSPORT_IWARP:
384 ret = cma_init_iw_qp(id_priv, qp);
385 break;
386 default:
387 ret = -ENOSYS;
388 break;
389 }
390
391 if (ret) 440 if (ret)
392 goto err; 441 goto err;
393 442
@@ -460,23 +509,55 @@ static int cma_modify_qp_err(struct rdma_cm_id *id)
460 return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); 509 return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
461} 510}
462 511
512static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
513 struct ib_qp_attr *qp_attr, int *qp_attr_mask)
514{
515 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
516 int ret;
517
518 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
519 ib_addr_get_pkey(dev_addr),
520 &qp_attr->pkey_index);
521 if (ret)
522 return ret;
523
524 qp_attr->port_num = id_priv->id.port_num;
525 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
526
527 if (cma_is_ud_ps(id_priv->id.ps)) {
528 qp_attr->qkey = id_priv->qkey;
529 *qp_attr_mask |= IB_QP_QKEY;
530 } else {
531 qp_attr->qp_access_flags = 0;
532 *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
533 }
534 return 0;
535}
536
463int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 537int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
464 int *qp_attr_mask) 538 int *qp_attr_mask)
465{ 539{
466 struct rdma_id_private *id_priv; 540 struct rdma_id_private *id_priv;
467 int ret; 541 int ret = 0;
468 542
469 id_priv = container_of(id, struct rdma_id_private, id); 543 id_priv = container_of(id, struct rdma_id_private, id);
470 switch (rdma_node_get_transport(id_priv->id.device->node_type)) { 544 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
471 case RDMA_TRANSPORT_IB: 545 case RDMA_TRANSPORT_IB:
472 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 546 if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
473 qp_attr_mask); 547 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
548 else
549 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
550 qp_attr_mask);
474 if (qp_attr->qp_state == IB_QPS_RTR) 551 if (qp_attr->qp_state == IB_QPS_RTR)
475 qp_attr->rq_psn = id_priv->seq_num; 552 qp_attr->rq_psn = id_priv->seq_num;
476 break; 553 break;
477 case RDMA_TRANSPORT_IWARP: 554 case RDMA_TRANSPORT_IWARP:
478 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 555 if (!id_priv->cm_id.iw) {
479 qp_attr_mask); 556 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
557 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
558 } else
559 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
560 qp_attr_mask);
480 break; 561 break;
481 default: 562 default:
482 ret = -ENOSYS; 563 ret = -ENOSYS;
@@ -698,6 +779,19 @@ static void cma_release_port(struct rdma_id_private *id_priv)
698 mutex_unlock(&lock); 779 mutex_unlock(&lock);
699} 780}
700 781
782static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
783{
784 struct cma_multicast *mc;
785
786 while (!list_empty(&id_priv->mc_list)) {
787 mc = container_of(id_priv->mc_list.next,
788 struct cma_multicast, list);
789 list_del(&mc->list);
790 ib_sa_free_multicast(mc->multicast.ib);
791 kfree(mc);
792 }
793}
794
701void rdma_destroy_id(struct rdma_cm_id *id) 795void rdma_destroy_id(struct rdma_cm_id *id)
702{ 796{
703 struct rdma_id_private *id_priv; 797 struct rdma_id_private *id_priv;
@@ -722,6 +816,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
722 default: 816 default:
723 break; 817 break;
724 } 818 }
819 cma_leave_mc_groups(id_priv);
725 mutex_lock(&lock); 820 mutex_lock(&lock);
726 cma_detach_from_dev(id_priv); 821 cma_detach_from_dev(id_priv);
727 } 822 }
@@ -972,7 +1067,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
972 memset(&event, 0, sizeof event); 1067 memset(&event, 0, sizeof event);
973 offset = cma_user_data_offset(listen_id->id.ps); 1068 offset = cma_user_data_offset(listen_id->id.ps);
974 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1069 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
975 if (listen_id->id.ps == RDMA_PS_UDP) { 1070 if (cma_is_ud_ps(listen_id->id.ps)) {
976 conn_id = cma_new_udp_id(&listen_id->id, ib_event); 1071 conn_id = cma_new_udp_id(&listen_id->id, ib_event);
977 event.param.ud.private_data = ib_event->private_data + offset; 1072 event.param.ud.private_data = ib_event->private_data + offset;
978 event.param.ud.private_data_len = 1073 event.param.ud.private_data_len =
@@ -1725,7 +1820,7 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1725 struct rdma_bind_list *bind_list; 1820 struct rdma_bind_list *bind_list;
1726 int port, ret; 1821 int port, ret;
1727 1822
1728 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 1823 bind_list = kmalloc(sizeof *bind_list, GFP_KERNEL);
1729 if (!bind_list) 1824 if (!bind_list)
1730 return -ENOMEM; 1825 return -ENOMEM;
1731 1826
@@ -1847,6 +1942,9 @@ static int cma_get_port(struct rdma_id_private *id_priv)
1847 case RDMA_PS_UDP: 1942 case RDMA_PS_UDP:
1848 ps = &udp_ps; 1943 ps = &udp_ps;
1849 break; 1944 break;
1945 case RDMA_PS_IPOIB:
1946 ps = &ipoib_ps;
1947 break;
1850 default: 1948 default:
1851 return -EPROTONOSUPPORT; 1949 return -EPROTONOSUPPORT;
1852 } 1950 }
@@ -1961,7 +2059,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
1961 event.status = ib_event->param.sidr_rep_rcvd.status; 2059 event.status = ib_event->param.sidr_rep_rcvd.status;
1962 break; 2060 break;
1963 } 2061 }
1964 if (rep->qkey != RDMA_UD_QKEY) { 2062 if (id_priv->qkey != rep->qkey) {
1965 event.event = RDMA_CM_EVENT_UNREACHABLE; 2063 event.event = RDMA_CM_EVENT_UNREACHABLE;
1966 event.status = -EINVAL; 2064 event.status = -EINVAL;
1967 break; 2065 break;
@@ -2160,7 +2258,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2160 2258
2161 switch (rdma_node_get_transport(id->device->node_type)) { 2259 switch (rdma_node_get_transport(id->device->node_type)) {
2162 case RDMA_TRANSPORT_IB: 2260 case RDMA_TRANSPORT_IB:
2163 if (id->ps == RDMA_PS_UDP) 2261 if (cma_is_ud_ps(id->ps))
2164 ret = cma_resolve_ib_udp(id_priv, conn_param); 2262 ret = cma_resolve_ib_udp(id_priv, conn_param);
2165 else 2263 else
2166 ret = cma_connect_ib(id_priv, conn_param); 2264 ret = cma_connect_ib(id_priv, conn_param);
@@ -2256,7 +2354,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2256 rep.status = status; 2354 rep.status = status;
2257 if (status == IB_SIDR_SUCCESS) { 2355 if (status == IB_SIDR_SUCCESS) {
2258 rep.qp_num = id_priv->qp_num; 2356 rep.qp_num = id_priv->qp_num;
2259 rep.qkey = RDMA_UD_QKEY; 2357 rep.qkey = id_priv->qkey;
2260 } 2358 }
2261 rep.private_data = private_data; 2359 rep.private_data = private_data;
2262 rep.private_data_len = private_data_len; 2360 rep.private_data_len = private_data_len;
@@ -2280,7 +2378,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2280 2378
2281 switch (rdma_node_get_transport(id->device->node_type)) { 2379 switch (rdma_node_get_transport(id->device->node_type)) {
2282 case RDMA_TRANSPORT_IB: 2380 case RDMA_TRANSPORT_IB:
2283 if (id->ps == RDMA_PS_UDP) 2381 if (cma_is_ud_ps(id->ps))
2284 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2382 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2285 conn_param->private_data, 2383 conn_param->private_data,
2286 conn_param->private_data_len); 2384 conn_param->private_data_len);
@@ -2341,7 +2439,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2341 2439
2342 switch (rdma_node_get_transport(id->device->node_type)) { 2440 switch (rdma_node_get_transport(id->device->node_type)) {
2343 case RDMA_TRANSPORT_IB: 2441 case RDMA_TRANSPORT_IB:
2344 if (id->ps == RDMA_PS_UDP) 2442 if (cma_is_ud_ps(id->ps))
2345 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 2443 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2346 private_data, private_data_len); 2444 private_data, private_data_len);
2347 else 2445 else
@@ -2392,6 +2490,178 @@ out:
2392} 2490}
2393EXPORT_SYMBOL(rdma_disconnect); 2491EXPORT_SYMBOL(rdma_disconnect);
2394 2492
2493static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2494{
2495 struct rdma_id_private *id_priv;
2496 struct cma_multicast *mc = multicast->context;
2497 struct rdma_cm_event event;
2498 int ret;
2499
2500 id_priv = mc->id_priv;
2501 atomic_inc(&id_priv->dev_remove);
2502 if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2503 !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2504 goto out;
2505
2506 if (!status && id_priv->id.qp)
2507 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2508 multicast->rec.mlid);
2509
2510 memset(&event, 0, sizeof event);
2511 event.status = status;
2512 event.param.ud.private_data = mc->context;
2513 if (!status) {
2514 event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2515 ib_init_ah_from_mcmember(id_priv->id.device,
2516 id_priv->id.port_num, &multicast->rec,
2517 &event.param.ud.ah_attr);
2518 event.param.ud.qp_num = 0xFFFFFF;
2519 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2520 } else
2521 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2522
2523 ret = id_priv->id.event_handler(&id_priv->id, &event);
2524 if (ret) {
2525 cma_exch(id_priv, CMA_DESTROYING);
2526 cma_release_remove(id_priv);
2527 rdma_destroy_id(&id_priv->id);
2528 return 0;
2529 }
2530out:
2531 cma_release_remove(id_priv);
2532 return 0;
2533}
2534
2535static void cma_set_mgid(struct rdma_id_private *id_priv,
2536 struct sockaddr *addr, union ib_gid *mgid)
2537{
2538 unsigned char mc_map[MAX_ADDR_LEN];
2539 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2540 struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2541 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2542
2543 if (cma_any_addr(addr)) {
2544 memset(mgid, 0, sizeof *mgid);
2545 } else if ((addr->sa_family == AF_INET6) &&
2546 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
2547 0xFF10A01B)) {
2548 /* IPv6 address is an SA assigned MGID. */
2549 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2550 } else {
2551 ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
2552 if (id_priv->id.ps == RDMA_PS_UDP)
2553 mc_map[7] = 0x01; /* Use RDMA CM signature */
2554 mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
2555 mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
2556 *mgid = *(union ib_gid *) (mc_map + 4);
2557 }
2558}
2559
2560static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2561 struct cma_multicast *mc)
2562{
2563 struct ib_sa_mcmember_rec rec;
2564 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2565 ib_sa_comp_mask comp_mask;
2566 int ret;
2567
2568 ib_addr_get_mgid(dev_addr, &rec.mgid);
2569 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2570 &rec.mgid, &rec);
2571 if (ret)
2572 return ret;
2573
2574 cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
2575 if (id_priv->id.ps == RDMA_PS_UDP)
2576 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2577 ib_addr_get_sgid(dev_addr, &rec.port_gid);
2578 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2579 rec.join_state = 1;
2580
2581 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2582 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2583 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2584 IB_SA_MCMEMBER_REC_FLOW_LABEL |
2585 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2586
2587 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2588 id_priv->id.port_num, &rec,
2589 comp_mask, GFP_KERNEL,
2590 cma_ib_mc_handler, mc);
2591 if (IS_ERR(mc->multicast.ib))
2592 return PTR_ERR(mc->multicast.ib);
2593
2594 return 0;
2595}
2596
2597int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
2598 void *context)
2599{
2600 struct rdma_id_private *id_priv;
2601 struct cma_multicast *mc;
2602 int ret;
2603
2604 id_priv = container_of(id, struct rdma_id_private, id);
2605 if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2606 !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2607 return -EINVAL;
2608
2609 mc = kmalloc(sizeof *mc, GFP_KERNEL);
2610 if (!mc)
2611 return -ENOMEM;
2612
2613 memcpy(&mc->addr, addr, ip_addr_size(addr));
2614 mc->context = context;
2615 mc->id_priv = id_priv;
2616
2617 spin_lock(&id_priv->lock);
2618 list_add(&mc->list, &id_priv->mc_list);
2619 spin_unlock(&id_priv->lock);
2620
2621 switch (rdma_node_get_transport(id->device->node_type)) {
2622 case RDMA_TRANSPORT_IB:
2623 ret = cma_join_ib_multicast(id_priv, mc);
2624 break;
2625 default:
2626 ret = -ENOSYS;
2627 break;
2628 }
2629
2630 if (ret) {
2631 spin_lock_irq(&id_priv->lock);
2632 list_del(&mc->list);
2633 spin_unlock_irq(&id_priv->lock);
2634 kfree(mc);
2635 }
2636 return ret;
2637}
2638EXPORT_SYMBOL(rdma_join_multicast);
2639
2640void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2641{
2642 struct rdma_id_private *id_priv;
2643 struct cma_multicast *mc;
2644
2645 id_priv = container_of(id, struct rdma_id_private, id);
2646 spin_lock_irq(&id_priv->lock);
2647 list_for_each_entry(mc, &id_priv->mc_list, list) {
2648 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
2649 list_del(&mc->list);
2650 spin_unlock_irq(&id_priv->lock);
2651
2652 if (id->qp)
2653 ib_detach_mcast(id->qp,
2654 &mc->multicast.ib->rec.mgid,
2655 mc->multicast.ib->rec.mlid);
2656 ib_sa_free_multicast(mc->multicast.ib);
2657 kfree(mc);
2658 return;
2659 }
2660 }
2661 spin_unlock_irq(&id_priv->lock);
2662}
2663EXPORT_SYMBOL(rdma_leave_multicast);
2664
2395static void cma_add_one(struct ib_device *device) 2665static void cma_add_one(struct ib_device *device)
2396{ 2666{
2397 struct cma_device *cma_dev; 2667 struct cma_device *cma_dev;
@@ -2522,6 +2792,7 @@ static void cma_cleanup(void)
2522 idr_destroy(&sdp_ps); 2792 idr_destroy(&sdp_ps);
2523 idr_destroy(&tcp_ps); 2793 idr_destroy(&tcp_ps);
2524 idr_destroy(&udp_ps); 2794 idr_destroy(&udp_ps);
2795 idr_destroy(&ipoib_ps);
2525} 2796}
2526 2797
2527module_init(cma_init); 2798module_init(cma_init);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 6b81b98961c7..b516b93b8550 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -70,10 +70,24 @@ struct ucma_context {
70 u64 uid; 70 u64 uid;
71 71
72 struct list_head list; 72 struct list_head list;
73 struct list_head mc_list;
74};
75
76struct ucma_multicast {
77 struct ucma_context *ctx;
78 int id;
79 int events_reported;
80
81 u64 uid;
82 struct list_head list;
83 struct sockaddr addr;
84 u8 pad[sizeof(struct sockaddr_in6) -
85 sizeof(struct sockaddr)];
73}; 86};
74 87
75struct ucma_event { 88struct ucma_event {
76 struct ucma_context *ctx; 89 struct ucma_context *ctx;
90 struct ucma_multicast *mc;
77 struct list_head list; 91 struct list_head list;
78 struct rdma_cm_id *cm_id; 92 struct rdma_cm_id *cm_id;
79 struct rdma_ucm_event_resp resp; 93 struct rdma_ucm_event_resp resp;
@@ -81,6 +95,7 @@ struct ucma_event {
81 95
82static DEFINE_MUTEX(mut); 96static DEFINE_MUTEX(mut);
83static DEFINE_IDR(ctx_idr); 97static DEFINE_IDR(ctx_idr);
98static DEFINE_IDR(multicast_idr);
84 99
85static inline struct ucma_context *_ucma_find_context(int id, 100static inline struct ucma_context *_ucma_find_context(int id,
86 struct ucma_file *file) 101 struct ucma_file *file)
@@ -124,6 +139,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
124 139
125 atomic_set(&ctx->ref, 1); 140 atomic_set(&ctx->ref, 1);
126 init_completion(&ctx->comp); 141 init_completion(&ctx->comp);
142 INIT_LIST_HEAD(&ctx->mc_list);
127 ctx->file = file; 143 ctx->file = file;
128 144
129 do { 145 do {
@@ -147,6 +163,37 @@ error:
147 return NULL; 163 return NULL;
148} 164}
149 165
166static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
167{
168 struct ucma_multicast *mc;
169 int ret;
170
171 mc = kzalloc(sizeof(*mc), GFP_KERNEL);
172 if (!mc)
173 return NULL;
174
175 do {
176 ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
177 if (!ret)
178 goto error;
179
180 mutex_lock(&mut);
181 ret = idr_get_new(&multicast_idr, mc, &mc->id);
182 mutex_unlock(&mut);
183 } while (ret == -EAGAIN);
184
185 if (ret)
186 goto error;
187
188 mc->ctx = ctx;
189 list_add_tail(&mc->list, &ctx->mc_list);
190 return mc;
191
192error:
193 kfree(mc);
194 return NULL;
195}
196
150static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, 197static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
151 struct rdma_conn_param *src) 198 struct rdma_conn_param *src)
152{ 199{
@@ -180,8 +227,19 @@ static void ucma_set_event_context(struct ucma_context *ctx,
180 struct ucma_event *uevent) 227 struct ucma_event *uevent)
181{ 228{
182 uevent->ctx = ctx; 229 uevent->ctx = ctx;
183 uevent->resp.uid = ctx->uid; 230 switch (event->event) {
184 uevent->resp.id = ctx->id; 231 case RDMA_CM_EVENT_MULTICAST_JOIN:
232 case RDMA_CM_EVENT_MULTICAST_ERROR:
233 uevent->mc = (struct ucma_multicast *)
234 event->param.ud.private_data;
235 uevent->resp.uid = uevent->mc->uid;
236 uevent->resp.id = uevent->mc->id;
237 break;
238 default:
239 uevent->resp.uid = ctx->uid;
240 uevent->resp.id = ctx->id;
241 break;
242 }
185} 243}
186 244
187static int ucma_event_handler(struct rdma_cm_id *cm_id, 245static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -199,7 +257,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
199 ucma_set_event_context(ctx, event, uevent); 257 ucma_set_event_context(ctx, event, uevent);
200 uevent->resp.event = event->event; 258 uevent->resp.event = event->event;
201 uevent->resp.status = event->status; 259 uevent->resp.status = event->status;
202 if (cm_id->ps == RDMA_PS_UDP) 260 if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
203 ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); 261 ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
204 else 262 else
205 ucma_copy_conn_event(&uevent->resp.param.conn, 263 ucma_copy_conn_event(&uevent->resp.param.conn,
@@ -290,6 +348,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
290 348
291 list_del(&uevent->list); 349 list_del(&uevent->list);
292 uevent->ctx->events_reported++; 350 uevent->ctx->events_reported++;
351 if (uevent->mc)
352 uevent->mc->events_reported++;
293 kfree(uevent); 353 kfree(uevent);
294done: 354done:
295 mutex_unlock(&file->mut); 355 mutex_unlock(&file->mut);
@@ -342,6 +402,19 @@ err1:
342 return ret; 402 return ret;
343} 403}
344 404
405static void ucma_cleanup_multicast(struct ucma_context *ctx)
406{
407 struct ucma_multicast *mc, *tmp;
408
409 mutex_lock(&mut);
410 list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
411 list_del(&mc->list);
412 idr_remove(&multicast_idr, mc->id);
413 kfree(mc);
414 }
415 mutex_unlock(&mut);
416}
417
345static void ucma_cleanup_events(struct ucma_context *ctx) 418static void ucma_cleanup_events(struct ucma_context *ctx)
346{ 419{
347 struct ucma_event *uevent, *tmp; 420 struct ucma_event *uevent, *tmp;
@@ -360,6 +433,19 @@ static void ucma_cleanup_events(struct ucma_context *ctx)
360 } 433 }
361} 434}
362 435
436static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
437{
438 struct ucma_event *uevent, *tmp;
439
440 list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
441 if (uevent->mc != mc)
442 continue;
443
444 list_del(&uevent->list);
445 kfree(uevent);
446 }
447}
448
363static int ucma_free_ctx(struct ucma_context *ctx) 449static int ucma_free_ctx(struct ucma_context *ctx)
364{ 450{
365 int events_reported; 451 int events_reported;
@@ -367,6 +453,8 @@ static int ucma_free_ctx(struct ucma_context *ctx)
367 /* No new events will be generated after destroying the id. */ 453 /* No new events will be generated after destroying the id. */
368 rdma_destroy_id(ctx->cm_id); 454 rdma_destroy_id(ctx->cm_id);
369 455
456 ucma_cleanup_multicast(ctx);
457
370 /* Cleanup events not yet reported to the user. */ 458 /* Cleanup events not yet reported to the user. */
371 mutex_lock(&ctx->file->mut); 459 mutex_lock(&ctx->file->mut);
372 ucma_cleanup_events(ctx); 460 ucma_cleanup_events(ctx);
@@ -731,6 +819,114 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
731 return ret; 819 return ret;
732} 820}
733 821
822static ssize_t ucma_join_multicast(struct ucma_file *file,
823 const char __user *inbuf,
824 int in_len, int out_len)
825{
826 struct rdma_ucm_join_mcast cmd;
827 struct rdma_ucm_create_id_resp resp;
828 struct ucma_context *ctx;
829 struct ucma_multicast *mc;
830 int ret;
831
832 if (out_len < sizeof(resp))
833 return -ENOSPC;
834
835 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
836 return -EFAULT;
837
838 ctx = ucma_get_ctx(file, cmd.id);
839 if (IS_ERR(ctx))
840 return PTR_ERR(ctx);
841
842 mutex_lock(&file->mut);
843 mc = ucma_alloc_multicast(ctx);
844 if (IS_ERR(mc)) {
845 ret = PTR_ERR(mc);
846 goto err1;
847 }
848
849 mc->uid = cmd.uid;
850 memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
851 ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc);
852 if (ret)
853 goto err2;
854
855 resp.id = mc->id;
856 if (copy_to_user((void __user *)(unsigned long)cmd.response,
857 &resp, sizeof(resp))) {
858 ret = -EFAULT;
859 goto err3;
860 }
861
862 mutex_unlock(&file->mut);
863 ucma_put_ctx(ctx);
864 return 0;
865
866err3:
867 rdma_leave_multicast(ctx->cm_id, &mc->addr);
868 ucma_cleanup_mc_events(mc);
869err2:
870 mutex_lock(&mut);
871 idr_remove(&multicast_idr, mc->id);
872 mutex_unlock(&mut);
873 list_del(&mc->list);
874 kfree(mc);
875err1:
876 mutex_unlock(&file->mut);
877 ucma_put_ctx(ctx);
878 return ret;
879}
880
881static ssize_t ucma_leave_multicast(struct ucma_file *file,
882 const char __user *inbuf,
883 int in_len, int out_len)
884{
885 struct rdma_ucm_destroy_id cmd;
886 struct rdma_ucm_destroy_id_resp resp;
887 struct ucma_multicast *mc;
888 int ret = 0;
889
890 if (out_len < sizeof(resp))
891 return -ENOSPC;
892
893 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
894 return -EFAULT;
895
896 mutex_lock(&mut);
897 mc = idr_find(&multicast_idr, cmd.id);
898 if (!mc)
899 mc = ERR_PTR(-ENOENT);
900 else if (mc->ctx->file != file)
901 mc = ERR_PTR(-EINVAL);
902 else {
903 idr_remove(&multicast_idr, mc->id);
904 atomic_inc(&mc->ctx->ref);
905 }
906 mutex_unlock(&mut);
907
908 if (IS_ERR(mc)) {
909 ret = PTR_ERR(mc);
910 goto out;
911 }
912
913 rdma_leave_multicast(mc->ctx->cm_id, &mc->addr);
914 mutex_lock(&mc->ctx->file->mut);
915 ucma_cleanup_mc_events(mc);
916 list_del(&mc->list);
917 mutex_unlock(&mc->ctx->file->mut);
918
919 ucma_put_ctx(mc->ctx);
920 resp.events_reported = mc->events_reported;
921 kfree(mc);
922
923 if (copy_to_user((void __user *)(unsigned long)cmd.response,
924 &resp, sizeof(resp)))
925 ret = -EFAULT;
926out:
927 return ret;
928}
929
734static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 930static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
735 const char __user *inbuf, 931 const char __user *inbuf,
736 int in_len, int out_len) = { 932 int in_len, int out_len) = {
@@ -750,6 +946,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
750 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 946 [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
751 [RDMA_USER_CM_CMD_SET_OPTION] = NULL, 947 [RDMA_USER_CM_CMD_SET_OPTION] = NULL,
752 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 948 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
949 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
950 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
753}; 951};
754 952
755static ssize_t ucma_write(struct file *filp, const char __user *buf, 953static ssize_t ucma_write(struct file *filp, const char __user *buf,