Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/Makefile | 2
-rw-r--r--  drivers/infiniband/core/cma.c | 359
-rw-r--r--  drivers/infiniband/core/fmr_pool.c | 4
-rw-r--r--  drivers/infiniband/core/iwcm.c | 47
-rw-r--r--  drivers/infiniband/core/multicast.c | 837
-rw-r--r--  drivers/infiniband/core/sa.h | 66
-rw-r--r--  drivers/infiniband/core/sa_query.c | 30
-rw-r--r--  drivers/infiniband/core/sysfs.c | 2
-rw-r--r--  drivers/infiniband/core/ucma.c | 204
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_dbg.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_wr.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cq.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_ev.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 3
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_user.h | 1
-rw-r--r--  drivers/infiniband/hw/ehca/Kconfig | 8
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h | 19
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_eq.c | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_hca.c | 3
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c | 307
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.h | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c | 32
-rw-r--r--  drivers/infiniband/hw/ehca/ipz_pt_fn.h | 11
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_dma.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6110.c | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6120.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 5
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 195
41 files changed, 1784 insertions, 384 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 50fb1cd447b7..189e5d4b9b17 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
 
 ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
 
-ib_sa-y := sa_query.o
+ib_sa-y := sa_query.o multicast.o
 
 ib_cm-y := cm.o
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index db88e609bf42..f8d69b3fa307 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -71,6 +71,7 @@ static struct workqueue_struct *cma_wq;
71static DEFINE_IDR(sdp_ps); 71static DEFINE_IDR(sdp_ps);
72static DEFINE_IDR(tcp_ps); 72static DEFINE_IDR(tcp_ps);
73static DEFINE_IDR(udp_ps); 73static DEFINE_IDR(udp_ps);
74static DEFINE_IDR(ipoib_ps);
74static int next_port; 75static int next_port;
75 76
76struct cma_device { 77struct cma_device {
@@ -116,6 +117,7 @@ struct rdma_id_private {
116 struct list_head list; 117 struct list_head list;
117 struct list_head listen_list; 118 struct list_head listen_list;
118 struct cma_device *cma_dev; 119 struct cma_device *cma_dev;
120 struct list_head mc_list;
119 121
120 enum cma_state state; 122 enum cma_state state;
121 spinlock_t lock; 123 spinlock_t lock;
@@ -134,10 +136,23 @@ struct rdma_id_private {
134 } cm_id; 136 } cm_id;
135 137
136 u32 seq_num; 138 u32 seq_num;
139 u32 qkey;
137 u32 qp_num; 140 u32 qp_num;
138 u8 srq; 141 u8 srq;
139}; 142};
140 143
144struct cma_multicast {
145 struct rdma_id_private *id_priv;
146 union {
147 struct ib_sa_multicast *ib;
148 } multicast;
149 struct list_head list;
150 void *context;
151 struct sockaddr addr;
152 u8 pad[sizeof(struct sockaddr_in6) -
153 sizeof(struct sockaddr)];
154};
155
141struct cma_work { 156struct cma_work {
142 struct work_struct work; 157 struct work_struct work;
143 struct rdma_id_private *id; 158 struct rdma_id_private *id;
@@ -243,6 +258,11 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
243 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); 258 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
244} 259}
245 260
261static inline int cma_is_ud_ps(enum rdma_port_space ps)
262{
263 return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
264}
265
246static void cma_attach_to_dev(struct rdma_id_private *id_priv, 266static void cma_attach_to_dev(struct rdma_id_private *id_priv,
247 struct cma_device *cma_dev) 267 struct cma_device *cma_dev)
248{ 268{
@@ -265,19 +285,41 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
265 id_priv->cma_dev = NULL; 285 id_priv->cma_dev = NULL;
266} 286}
267 287
288static int cma_set_qkey(struct ib_device *device, u8 port_num,
289 enum rdma_port_space ps,
290 struct rdma_dev_addr *dev_addr, u32 *qkey)
291{
292 struct ib_sa_mcmember_rec rec;
293 int ret = 0;
294
295 switch (ps) {
296 case RDMA_PS_UDP:
297 *qkey = RDMA_UDP_QKEY;
298 break;
299 case RDMA_PS_IPOIB:
300 ib_addr_get_mgid(dev_addr, &rec.mgid);
301 ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
302 *qkey = be32_to_cpu(rec.qkey);
303 break;
304 default:
305 break;
306 }
307 return ret;
308}
309
268static int cma_acquire_dev(struct rdma_id_private *id_priv) 310static int cma_acquire_dev(struct rdma_id_private *id_priv)
269{ 311{
270 enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type; 312 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
271 struct cma_device *cma_dev; 313 struct cma_device *cma_dev;
272 union ib_gid gid; 314 union ib_gid gid;
273 int ret = -ENODEV; 315 int ret = -ENODEV;
274 316
275 switch (rdma_node_get_transport(dev_type)) { 317 switch (rdma_node_get_transport(dev_addr->dev_type)) {
276 case RDMA_TRANSPORT_IB: 318 case RDMA_TRANSPORT_IB:
277 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 319 ib_addr_get_sgid(dev_addr, &gid);
278 break; 320 break;
279 case RDMA_TRANSPORT_IWARP: 321 case RDMA_TRANSPORT_IWARP:
280 iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 322 iw_addr_get_sgid(dev_addr, &gid);
281 break; 323 break;
282 default: 324 default:
283 return -ENODEV; 325 return -ENODEV;
@@ -287,7 +329,12 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
287 ret = ib_find_cached_gid(cma_dev->device, &gid, 329 ret = ib_find_cached_gid(cma_dev->device, &gid,
288 &id_priv->id.port_num, NULL); 330 &id_priv->id.port_num, NULL);
289 if (!ret) { 331 if (!ret) {
290 cma_attach_to_dev(id_priv, cma_dev); 332 ret = cma_set_qkey(cma_dev->device,
333 id_priv->id.port_num,
334 id_priv->id.ps, dev_addr,
335 &id_priv->qkey);
336 if (!ret)
337 cma_attach_to_dev(id_priv, cma_dev);
291 break; 338 break;
292 } 339 }
293 } 340 }
@@ -325,40 +372,50 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
325 init_waitqueue_head(&id_priv->wait_remove); 372 init_waitqueue_head(&id_priv->wait_remove);
326 atomic_set(&id_priv->dev_remove, 0); 373 atomic_set(&id_priv->dev_remove, 0);
327 INIT_LIST_HEAD(&id_priv->listen_list); 374 INIT_LIST_HEAD(&id_priv->listen_list);
375 INIT_LIST_HEAD(&id_priv->mc_list);
328 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 376 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
329 377
330 return &id_priv->id; 378 return &id_priv->id;
331} 379}
332EXPORT_SYMBOL(rdma_create_id); 380EXPORT_SYMBOL(rdma_create_id);
333 381
334static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 382static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
335{ 383{
336 struct ib_qp_attr qp_attr; 384 struct ib_qp_attr qp_attr;
337 struct rdma_dev_addr *dev_addr; 385 int qp_attr_mask, ret;
338 int ret;
339 386
340 dev_addr = &id_priv->id.route.addr.dev_addr; 387 qp_attr.qp_state = IB_QPS_INIT;
341 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 388 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
342 ib_addr_get_pkey(dev_addr),
343 &qp_attr.pkey_index);
344 if (ret) 389 if (ret)
345 return ret; 390 return ret;
346 391
347 qp_attr.qp_state = IB_QPS_INIT; 392 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
348 qp_attr.qp_access_flags = 0; 393 if (ret)
349 qp_attr.port_num = id_priv->id.port_num; 394 return ret;
350 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS | 395
351 IB_QP_PKEY_INDEX | IB_QP_PORT); 396 qp_attr.qp_state = IB_QPS_RTR;
397 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
398 if (ret)
399 return ret;
400
401 qp_attr.qp_state = IB_QPS_RTS;
402 qp_attr.sq_psn = 0;
403 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
404
405 return ret;
352} 406}
353 407
354static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 408static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
355{ 409{
356 struct ib_qp_attr qp_attr; 410 struct ib_qp_attr qp_attr;
411 int qp_attr_mask, ret;
357 412
358 qp_attr.qp_state = IB_QPS_INIT; 413 qp_attr.qp_state = IB_QPS_INIT;
359 qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; 414 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
415 if (ret)
416 return ret;
360 417
361 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); 418 return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
362} 419}
363 420
364int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, 421int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
@@ -376,18 +433,10 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
376 if (IS_ERR(qp)) 433 if (IS_ERR(qp))
377 return PTR_ERR(qp); 434 return PTR_ERR(qp);
378 435
379 switch (rdma_node_get_transport(id->device->node_type)) { 436 if (cma_is_ud_ps(id_priv->id.ps))
380 case RDMA_TRANSPORT_IB: 437 ret = cma_init_ud_qp(id_priv, qp);
381 ret = cma_init_ib_qp(id_priv, qp); 438 else
382 break; 439 ret = cma_init_conn_qp(id_priv, qp);
383 case RDMA_TRANSPORT_IWARP:
384 ret = cma_init_iw_qp(id_priv, qp);
385 break;
386 default:
387 ret = -ENOSYS;
388 break;
389 }
390
391 if (ret) 440 if (ret)
392 goto err; 441 goto err;
393 442
@@ -460,23 +509,55 @@ static int cma_modify_qp_err(struct rdma_cm_id *id)
460 return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); 509 return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
461} 510}
462 511
512static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
513 struct ib_qp_attr *qp_attr, int *qp_attr_mask)
514{
515 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
516 int ret;
517
518 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
519 ib_addr_get_pkey(dev_addr),
520 &qp_attr->pkey_index);
521 if (ret)
522 return ret;
523
524 qp_attr->port_num = id_priv->id.port_num;
525 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
526
527 if (cma_is_ud_ps(id_priv->id.ps)) {
528 qp_attr->qkey = id_priv->qkey;
529 *qp_attr_mask |= IB_QP_QKEY;
530 } else {
531 qp_attr->qp_access_flags = 0;
532 *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
533 }
534 return 0;
535}
536
463int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 537int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
464 int *qp_attr_mask) 538 int *qp_attr_mask)
465{ 539{
466 struct rdma_id_private *id_priv; 540 struct rdma_id_private *id_priv;
467 int ret; 541 int ret = 0;
468 542
469 id_priv = container_of(id, struct rdma_id_private, id); 543 id_priv = container_of(id, struct rdma_id_private, id);
470 switch (rdma_node_get_transport(id_priv->id.device->node_type)) { 544 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
471 case RDMA_TRANSPORT_IB: 545 case RDMA_TRANSPORT_IB:
472 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 546 if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
473 qp_attr_mask); 547 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
548 else
549 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
550 qp_attr_mask);
474 if (qp_attr->qp_state == IB_QPS_RTR) 551 if (qp_attr->qp_state == IB_QPS_RTR)
475 qp_attr->rq_psn = id_priv->seq_num; 552 qp_attr->rq_psn = id_priv->seq_num;
476 break; 553 break;
477 case RDMA_TRANSPORT_IWARP: 554 case RDMA_TRANSPORT_IWARP:
478 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 555 if (!id_priv->cm_id.iw) {
479 qp_attr_mask); 556 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
557 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
558 } else
559 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
560 qp_attr_mask);
480 break; 561 break;
481 default: 562 default:
482 ret = -ENOSYS; 563 ret = -ENOSYS;
@@ -698,6 +779,19 @@ static void cma_release_port(struct rdma_id_private *id_priv)
698 mutex_unlock(&lock); 779 mutex_unlock(&lock);
699} 780}
700 781
782static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
783{
784 struct cma_multicast *mc;
785
786 while (!list_empty(&id_priv->mc_list)) {
787 mc = container_of(id_priv->mc_list.next,
788 struct cma_multicast, list);
789 list_del(&mc->list);
790 ib_sa_free_multicast(mc->multicast.ib);
791 kfree(mc);
792 }
793}
794
701void rdma_destroy_id(struct rdma_cm_id *id) 795void rdma_destroy_id(struct rdma_cm_id *id)
702{ 796{
703 struct rdma_id_private *id_priv; 797 struct rdma_id_private *id_priv;
@@ -722,6 +816,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
722 default: 816 default:
723 break; 817 break;
724 } 818 }
819 cma_leave_mc_groups(id_priv);
725 mutex_lock(&lock); 820 mutex_lock(&lock);
726 cma_detach_from_dev(id_priv); 821 cma_detach_from_dev(id_priv);
727 } 822 }
@@ -972,7 +1067,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
972 memset(&event, 0, sizeof event); 1067 memset(&event, 0, sizeof event);
973 offset = cma_user_data_offset(listen_id->id.ps); 1068 offset = cma_user_data_offset(listen_id->id.ps);
974 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1069 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
975 if (listen_id->id.ps == RDMA_PS_UDP) { 1070 if (cma_is_ud_ps(listen_id->id.ps)) {
976 conn_id = cma_new_udp_id(&listen_id->id, ib_event); 1071 conn_id = cma_new_udp_id(&listen_id->id, ib_event);
977 event.param.ud.private_data = ib_event->private_data + offset; 1072 event.param.ud.private_data = ib_event->private_data + offset;
978 event.param.ud.private_data_len = 1073 event.param.ud.private_data_len =
@@ -1725,7 +1820,7 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1725 struct rdma_bind_list *bind_list; 1820 struct rdma_bind_list *bind_list;
1726 int port, ret; 1821 int port, ret;
1727 1822
1728 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 1823 bind_list = kmalloc(sizeof *bind_list, GFP_KERNEL);
1729 if (!bind_list) 1824 if (!bind_list)
1730 return -ENOMEM; 1825 return -ENOMEM;
1731 1826
@@ -1847,6 +1942,9 @@ static int cma_get_port(struct rdma_id_private *id_priv)
1847 case RDMA_PS_UDP: 1942 case RDMA_PS_UDP:
1848 ps = &udp_ps; 1943 ps = &udp_ps;
1849 break; 1944 break;
1945 case RDMA_PS_IPOIB:
1946 ps = &ipoib_ps;
1947 break;
1850 default: 1948 default:
1851 return -EPROTONOSUPPORT; 1949 return -EPROTONOSUPPORT;
1852 } 1950 }
@@ -1961,7 +2059,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
1961 event.status = ib_event->param.sidr_rep_rcvd.status; 2059 event.status = ib_event->param.sidr_rep_rcvd.status;
1962 break; 2060 break;
1963 } 2061 }
1964 if (rep->qkey != RDMA_UD_QKEY) { 2062 if (id_priv->qkey != rep->qkey) {
1965 event.event = RDMA_CM_EVENT_UNREACHABLE; 2063 event.event = RDMA_CM_EVENT_UNREACHABLE;
1966 event.status = -EINVAL; 2064 event.status = -EINVAL;
1967 break; 2065 break;
@@ -2160,7 +2258,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2160 2258
2161 switch (rdma_node_get_transport(id->device->node_type)) { 2259 switch (rdma_node_get_transport(id->device->node_type)) {
2162 case RDMA_TRANSPORT_IB: 2260 case RDMA_TRANSPORT_IB:
2163 if (id->ps == RDMA_PS_UDP) 2261 if (cma_is_ud_ps(id->ps))
2164 ret = cma_resolve_ib_udp(id_priv, conn_param); 2262 ret = cma_resolve_ib_udp(id_priv, conn_param);
2165 else 2263 else
2166 ret = cma_connect_ib(id_priv, conn_param); 2264 ret = cma_connect_ib(id_priv, conn_param);
@@ -2256,7 +2354,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2256 rep.status = status; 2354 rep.status = status;
2257 if (status == IB_SIDR_SUCCESS) { 2355 if (status == IB_SIDR_SUCCESS) {
2258 rep.qp_num = id_priv->qp_num; 2356 rep.qp_num = id_priv->qp_num;
2259 rep.qkey = RDMA_UD_QKEY; 2357 rep.qkey = id_priv->qkey;
2260 } 2358 }
2261 rep.private_data = private_data; 2359 rep.private_data = private_data;
2262 rep.private_data_len = private_data_len; 2360 rep.private_data_len = private_data_len;
@@ -2280,7 +2378,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2280 2378
2281 switch (rdma_node_get_transport(id->device->node_type)) { 2379 switch (rdma_node_get_transport(id->device->node_type)) {
2282 case RDMA_TRANSPORT_IB: 2380 case RDMA_TRANSPORT_IB:
2283 if (id->ps == RDMA_PS_UDP) 2381 if (cma_is_ud_ps(id->ps))
2284 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2382 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2285 conn_param->private_data, 2383 conn_param->private_data,
2286 conn_param->private_data_len); 2384 conn_param->private_data_len);
@@ -2341,7 +2439,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2341 2439
2342 switch (rdma_node_get_transport(id->device->node_type)) { 2440 switch (rdma_node_get_transport(id->device->node_type)) {
2343 case RDMA_TRANSPORT_IB: 2441 case RDMA_TRANSPORT_IB:
2344 if (id->ps == RDMA_PS_UDP) 2442 if (cma_is_ud_ps(id->ps))
2345 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 2443 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2346 private_data, private_data_len); 2444 private_data, private_data_len);
2347 else 2445 else
@@ -2392,6 +2490,178 @@ out:
2392} 2490}
2393EXPORT_SYMBOL(rdma_disconnect); 2491EXPORT_SYMBOL(rdma_disconnect);
2394 2492
2493static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2494{
2495 struct rdma_id_private *id_priv;
2496 struct cma_multicast *mc = multicast->context;
2497 struct rdma_cm_event event;
2498 int ret;
2499
2500 id_priv = mc->id_priv;
2501 atomic_inc(&id_priv->dev_remove);
2502 if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2503 !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2504 goto out;
2505
2506 if (!status && id_priv->id.qp)
2507 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2508 multicast->rec.mlid);
2509
2510 memset(&event, 0, sizeof event);
2511 event.status = status;
2512 event.param.ud.private_data = mc->context;
2513 if (!status) {
2514 event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2515 ib_init_ah_from_mcmember(id_priv->id.device,
2516 id_priv->id.port_num, &multicast->rec,
2517 &event.param.ud.ah_attr);
2518 event.param.ud.qp_num = 0xFFFFFF;
2519 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2520 } else
2521 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2522
2523 ret = id_priv->id.event_handler(&id_priv->id, &event);
2524 if (ret) {
2525 cma_exch(id_priv, CMA_DESTROYING);
2526 cma_release_remove(id_priv);
2527 rdma_destroy_id(&id_priv->id);
2528 return 0;
2529 }
2530out:
2531 cma_release_remove(id_priv);
2532 return 0;
2533}
2534
2535static void cma_set_mgid(struct rdma_id_private *id_priv,
2536 struct sockaddr *addr, union ib_gid *mgid)
2537{
2538 unsigned char mc_map[MAX_ADDR_LEN];
2539 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2540 struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2541 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2542
2543 if (cma_any_addr(addr)) {
2544 memset(mgid, 0, sizeof *mgid);
2545 } else if ((addr->sa_family == AF_INET6) &&
2546 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
2547 0xFF10A01B)) {
2548 /* IPv6 address is an SA assigned MGID. */
2549 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2550 } else {
2551 ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
2552 if (id_priv->id.ps == RDMA_PS_UDP)
2553 mc_map[7] = 0x01; /* Use RDMA CM signature */
2554 mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
2555 mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
2556 *mgid = *(union ib_gid *) (mc_map + 4);
2557 }
2558}
2559
2560static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2561 struct cma_multicast *mc)
2562{
2563 struct ib_sa_mcmember_rec rec;
2564 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2565 ib_sa_comp_mask comp_mask;
2566 int ret;
2567
2568 ib_addr_get_mgid(dev_addr, &rec.mgid);
2569 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2570 &rec.mgid, &rec);
2571 if (ret)
2572 return ret;
2573
2574 cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
2575 if (id_priv->id.ps == RDMA_PS_UDP)
2576 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2577 ib_addr_get_sgid(dev_addr, &rec.port_gid);
2578 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2579 rec.join_state = 1;
2580
2581 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2582 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2583 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2584 IB_SA_MCMEMBER_REC_FLOW_LABEL |
2585 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2586
2587 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2588 id_priv->id.port_num, &rec,
2589 comp_mask, GFP_KERNEL,
2590 cma_ib_mc_handler, mc);
2591 if (IS_ERR(mc->multicast.ib))
2592 return PTR_ERR(mc->multicast.ib);
2593
2594 return 0;
2595}
2596
2597int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
2598 void *context)
2599{
2600 struct rdma_id_private *id_priv;
2601 struct cma_multicast *mc;
2602 int ret;
2603
2604 id_priv = container_of(id, struct rdma_id_private, id);
2605 if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2606 !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2607 return -EINVAL;
2608
2609 mc = kmalloc(sizeof *mc, GFP_KERNEL);
2610 if (!mc)
2611 return -ENOMEM;
2612
2613 memcpy(&mc->addr, addr, ip_addr_size(addr));
2614 mc->context = context;
2615 mc->id_priv = id_priv;
2616
2617 spin_lock(&id_priv->lock);
2618 list_add(&mc->list, &id_priv->mc_list);
2619 spin_unlock(&id_priv->lock);
2620
2621 switch (rdma_node_get_transport(id->device->node_type)) {
2622 case RDMA_TRANSPORT_IB:
2623 ret = cma_join_ib_multicast(id_priv, mc);
2624 break;
2625 default:
2626 ret = -ENOSYS;
2627 break;
2628 }
2629
2630 if (ret) {
2631 spin_lock_irq(&id_priv->lock);
2632 list_del(&mc->list);
2633 spin_unlock_irq(&id_priv->lock);
2634 kfree(mc);
2635 }
2636 return ret;
2637}
2638EXPORT_SYMBOL(rdma_join_multicast);
2639
2640void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2641{
2642 struct rdma_id_private *id_priv;
2643 struct cma_multicast *mc;
2644
2645 id_priv = container_of(id, struct rdma_id_private, id);
2646 spin_lock_irq(&id_priv->lock);
2647 list_for_each_entry(mc, &id_priv->mc_list, list) {
2648 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
2649 list_del(&mc->list);
2650 spin_unlock_irq(&id_priv->lock);
2651
2652 if (id->qp)
2653 ib_detach_mcast(id->qp,
2654 &mc->multicast.ib->rec.mgid,
2655 mc->multicast.ib->rec.mlid);
2656 ib_sa_free_multicast(mc->multicast.ib);
2657 kfree(mc);
2658 return;
2659 }
2660 }
2661 spin_unlock_irq(&id_priv->lock);
2662}
2663EXPORT_SYMBOL(rdma_leave_multicast);
2664
2395static void cma_add_one(struct ib_device *device) 2665static void cma_add_one(struct ib_device *device)
2396{ 2666{
2397 struct cma_device *cma_dev; 2667 struct cma_device *cma_dev;
@@ -2522,6 +2792,7 @@ static void cma_cleanup(void)
2522 idr_destroy(&sdp_ps); 2792 idr_destroy(&sdp_ps);
2523 idr_destroy(&tcp_ps); 2793 idr_destroy(&tcp_ps);
2524 idr_destroy(&udp_ps); 2794 idr_destroy(&udp_ps);
2795 idr_destroy(&ipoib_ps);
2525} 2796}
2526 2797
2527module_init(cma_init); 2798module_init(cma_init);
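
For orientation, the cma.c changes above add rdma_join_multicast() and rdma_leave_multicast() and report completed joins through a new RDMA_CM_EVENT_MULTICAST_JOIN event. A minimal, hypothetical consumer sketch (not part of the patch; the example_* names are invented) might look like this:

/* Hypothetical example only -- not part of this patch. */
#include <linux/err.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>

static int example_cm_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event)
{
	struct ib_ah *ah;

	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
		/*
		 * param.ud carries everything needed to address the group:
		 * the AH attributes, the multicast QPN (0xFFFFFF) and Q_Key.
		 */
		ah = ib_create_ah(id->qp->pd, &event->param.ud.ah_attr);
		if (IS_ERR(ah))
			return PTR_ERR(ah);	/* non-zero return destroys the id */
		/* send to the group using ah, param.ud.qp_num, param.ud.qkey */
		break;
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		/* membership was lost (e.g. an SM change); rejoin or clean up */
		break;
	default:
		break;
	}
	return 0;
}

static int example_join(struct rdma_cm_id *id, struct sockaddr *mcast_addr)
{
	/* id must be bound or address-resolved before joining */
	return rdma_join_multicast(id, mcast_addr, NULL /* user context */);
}

static void example_leave(struct rdma_cm_id *id, struct sockaddr *mcast_addr)
{
	rdma_leave_multicast(id, mcast_addr);
}

As elsewhere in the RDMA CM, returning non-zero from the event handler destroys the id; rdma_leave_multicast() detaches any attached QP from the group and drops the SA membership.
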
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 8926a2bd4a87..1d796e7c8199 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -301,7 +301,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
 
 	{
 		struct ib_pool_fmr *fmr;
-		struct ib_fmr_attr attr = {
+		struct ib_fmr_attr fmr_attr = {
 			.max_pages = params->max_pages_per_fmr,
 			.max_maps = pool->max_remaps,
 			.page_shift = params->page_shift
@@ -321,7 +321,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
 			fmr->ref_count = 0;
 			INIT_HLIST_NODE(&fmr->cache_node);
 
-			fmr->fmr = ib_alloc_fmr(pd, params->access, &attr);
+			fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
 			if (IS_ERR(fmr->fmr)) {
 				printk(KERN_WARNING "fmr_create failed for FMR %d", i);
 				kfree(fmr);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 1039ad57d53b..891d1fa7b2eb 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -146,6 +146,12 @@ static int copy_private_data(struct iw_cm_event *event)
146 return 0; 146 return 0;
147} 147}
148 148
149static void free_cm_id(struct iwcm_id_private *cm_id_priv)
150{
151 dealloc_work_entries(cm_id_priv);
152 kfree(cm_id_priv);
153}
154
149/* 155/*
150 * Release a reference on cm_id. If the last reference is being 156 * Release a reference on cm_id. If the last reference is being
151 * released, enable the waiting thread (in iw_destroy_cm_id) to 157 * released, enable the waiting thread (in iw_destroy_cm_id) to
@@ -153,21 +159,14 @@ static int copy_private_data(struct iw_cm_event *event)
153 */ 159 */
154static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 160static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
155{ 161{
156 int ret = 0;
157
158 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 162 BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
159 if (atomic_dec_and_test(&cm_id_priv->refcount)) { 163 if (atomic_dec_and_test(&cm_id_priv->refcount)) {
160 BUG_ON(!list_empty(&cm_id_priv->work_list)); 164 BUG_ON(!list_empty(&cm_id_priv->work_list));
161 if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
162 BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
163 BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
164 &cm_id_priv->flags));
165 ret = 1;
166 }
167 complete(&cm_id_priv->destroy_comp); 165 complete(&cm_id_priv->destroy_comp);
166 return 1;
168 } 167 }
169 168
170 return ret; 169 return 0;
171} 170}
172 171
173static void add_ref(struct iw_cm_id *cm_id) 172static void add_ref(struct iw_cm_id *cm_id)
@@ -181,7 +180,11 @@ static void rem_ref(struct iw_cm_id *cm_id)
181{ 180{
182 struct iwcm_id_private *cm_id_priv; 181 struct iwcm_id_private *cm_id_priv;
183 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 182 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
184 iwcm_deref_id(cm_id_priv); 183 if (iwcm_deref_id(cm_id_priv) &&
184 test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
185 BUG_ON(!list_empty(&cm_id_priv->work_list));
186 free_cm_id(cm_id_priv);
187 }
185} 188}
186 189
187static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 190static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
@@ -355,7 +358,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
355 case IW_CM_STATE_CONN_RECV: 358 case IW_CM_STATE_CONN_RECV:
356 /* 359 /*
357 * App called destroy before/without calling accept after 360 * App called destroy before/without calling accept after
358 * receiving connection request event notification. 361 * receiving connection request event notification or
362 * returned non zero from the event callback function.
363 * In either case, must tell the provider to reject.
359 */ 364 */
360 cm_id_priv->state = IW_CM_STATE_DESTROYING; 365 cm_id_priv->state = IW_CM_STATE_DESTROYING;
361 break; 366 break;
@@ -391,9 +396,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
391 396
392 wait_for_completion(&cm_id_priv->destroy_comp); 397 wait_for_completion(&cm_id_priv->destroy_comp);
393 398
394 dealloc_work_entries(cm_id_priv); 399 free_cm_id(cm_id_priv);
395
396 kfree(cm_id_priv);
397} 400}
398EXPORT_SYMBOL(iw_destroy_cm_id); 401EXPORT_SYMBOL(iw_destroy_cm_id);
399 402
@@ -647,10 +650,11 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
647 /* Call the client CM handler */ 650 /* Call the client CM handler */
648 ret = cm_id->cm_handler(cm_id, iw_event); 651 ret = cm_id->cm_handler(cm_id, iw_event);
649 if (ret) { 652 if (ret) {
653 iw_cm_reject(cm_id, NULL, 0);
650 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 654 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
651 destroy_cm_id(cm_id); 655 destroy_cm_id(cm_id);
652 if (atomic_read(&cm_id_priv->refcount)==0) 656 if (atomic_read(&cm_id_priv->refcount)==0)
653 kfree(cm_id); 657 free_cm_id(cm_id_priv);
654 } 658 }
655 659
656out: 660out:
@@ -854,13 +858,12 @@ static void cm_work_handler(struct work_struct *_work)
854 destroy_cm_id(&cm_id_priv->id); 858 destroy_cm_id(&cm_id_priv->id);
855 } 859 }
856 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 860 BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
857 if (iwcm_deref_id(cm_id_priv)) 861 if (iwcm_deref_id(cm_id_priv)) {
858 return; 862 if (test_bit(IWCM_F_CALLBACK_DESTROY,
859 863 &cm_id_priv->flags)) {
860 if (atomic_read(&cm_id_priv->refcount)==0 && 864 BUG_ON(!list_empty(&cm_id_priv->work_list));
861 test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { 865 free_cm_id(cm_id_priv);
862 dealloc_work_entries(cm_id_priv); 866 }
863 kfree(cm_id_priv);
864 return; 867 return;
865 } 868 }
866 spin_lock_irqsave(&cm_id_priv->lock, flags); 869 spin_lock_irqsave(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
new file mode 100644
index 000000000000..4a579b3a1c90
--- /dev/null
+++ b/drivers/infiniband/core/multicast.c
@@ -0,0 +1,837 @@
1/*
2 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/completion.h>
34#include <linux/dma-mapping.h>
35#include <linux/err.h>
36#include <linux/interrupt.h>
37#include <linux/pci.h>
38#include <linux/bitops.h>
39#include <linux/random.h>
40
41#include <rdma/ib_cache.h>
42#include "sa.h"
43
44static void mcast_add_one(struct ib_device *device);
45static void mcast_remove_one(struct ib_device *device);
46
47static struct ib_client mcast_client = {
48 .name = "ib_multicast",
49 .add = mcast_add_one,
50 .remove = mcast_remove_one
51};
52
53static struct ib_sa_client sa_client;
54static struct workqueue_struct *mcast_wq;
55static union ib_gid mgid0;
56
57struct mcast_device;
58
59struct mcast_port {
60 struct mcast_device *dev;
61 spinlock_t lock;
62 struct rb_root table;
63 atomic_t refcount;
64 struct completion comp;
65 u8 port_num;
66};
67
68struct mcast_device {
69 struct ib_device *device;
70 struct ib_event_handler event_handler;
71 int start_port;
72 int end_port;
73 struct mcast_port port[0];
74};
75
76enum mcast_state {
77 MCAST_IDLE,
78 MCAST_JOINING,
79 MCAST_MEMBER,
80 MCAST_BUSY,
81 MCAST_ERROR
82};
83
84struct mcast_member;
85
86struct mcast_group {
87 struct ib_sa_mcmember_rec rec;
88 struct rb_node node;
89 struct mcast_port *port;
90 spinlock_t lock;
91 struct work_struct work;
92 struct list_head pending_list;
93 struct list_head active_list;
94 struct mcast_member *last_join;
95 int members[3];
96 atomic_t refcount;
97 enum mcast_state state;
98 struct ib_sa_query *query;
99 int query_id;
100};
101
102struct mcast_member {
103 struct ib_sa_multicast multicast;
104 struct ib_sa_client *client;
105 struct mcast_group *group;
106 struct list_head list;
107 enum mcast_state state;
108 atomic_t refcount;
109 struct completion comp;
110};
111
112static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
113 void *context);
114static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
115 void *context);
116
117static struct mcast_group *mcast_find(struct mcast_port *port,
118 union ib_gid *mgid)
119{
120 struct rb_node *node = port->table.rb_node;
121 struct mcast_group *group;
122 int ret;
123
124 while (node) {
125 group = rb_entry(node, struct mcast_group, node);
126 ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
127 if (!ret)
128 return group;
129
130 if (ret < 0)
131 node = node->rb_left;
132 else
133 node = node->rb_right;
134 }
135 return NULL;
136}
137
138static struct mcast_group *mcast_insert(struct mcast_port *port,
139 struct mcast_group *group,
140 int allow_duplicates)
141{
142 struct rb_node **link = &port->table.rb_node;
143 struct rb_node *parent = NULL;
144 struct mcast_group *cur_group;
145 int ret;
146
147 while (*link) {
148 parent = *link;
149 cur_group = rb_entry(parent, struct mcast_group, node);
150
151 ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
152 sizeof group->rec.mgid);
153 if (ret < 0)
154 link = &(*link)->rb_left;
155 else if (ret > 0)
156 link = &(*link)->rb_right;
157 else if (allow_duplicates)
158 link = &(*link)->rb_left;
159 else
160 return cur_group;
161 }
162 rb_link_node(&group->node, parent, link);
163 rb_insert_color(&group->node, &port->table);
164 return NULL;
165}
166
167static void deref_port(struct mcast_port *port)
168{
169 if (atomic_dec_and_test(&port->refcount))
170 complete(&port->comp);
171}
172
173static void release_group(struct mcast_group *group)
174{
175 struct mcast_port *port = group->port;
176 unsigned long flags;
177
178 spin_lock_irqsave(&port->lock, flags);
179 if (atomic_dec_and_test(&group->refcount)) {
180 rb_erase(&group->node, &port->table);
181 spin_unlock_irqrestore(&port->lock, flags);
182 kfree(group);
183 deref_port(port);
184 } else
185 spin_unlock_irqrestore(&port->lock, flags);
186}
187
188static void deref_member(struct mcast_member *member)
189{
190 if (atomic_dec_and_test(&member->refcount))
191 complete(&member->comp);
192}
193
194static void queue_join(struct mcast_member *member)
195{
196 struct mcast_group *group = member->group;
197 unsigned long flags;
198
199 spin_lock_irqsave(&group->lock, flags);
200 list_add(&member->list, &group->pending_list);
201 if (group->state == MCAST_IDLE) {
202 group->state = MCAST_BUSY;
203 atomic_inc(&group->refcount);
204 queue_work(mcast_wq, &group->work);
205 }
206 spin_unlock_irqrestore(&group->lock, flags);
207}
208
209/*
210 * A multicast group has three types of members: full member, non member, and
211 * send only member. We need to keep track of the number of members of each
212 * type based on their join state. Adjust the number of members that belong to
213 * the specified join states.
214 */
215static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
216{
217 int i;
218
219 for (i = 0; i < 3; i++, join_state >>= 1)
220 if (join_state & 0x1)
221 group->members[i] += inc;
222}
223
224/*
225 * If a multicast group has zero members left for a particular join state, but
226 * the group is still a member with the SA, we need to leave that join state.
227 * Determine which join states we still belong to, but that do not have any
228 * active members.
229 */
230static u8 get_leave_state(struct mcast_group *group)
231{
232 u8 leave_state = 0;
233 int i;
234
235 for (i = 0; i < 3; i++)
236 if (!group->members[i])
237 leave_state |= (0x1 << i);
238
239 return leave_state & group->rec.join_state;
240}
241
242static int check_selector(ib_sa_comp_mask comp_mask,
243 ib_sa_comp_mask selector_mask,
244 ib_sa_comp_mask value_mask,
245 u8 selector, u8 src_value, u8 dst_value)
246{
247 int err;
248
249 if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
250 return 0;
251
252 switch (selector) {
253 case IB_SA_GT:
254 err = (src_value <= dst_value);
255 break;
256 case IB_SA_LT:
257 err = (src_value >= dst_value);
258 break;
259 case IB_SA_EQ:
260 err = (src_value != dst_value);
261 break;
262 default:
263 err = 0;
264 break;
265 }
266
267 return err;
268}
269
270static int cmp_rec(struct ib_sa_mcmember_rec *src,
271 struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
272{
273 /* MGID must already match */
274
275 if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
276 memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
277 return -EINVAL;
278 if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
279 return -EINVAL;
280 if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
281 return -EINVAL;
282 if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
283 IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
284 src->mtu, dst->mtu))
285 return -EINVAL;
286 if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
287 src->traffic_class != dst->traffic_class)
288 return -EINVAL;
289 if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
290 return -EINVAL;
291 if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
292 IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
293 src->rate, dst->rate))
294 return -EINVAL;
295 if (check_selector(comp_mask,
296 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
297 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
298 dst->packet_life_time_selector,
299 src->packet_life_time, dst->packet_life_time))
300 return -EINVAL;
301 if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
302 return -EINVAL;
303 if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
304 src->flow_label != dst->flow_label)
305 return -EINVAL;
306 if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
307 src->hop_limit != dst->hop_limit)
308 return -EINVAL;
309 if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
310 return -EINVAL;
311
312 /* join_state checked separately, proxy_join ignored */
313
314 return 0;
315}
316
317static int send_join(struct mcast_group *group, struct mcast_member *member)
318{
319 struct mcast_port *port = group->port;
320 int ret;
321
322 group->last_join = member;
323 ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
324 port->port_num, IB_MGMT_METHOD_SET,
325 &member->multicast.rec,
326 member->multicast.comp_mask,
327 3000, GFP_KERNEL, join_handler, group,
328 &group->query);
329 if (ret >= 0) {
330 group->query_id = ret;
331 ret = 0;
332 }
333 return ret;
334}
335
336static int send_leave(struct mcast_group *group, u8 leave_state)
337{
338 struct mcast_port *port = group->port;
339 struct ib_sa_mcmember_rec rec;
340 int ret;
341
342 rec = group->rec;
343 rec.join_state = leave_state;
344
345 ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
346 port->port_num, IB_SA_METHOD_DELETE, &rec,
347 IB_SA_MCMEMBER_REC_MGID |
348 IB_SA_MCMEMBER_REC_PORT_GID |
349 IB_SA_MCMEMBER_REC_JOIN_STATE,
350 3000, GFP_KERNEL, leave_handler,
351 group, &group->query);
352 if (ret >= 0) {
353 group->query_id = ret;
354 ret = 0;
355 }
356 return ret;
357}
358
359static void join_group(struct mcast_group *group, struct mcast_member *member,
360 u8 join_state)
361{
362 member->state = MCAST_MEMBER;
363 adjust_membership(group, join_state, 1);
364 group->rec.join_state |= join_state;
365 member->multicast.rec = group->rec;
366 member->multicast.rec.join_state = join_state;
367 list_move(&member->list, &group->active_list);
368}
369
370static int fail_join(struct mcast_group *group, struct mcast_member *member,
371 int status)
372{
373 spin_lock_irq(&group->lock);
374 list_del_init(&member->list);
375 spin_unlock_irq(&group->lock);
376 return member->multicast.callback(status, &member->multicast);
377}
378
379static void process_group_error(struct mcast_group *group)
380{
381 struct mcast_member *member;
382 int ret;
383
384 spin_lock_irq(&group->lock);
385 while (!list_empty(&group->active_list)) {
386 member = list_entry(group->active_list.next,
387 struct mcast_member, list);
388 atomic_inc(&member->refcount);
389 list_del_init(&member->list);
390 adjust_membership(group, member->multicast.rec.join_state, -1);
391 member->state = MCAST_ERROR;
392 spin_unlock_irq(&group->lock);
393
394 ret = member->multicast.callback(-ENETRESET,
395 &member->multicast);
396 deref_member(member);
397 if (ret)
398 ib_sa_free_multicast(&member->multicast);
399 spin_lock_irq(&group->lock);
400 }
401
402 group->rec.join_state = 0;
403 group->state = MCAST_BUSY;
404 spin_unlock_irq(&group->lock);
405}
406
407static void mcast_work_handler(struct work_struct *work)
408{
409 struct mcast_group *group;
410 struct mcast_member *member;
411 struct ib_sa_multicast *multicast;
412 int status, ret;
413 u8 join_state;
414
415 group = container_of(work, typeof(*group), work);
416retest:
417 spin_lock_irq(&group->lock);
418 while (!list_empty(&group->pending_list) ||
419 (group->state == MCAST_ERROR)) {
420
421 if (group->state == MCAST_ERROR) {
422 spin_unlock_irq(&group->lock);
423 process_group_error(group);
424 goto retest;
425 }
426
427 member = list_entry(group->pending_list.next,
428 struct mcast_member, list);
429 multicast = &member->multicast;
430 join_state = multicast->rec.join_state;
431 atomic_inc(&member->refcount);
432
433 if (join_state == (group->rec.join_state & join_state)) {
434 status = cmp_rec(&group->rec, &multicast->rec,
435 multicast->comp_mask);
436 if (!status)
437 join_group(group, member, join_state);
438 else
439 list_del_init(&member->list);
440 spin_unlock_irq(&group->lock);
441 ret = multicast->callback(status, multicast);
442 } else {
443 spin_unlock_irq(&group->lock);
444 status = send_join(group, member);
445 if (!status) {
446 deref_member(member);
447 return;
448 }
449 ret = fail_join(group, member, status);
450 }
451
452 deref_member(member);
453 if (ret)
454 ib_sa_free_multicast(&member->multicast);
455 spin_lock_irq(&group->lock);
456 }
457
458 join_state = get_leave_state(group);
459 if (join_state) {
460 group->rec.join_state &= ~join_state;
461 spin_unlock_irq(&group->lock);
462 if (send_leave(group, join_state))
463 goto retest;
464 } else {
465 group->state = MCAST_IDLE;
466 spin_unlock_irq(&group->lock);
467 release_group(group);
468 }
469}
470
471/*
472 * Fail a join request if it is still active - at the head of the pending queue.
473 */
474static void process_join_error(struct mcast_group *group, int status)
475{
476 struct mcast_member *member;
477 int ret;
478
479 spin_lock_irq(&group->lock);
480 member = list_entry(group->pending_list.next,
481 struct mcast_member, list);
482 if (group->last_join == member) {
483 atomic_inc(&member->refcount);
484 list_del_init(&member->list);
485 spin_unlock_irq(&group->lock);
486 ret = member->multicast.callback(status, &member->multicast);
487 deref_member(member);
488 if (ret)
489 ib_sa_free_multicast(&member->multicast);
490 } else
491 spin_unlock_irq(&group->lock);
492}
493
494static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
495 void *context)
496{
497 struct mcast_group *group = context;
498
499 if (status)
500 process_join_error(group, status);
501 else {
502 spin_lock_irq(&group->port->lock);
503 group->rec = *rec;
504 if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
505 rb_erase(&group->node, &group->port->table);
506 mcast_insert(group->port, group, 1);
507 }
508 spin_unlock_irq(&group->port->lock);
509 }
510 mcast_work_handler(&group->work);
511}
512
513static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
514 void *context)
515{
516 struct mcast_group *group = context;
517
518 mcast_work_handler(&group->work);
519}
520
521static struct mcast_group *acquire_group(struct mcast_port *port,
522 union ib_gid *mgid, gfp_t gfp_mask)
523{
524 struct mcast_group *group, *cur_group;
525 unsigned long flags;
526 int is_mgid0;
527
528 is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
529 if (!is_mgid0) {
530 spin_lock_irqsave(&port->lock, flags);
531 group = mcast_find(port, mgid);
532 if (group)
533 goto found;
534 spin_unlock_irqrestore(&port->lock, flags);
535 }
536
537 group = kzalloc(sizeof *group, gfp_mask);
538 if (!group)
539 return NULL;
540
541 group->port = port;
542 group->rec.mgid = *mgid;
543 INIT_LIST_HEAD(&group->pending_list);
544 INIT_LIST_HEAD(&group->active_list);
545 INIT_WORK(&group->work, mcast_work_handler);
546 spin_lock_init(&group->lock);
547
548 spin_lock_irqsave(&port->lock, flags);
549 cur_group = mcast_insert(port, group, is_mgid0);
550 if (cur_group) {
551 kfree(group);
552 group = cur_group;
553 } else
554 atomic_inc(&port->refcount);
555found:
556 atomic_inc(&group->refcount);
557 spin_unlock_irqrestore(&port->lock, flags);
558 return group;
559}
560
561/*
562 * We serialize all join requests to a single group to make our lives much
563 * easier. Otherwise, two users could try to join the same group
564 * simultaneously, with different configurations, one could leave while the
565 * join is in progress, etc., which makes locking around error recovery
566 * difficult.
567 */
568struct ib_sa_multicast *
569ib_sa_join_multicast(struct ib_sa_client *client,
570 struct ib_device *device, u8 port_num,
571 struct ib_sa_mcmember_rec *rec,
572 ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
573 int (*callback)(int status,
574 struct ib_sa_multicast *multicast),
575 void *context)
576{
577 struct mcast_device *dev;
578 struct mcast_member *member;
579 struct ib_sa_multicast *multicast;
580 int ret;
581
582 dev = ib_get_client_data(device, &mcast_client);
583 if (!dev)
584 return ERR_PTR(-ENODEV);
585
586 member = kmalloc(sizeof *member, gfp_mask);
587 if (!member)
588 return ERR_PTR(-ENOMEM);
589
590 ib_sa_client_get(client);
591 member->client = client;
592 member->multicast.rec = *rec;
593 member->multicast.comp_mask = comp_mask;
594 member->multicast.callback = callback;
595 member->multicast.context = context;
596 init_completion(&member->comp);
597 atomic_set(&member->refcount, 1);
598 member->state = MCAST_JOINING;
599
600 member->group = acquire_group(&dev->port[port_num - dev->start_port],
601 &rec->mgid, gfp_mask);
602 if (!member->group) {
603 ret = -ENOMEM;
604 goto err;
605 }
606
607 /*
608 * The user will get the multicast structure in their callback. They
609 * could then free the multicast structure before we can return from
610 * this routine. So we save the pointer to return before queuing
611 * any callback.
612 */
613 multicast = &member->multicast;
614 queue_join(member);
615 return multicast;
616
617err:
618 ib_sa_client_put(client);
619 kfree(member);
620 return ERR_PTR(ret);
621}
622EXPORT_SYMBOL(ib_sa_join_multicast);
623
624void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
625{
626 struct mcast_member *member;
627 struct mcast_group *group;
628
629 member = container_of(multicast, struct mcast_member, multicast);
630 group = member->group;
631
632 spin_lock_irq(&group->lock);
633 if (member->state == MCAST_MEMBER)
634 adjust_membership(group, multicast->rec.join_state, -1);
635
636 list_del_init(&member->list);
637
638 if (group->state == MCAST_IDLE) {
639 group->state = MCAST_BUSY;
640 spin_unlock_irq(&group->lock);
641 /* Continue to hold reference on group until callback */
642 queue_work(mcast_wq, &group->work);
643 } else {
644 spin_unlock_irq(&group->lock);
645 release_group(group);
646 }
647
648 deref_member(member);
649 wait_for_completion(&member->comp);
650 ib_sa_client_put(member->client);
651 kfree(member);
652}
653EXPORT_SYMBOL(ib_sa_free_multicast);
654
655int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
656 union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
657{
658 struct mcast_device *dev;
659 struct mcast_port *port;
660 struct mcast_group *group;
661 unsigned long flags;
662 int ret = 0;
663
664 dev = ib_get_client_data(device, &mcast_client);
665 if (!dev)
666 return -ENODEV;
667
668 port = &dev->port[port_num - dev->start_port];
669 spin_lock_irqsave(&port->lock, flags);
670 group = mcast_find(port, mgid);
671 if (group)
672 *rec = group->rec;
673 else
674 ret = -EADDRNOTAVAIL;
675 spin_unlock_irqrestore(&port->lock, flags);
676
677 return ret;
678}
679EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
680
681int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
682 struct ib_sa_mcmember_rec *rec,
683 struct ib_ah_attr *ah_attr)
684{
685 int ret;
686 u16 gid_index;
687 u8 p;
688
689 ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
690 if (ret)
691 return ret;
692
693 memset(ah_attr, 0, sizeof *ah_attr);
694 ah_attr->dlid = be16_to_cpu(rec->mlid);
695 ah_attr->sl = rec->sl;
696 ah_attr->port_num = port_num;
697 ah_attr->static_rate = rec->rate;
698
699 ah_attr->ah_flags = IB_AH_GRH;
700 ah_attr->grh.dgid = rec->mgid;
701
702 ah_attr->grh.sgid_index = (u8) gid_index;
703 ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
704 ah_attr->grh.hop_limit = rec->hop_limit;
705 ah_attr->grh.traffic_class = rec->traffic_class;
706
707 return 0;
708}
709EXPORT_SYMBOL(ib_init_ah_from_mcmember);
710
711static void mcast_groups_lost(struct mcast_port *port)
712{
713 struct mcast_group *group;
714 struct rb_node *node;
715 unsigned long flags;
716
717 spin_lock_irqsave(&port->lock, flags);
718 for (node = rb_first(&port->table); node; node = rb_next(node)) {
719 group = rb_entry(node, struct mcast_group, node);
720 spin_lock(&group->lock);
721 if (group->state == MCAST_IDLE) {
722 atomic_inc(&group->refcount);
723 queue_work(mcast_wq, &group->work);
724 }
725 group->state = MCAST_ERROR;
726 spin_unlock(&group->lock);
727 }
728 spin_unlock_irqrestore(&port->lock, flags);
729}
730
731static void mcast_event_handler(struct ib_event_handler *handler,
732 struct ib_event *event)
733{
734 struct mcast_device *dev;
735
736 dev = container_of(handler, struct mcast_device, event_handler);
737
738 switch (event->event) {
739 case IB_EVENT_PORT_ERR:
740 case IB_EVENT_LID_CHANGE:
741 case IB_EVENT_SM_CHANGE:
742 case IB_EVENT_CLIENT_REREGISTER:
743 mcast_groups_lost(&dev->port[event->element.port_num -
744 dev->start_port]);
745 break;
746 default:
747 break;
748 }
749}
750
751static void mcast_add_one(struct ib_device *device)
752{
753 struct mcast_device *dev;
754 struct mcast_port *port;
755 int i;
756
757 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
758 return;
759
760 dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
761 GFP_KERNEL);
762 if (!dev)
763 return;
764
765 if (device->node_type == RDMA_NODE_IB_SWITCH)
766 dev->start_port = dev->end_port = 0;
767 else {
768 dev->start_port = 1;
769 dev->end_port = device->phys_port_cnt;
770 }
771
772 for (i = 0; i <= dev->end_port - dev->start_port; i++) {
773 port = &dev->port[i];
774 port->dev = dev;
775 port->port_num = dev->start_port + i;
776 spin_lock_init(&port->lock);
777 port->table = RB_ROOT;
778 init_completion(&port->comp);
779 atomic_set(&port->refcount, 1);
780 }
781
782 dev->device = device;
783 ib_set_client_data(device, &mcast_client, dev);
784
785 INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
786 ib_register_event_handler(&dev->event_handler);
787}
788
789static void mcast_remove_one(struct ib_device *device)
790{
791 struct mcast_device *dev;
792 struct mcast_port *port;
793 int i;
794
795 dev = ib_get_client_data(device, &mcast_client);
796 if (!dev)
797 return;
798
799 ib_unregister_event_handler(&dev->event_handler);
800 flush_workqueue(mcast_wq);
801
802 for (i = 0; i <= dev->end_port - dev->start_port; i++) {
803 port = &dev->port[i];
804 deref_port(port);
805 wait_for_completion(&port->comp);
806 }
807
808 kfree(dev);
809}
810
811int mcast_init(void)
812{
813 int ret;
814
815 mcast_wq = create_singlethread_workqueue("ib_mcast");
816 if (!mcast_wq)
817 return -ENOMEM;
818
819 ib_sa_register_client(&sa_client);
820
821 ret = ib_register_client(&mcast_client);
822 if (ret)
823 goto err;
824 return 0;
825
826err:
827 ib_sa_unregister_client(&sa_client);
828 destroy_workqueue(mcast_wq);
829 return ret;
830}
831
832void mcast_cleanup(void)
833{
834 ib_unregister_client(&mcast_client);
835 ib_sa_unregister_client(&sa_client);
836 destroy_workqueue(mcast_wq);
837}
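
The new ib_multicast code above serializes join requests per group and reference-counts memberships; the rdma_cm changes earlier in this patch are its first in-tree user. A minimal, hypothetical caller sketch follows (not part of the patch; the example_* names are invented, and the struct ib_sa_multicast definition lives in the companion <rdma/ib_sa.h> change, which falls outside the drivers/infiniband diffstat shown here):

/* Hypothetical example only -- not part of this patch. */
#include <linux/err.h>
#include <rdma/ib_sa.h>

static struct ib_sa_client example_sa_client;	/* registered with ib_sa_register_client() */

static int example_mc_callback(int status, struct ib_sa_multicast *multicast)
{
	if (status)
		return status;	/* non-zero return: the core releases the membership */

	/* multicast->rec now holds the SA-assigned MGID, MLID, Q_Key, ... */
	return 0;
}

static struct ib_sa_multicast *example_join(struct ib_device *device, u8 port_num,
					    struct ib_sa_mcmember_rec *rec)
{
	ib_sa_comp_mask comp_mask = IB_SA_MCMEMBER_REC_MGID |
				    IB_SA_MCMEMBER_REC_PORT_GID |
				    IB_SA_MCMEMBER_REC_PKEY |
				    IB_SA_MCMEMBER_REC_JOIN_STATE;

	return ib_sa_join_multicast(&example_sa_client, device, port_num, rec,
				    comp_mask, GFP_KERNEL,
				    example_mc_callback, NULL /* user context */);
}

If the callback returns non-zero (including on a failed join), the core frees the membership itself; otherwise the caller leaves the group later by passing the returned pointer to ib_sa_free_multicast().
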
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
new file mode 100644
index 000000000000..24c93fd320fb
--- /dev/null
+++ b/drivers/infiniband/core/sa.h
@@ -0,0 +1,66 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
4 * Copyright (c) 2006 Intel Corporation. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#ifndef SA_H
36#define SA_H
37
38#include <rdma/ib_sa.h>
39
40static inline void ib_sa_client_get(struct ib_sa_client *client)
41{
42 atomic_inc(&client->users);
43}
44
45static inline void ib_sa_client_put(struct ib_sa_client *client)
46{
47 if (atomic_dec_and_test(&client->users))
48 complete(&client->comp);
49}
50
51int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
52 struct ib_device *device, u8 port_num,
53 u8 method,
54 struct ib_sa_mcmember_rec *rec,
55 ib_sa_comp_mask comp_mask,
56 int timeout_ms, gfp_t gfp_mask,
57 void (*callback)(int status,
58 struct ib_sa_mcmember_rec *resp,
59 void *context),
60 void *context,
61 struct ib_sa_query **sa_query);
62
63int mcast_init(void);
64void mcast_cleanup(void);
65
66#endif /* SA_H */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index e45afba75341..68db633711c5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -47,8 +47,8 @@
47#include <linux/workqueue.h> 47#include <linux/workqueue.h>
48 48
49#include <rdma/ib_pack.h> 49#include <rdma/ib_pack.h>
50#include <rdma/ib_sa.h>
51#include <rdma/ib_cache.h> 50#include <rdma/ib_cache.h>
51#include "sa.h"
52 52
53MODULE_AUTHOR("Roland Dreier"); 53MODULE_AUTHOR("Roland Dreier");
54MODULE_DESCRIPTION("InfiniBand subnet administration query support"); 54MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -425,17 +425,6 @@ void ib_sa_register_client(struct ib_sa_client *client)
425} 425}
426EXPORT_SYMBOL(ib_sa_register_client); 426EXPORT_SYMBOL(ib_sa_register_client);
427 427
428static inline void ib_sa_client_get(struct ib_sa_client *client)
429{
430 atomic_inc(&client->users);
431}
432
433static inline void ib_sa_client_put(struct ib_sa_client *client)
434{
435 if (atomic_dec_and_test(&client->users))
436 complete(&client->comp);
437}
438
439void ib_sa_unregister_client(struct ib_sa_client *client) 428void ib_sa_unregister_client(struct ib_sa_client *client)
440{ 429{
441 ib_sa_client_put(client); 430 ib_sa_client_put(client);
@@ -482,6 +471,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
482 ah_attr->sl = rec->sl; 471 ah_attr->sl = rec->sl;
483 ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f; 472 ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
484 ah_attr->port_num = port_num; 473 ah_attr->port_num = port_num;
474 ah_attr->static_rate = rec->rate;
485 475
486 if (rec->hop_limit > 1) { 476 if (rec->hop_limit > 1) {
487 ah_attr->ah_flags = IB_AH_GRH; 477 ah_attr->ah_flags = IB_AH_GRH;
@@ -901,7 +891,6 @@ err1:
901 kfree(query); 891 kfree(query);
902 return ret; 892 return ret;
903} 893}
904EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
905 894
906static void send_handler(struct ib_mad_agent *agent, 895static void send_handler(struct ib_mad_agent *agent,
907 struct ib_mad_send_wc *mad_send_wc) 896 struct ib_mad_send_wc *mad_send_wc)
@@ -1053,14 +1042,27 @@ static int __init ib_sa_init(void)
1053 get_random_bytes(&tid, sizeof tid); 1042 get_random_bytes(&tid, sizeof tid);
1054 1043
1055 ret = ib_register_client(&sa_client); 1044 ret = ib_register_client(&sa_client);
1056 if (ret) 1045 if (ret) {
1057 printk(KERN_ERR "Couldn't register ib_sa client\n"); 1046 printk(KERN_ERR "Couldn't register ib_sa client\n");
1047 goto err1;
1048 }
1049
1050 ret = mcast_init();
1051 if (ret) {
1052 printk(KERN_ERR "Couldn't initialize multicast handling\n");
1053 goto err2;
1054 }
1058 1055
1056 return 0;
1057err2:
1058 ib_unregister_client(&sa_client);
1059err1:
1059 return ret; 1060 return ret;
1060} 1061}
1061 1062
1062static void __exit ib_sa_cleanup(void) 1063static void __exit ib_sa_cleanup(void)
1063{ 1064{
1065 mcast_cleanup();
1064 ib_unregister_client(&sa_client); 1066 ib_unregister_client(&sa_client);
1065 idr_destroy(&query_idr); 1067 idr_destroy(&query_idr);
1066} 1068}
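
The ib_sa_init()/ib_sa_cleanup() hunks above follow the usual staged-initialization pattern: each setup step gets an unwind label, a failure jumps to the label that undoes only what already succeeded, and cleanup at module exit runs in reverse order. A minimal sketch of that shape; step_a(), step_b() and undo_a() are hypothetical stand-ins, not the real ib_register_client()/mcast_init() calls:

/* Minimal sketch of the staged-init shape used in ib_sa_init() above. */
#include <stdio.h>

static int step_a(void)  { puts("a up");     return 0; }
static void undo_a(void) { puts("a down");            }
static int step_b(void)  { puts("b failed"); return -1; }	/* pretend this fails */

static int module_init_demo(void)
{
	int ret;

	ret = step_a();
	if (ret)
		goto err1;

	ret = step_b();
	if (ret)
		goto err2;

	return 0;

err2:
	undo_a();	/* unwind in reverse order of setup */
err1:
	return ret;
}

int main(void)
{
	return module_init_demo() ? 1 : 0;
}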
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 709323c14c5d..000c086bf2e9 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -714,8 +714,6 @@ int ib_device_register_sysfs(struct ib_device *device)
714 if (ret) 714 if (ret)
715 goto err_put; 715 goto err_put;
716 } else { 716 } else {
717 int i;
718
719 for (i = 1; i <= device->phys_port_cnt; ++i) { 717 for (i = 1; i <= device->phys_port_cnt; ++i) {
720 ret = add_port(device, i); 718 ret = add_port(device, i);
721 if (ret) 719 if (ret)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 6b81b98961c7..b516b93b8550 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -70,10 +70,24 @@ struct ucma_context {
70 u64 uid; 70 u64 uid;
71 71
72 struct list_head list; 72 struct list_head list;
73 struct list_head mc_list;
74};
75
76struct ucma_multicast {
77 struct ucma_context *ctx;
78 int id;
79 int events_reported;
80
81 u64 uid;
82 struct list_head list;
83 struct sockaddr addr;
84 u8 pad[sizeof(struct sockaddr_in6) -
85 sizeof(struct sockaddr)];
73}; 86};
74 87
75struct ucma_event { 88struct ucma_event {
76 struct ucma_context *ctx; 89 struct ucma_context *ctx;
90 struct ucma_multicast *mc;
77 struct list_head list; 91 struct list_head list;
78 struct rdma_cm_id *cm_id; 92 struct rdma_cm_id *cm_id;
79 struct rdma_ucm_event_resp resp; 93 struct rdma_ucm_event_resp resp;
@@ -81,6 +95,7 @@ struct ucma_event {
81 95
82static DEFINE_MUTEX(mut); 96static DEFINE_MUTEX(mut);
83static DEFINE_IDR(ctx_idr); 97static DEFINE_IDR(ctx_idr);
98static DEFINE_IDR(multicast_idr);
84 99
85static inline struct ucma_context *_ucma_find_context(int id, 100static inline struct ucma_context *_ucma_find_context(int id,
86 struct ucma_file *file) 101 struct ucma_file *file)
@@ -124,6 +139,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
124 139
125 atomic_set(&ctx->ref, 1); 140 atomic_set(&ctx->ref, 1);
126 init_completion(&ctx->comp); 141 init_completion(&ctx->comp);
142 INIT_LIST_HEAD(&ctx->mc_list);
127 ctx->file = file; 143 ctx->file = file;
128 144
129 do { 145 do {
@@ -147,6 +163,37 @@ error:
147 return NULL; 163 return NULL;
148} 164}
149 165
166static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
167{
168 struct ucma_multicast *mc;
169 int ret;
170
171 mc = kzalloc(sizeof(*mc), GFP_KERNEL);
172 if (!mc)
173 return NULL;
174
175 do {
176 ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
177 if (!ret)
178 goto error;
179
180 mutex_lock(&mut);
181 ret = idr_get_new(&multicast_idr, mc, &mc->id);
182 mutex_unlock(&mut);
183 } while (ret == -EAGAIN);
184
185 if (ret)
186 goto error;
187
188 mc->ctx = ctx;
189 list_add_tail(&mc->list, &ctx->mc_list);
190 return mc;
191
192error:
193 kfree(mc);
194 return NULL;
195}
196
150static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, 197static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
151 struct rdma_conn_param *src) 198 struct rdma_conn_param *src)
152{ 199{
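
ucma_alloc_multicast() above uses the classic idr shape: the allocation that may sleep (idr_pre_get() with GFP_KERNEL) runs outside the mutex, the id insertion runs under the mutex, and the whole thing loops on -EAGAIN because a concurrent caller may have consumed the reserved room in between. A loose, runnable userspace analogy of that "allocate unlocked, insert locked, retry on contention" loop; the id_table type and its growth policy are invented for illustration and are not the kernel idr API:

/* Build with: cc -pthread demo.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct id_table {
	pthread_mutex_t lock;
	void **slots;
	int capacity;
	int next;
};

static int table_insert(struct id_table *t, void *obj)
{
	for (;;) {
		pthread_mutex_lock(&t->lock);
		if (t->next < t->capacity) {
			int id = t->next++;

			t->slots[id] = obj;
			pthread_mutex_unlock(&t->lock);
			return id;		/* success: caller owns this id */
		}
		int oldcap = t->capacity;
		pthread_mutex_unlock(&t->lock);

		/* "pre-get" step: allocation that may block, no lock held */
		int newcap = oldcap ? oldcap * 2 : 4;
		void **bigger = calloc(newcap, sizeof(*bigger));

		if (!bigger)
			return -1;

		pthread_mutex_lock(&t->lock);
		if (t->capacity < newcap) {
			if (t->slots)
				memcpy(bigger, t->slots,
				       t->capacity * sizeof(*bigger));
			free(t->slots);
			t->slots = bigger;
			t->capacity = newcap;
		} else {
			free(bigger);	/* someone else grew the table first */
		}
		pthread_mutex_unlock(&t->lock);
		/* loop back: the equivalent of retrying on -EAGAIN */
	}
}

int main(void)
{
	struct id_table t = { .lock = PTHREAD_MUTEX_INITIALIZER };
	char mc[] = "multicast join";

	printf("got id %d\n", table_insert(&t, mc));
	free(t.slots);
	return 0;
}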
@@ -180,8 +227,19 @@ static void ucma_set_event_context(struct ucma_context *ctx,
180 struct ucma_event *uevent) 227 struct ucma_event *uevent)
181{ 228{
182 uevent->ctx = ctx; 229 uevent->ctx = ctx;
183 uevent->resp.uid = ctx->uid; 230 switch (event->event) {
184 uevent->resp.id = ctx->id; 231 case RDMA_CM_EVENT_MULTICAST_JOIN:
232 case RDMA_CM_EVENT_MULTICAST_ERROR:
233 uevent->mc = (struct ucma_multicast *)
234 event->param.ud.private_data;
235 uevent->resp.uid = uevent->mc->uid;
236 uevent->resp.id = uevent->mc->id;
237 break;
238 default:
239 uevent->resp.uid = ctx->uid;
240 uevent->resp.id = ctx->id;
241 break;
242 }
185} 243}
186 244
187static int ucma_event_handler(struct rdma_cm_id *cm_id, 245static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -199,7 +257,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
199 ucma_set_event_context(ctx, event, uevent); 257 ucma_set_event_context(ctx, event, uevent);
200 uevent->resp.event = event->event; 258 uevent->resp.event = event->event;
201 uevent->resp.status = event->status; 259 uevent->resp.status = event->status;
202 if (cm_id->ps == RDMA_PS_UDP) 260 if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
203 ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); 261 ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
204 else 262 else
205 ucma_copy_conn_event(&uevent->resp.param.conn, 263 ucma_copy_conn_event(&uevent->resp.param.conn,
@@ -290,6 +348,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
290 348
291 list_del(&uevent->list); 349 list_del(&uevent->list);
292 uevent->ctx->events_reported++; 350 uevent->ctx->events_reported++;
351 if (uevent->mc)
352 uevent->mc->events_reported++;
293 kfree(uevent); 353 kfree(uevent);
294done: 354done:
295 mutex_unlock(&file->mut); 355 mutex_unlock(&file->mut);
@@ -342,6 +402,19 @@ err1:
342 return ret; 402 return ret;
343} 403}
344 404
405static void ucma_cleanup_multicast(struct ucma_context *ctx)
406{
407 struct ucma_multicast *mc, *tmp;
408
409 mutex_lock(&mut);
410 list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
411 list_del(&mc->list);
412 idr_remove(&multicast_idr, mc->id);
413 kfree(mc);
414 }
415 mutex_unlock(&mut);
416}
417
345static void ucma_cleanup_events(struct ucma_context *ctx) 418static void ucma_cleanup_events(struct ucma_context *ctx)
346{ 419{
347 struct ucma_event *uevent, *tmp; 420 struct ucma_event *uevent, *tmp;
@@ -360,6 +433,19 @@ static void ucma_cleanup_events(struct ucma_context *ctx)
360 } 433 }
361} 434}
362 435
436static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
437{
438 struct ucma_event *uevent, *tmp;
439
440 list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
441 if (uevent->mc != mc)
442 continue;
443
444 list_del(&uevent->list);
445 kfree(uevent);
446 }
447}
448
363static int ucma_free_ctx(struct ucma_context *ctx) 449static int ucma_free_ctx(struct ucma_context *ctx)
364{ 450{
365 int events_reported; 451 int events_reported;
@@ -367,6 +453,8 @@ static int ucma_free_ctx(struct ucma_context *ctx)
367 /* No new events will be generated after destroying the id. */ 453 /* No new events will be generated after destroying the id. */
368 rdma_destroy_id(ctx->cm_id); 454 rdma_destroy_id(ctx->cm_id);
369 455
456 ucma_cleanup_multicast(ctx);
457
370 /* Cleanup events not yet reported to the user. */ 458 /* Cleanup events not yet reported to the user. */
371 mutex_lock(&ctx->file->mut); 459 mutex_lock(&ctx->file->mut);
372 ucma_cleanup_events(ctx); 460 ucma_cleanup_events(ctx);
@@ -731,6 +819,114 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
731 return ret; 819 return ret;
732} 820}
733 821
822static ssize_t ucma_join_multicast(struct ucma_file *file,
823 const char __user *inbuf,
824 int in_len, int out_len)
825{
826 struct rdma_ucm_join_mcast cmd;
827 struct rdma_ucm_create_id_resp resp;
828 struct ucma_context *ctx;
829 struct ucma_multicast *mc;
830 int ret;
831
832 if (out_len < sizeof(resp))
833 return -ENOSPC;
834
835 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
836 return -EFAULT;
837
838 ctx = ucma_get_ctx(file, cmd.id);
839 if (IS_ERR(ctx))
840 return PTR_ERR(ctx);
841
842 mutex_lock(&file->mut);
843 mc = ucma_alloc_multicast(ctx);
844 if (IS_ERR(mc)) {
845 ret = PTR_ERR(mc);
846 goto err1;
847 }
848
849 mc->uid = cmd.uid;
850 memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
851 ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc);
852 if (ret)
853 goto err2;
854
855 resp.id = mc->id;
856 if (copy_to_user((void __user *)(unsigned long)cmd.response,
857 &resp, sizeof(resp))) {
858 ret = -EFAULT;
859 goto err3;
860 }
861
862 mutex_unlock(&file->mut);
863 ucma_put_ctx(ctx);
864 return 0;
865
866err3:
867 rdma_leave_multicast(ctx->cm_id, &mc->addr);
868 ucma_cleanup_mc_events(mc);
869err2:
870 mutex_lock(&mut);
871 idr_remove(&multicast_idr, mc->id);
872 mutex_unlock(&mut);
873 list_del(&mc->list);
874 kfree(mc);
875err1:
876 mutex_unlock(&file->mut);
877 ucma_put_ctx(ctx);
878 return ret;
879}
880
881static ssize_t ucma_leave_multicast(struct ucma_file *file,
882 const char __user *inbuf,
883 int in_len, int out_len)
884{
885 struct rdma_ucm_destroy_id cmd;
886 struct rdma_ucm_destroy_id_resp resp;
887 struct ucma_multicast *mc;
888 int ret = 0;
889
890 if (out_len < sizeof(resp))
891 return -ENOSPC;
892
893 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
894 return -EFAULT;
895
896 mutex_lock(&mut);
897 mc = idr_find(&multicast_idr, cmd.id);
898 if (!mc)
899 mc = ERR_PTR(-ENOENT);
900 else if (mc->ctx->file != file)
901 mc = ERR_PTR(-EINVAL);
902 else {
903 idr_remove(&multicast_idr, mc->id);
904 atomic_inc(&mc->ctx->ref);
905 }
906 mutex_unlock(&mut);
907
908 if (IS_ERR(mc)) {
909 ret = PTR_ERR(mc);
910 goto out;
911 }
912
913 rdma_leave_multicast(mc->ctx->cm_id, &mc->addr);
914 mutex_lock(&mc->ctx->file->mut);
915 ucma_cleanup_mc_events(mc);
916 list_del(&mc->list);
917 mutex_unlock(&mc->ctx->file->mut);
918
919 ucma_put_ctx(mc->ctx);
920 resp.events_reported = mc->events_reported;
921 kfree(mc);
922
923 if (copy_to_user((void __user *)(unsigned long)cmd.response,
924 &resp, sizeof(resp)))
925 ret = -EFAULT;
926out:
927 return ret;
928}
929
734static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 930static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
735 const char __user *inbuf, 931 const char __user *inbuf,
736 int in_len, int out_len) = { 932 int in_len, int out_len) = {
@@ -750,6 +946,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
750 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 946 [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
751 [RDMA_USER_CM_CMD_SET_OPTION] = NULL, 947 [RDMA_USER_CM_CMD_SET_OPTION] = NULL,
752 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 948 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
949 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
950 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
753}; 951};
754 952
755static ssize_t ucma_write(struct file *filp, const char __user *buf, 953static ssize_t ucma_write(struct file *filp, const char __user *buf,
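
ucma dispatches userspace requests through ucma_cmd_table, an array of handler pointers indexed by the command code, and the hunk above simply adds the JOIN_MCAST/LEAVE_MCAST slots. A toy, runnable sketch of that dispatch shape; the demo_* enum values and handlers are invented for the example:

#include <errno.h>
#include <stdio.h>

enum demo_cmd { DEMO_CREATE_ID, DEMO_JOIN_MCAST, DEMO_LEAVE_MCAST, DEMO_NUM_CMDS };

typedef int (*demo_handler)(const void *inbuf, int in_len);

static int demo_join(const void *inbuf, int in_len)  { (void)inbuf; (void)in_len; puts("join");  return 0; }
static int demo_leave(const void *inbuf, int in_len) { (void)inbuf; (void)in_len; puts("leave"); return 0; }

static const demo_handler demo_table[DEMO_NUM_CMDS] = {
	[DEMO_CREATE_ID]   = NULL,		/* not wired up in this sketch */
	[DEMO_JOIN_MCAST]  = demo_join,
	[DEMO_LEAVE_MCAST] = demo_leave,
};

static int demo_dispatch(unsigned int cmd, const void *inbuf, int in_len)
{
	if (cmd >= DEMO_NUM_CMDS || !demo_table[cmd])
		return -EINVAL;			/* unknown or unimplemented command */
	return demo_table[cmd](inbuf, in_len);
}

int main(void)
{
	demo_dispatch(DEMO_JOIN_MCAST, NULL, 0);
	demo_dispatch(DEMO_LEAVE_MCAST, NULL, 0);
	return demo_dispatch(DEMO_CREATE_ID, NULL, 0) == -EINVAL ? 0 : 1;
}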
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 5a7306f5efae..75f7b16a271d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 82fa72041989..114ac3b775dc 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 1b97e80b8780..8ab04a7c6f6e 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 997aa32cbf07..65bf577311aa 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
index a6bbe8370d81..a2703a3d882d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 103fc42d6976..90d7b8972cb4 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 4611afa52220..0315c9d9fce9 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 6517ef85026f..caf4e6007a44 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index a522b1baa3b4..e5442e34b788 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 7c810d904279..0c6f281bd4a0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 98b3bdb5de9e..d7624c170ee7 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index a6efa8fe15d8..54362afbf72f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 2b6cd53bb3fc..a6c2c4ba29e6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6861087d776c..2aef122f9955 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 61e3278fd7a8..2af3e93b607f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index e066727504b6..4dda2f6da2de 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -846,6 +845,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
846 break; 845 break;
847 case IWCH_QP_STATE_TERMINATE: 846 case IWCH_QP_STATE_TERMINATE:
848 qhp->attr.state = IWCH_QP_STATE_TERMINATE; 847 qhp->attr.state = IWCH_QP_STATE_TERMINATE;
848 if (t3b_device(qhp->rhp))
849 cxio_set_wq_in_error(&qhp->wq);
849 if (!internal) 850 if (!internal)
850 terminate = 1; 851 terminate = 1;
851 break; 852 break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
index c4e7fbea8bbd..cb7086f558c1 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_user.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
index 727b10d89686..1a854598e0e6 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -7,11 +7,3 @@ config INFINIBAND_EHCA
7 To compile the driver as a module, choose M here. The module 7 To compile the driver as a module, choose M here. The module
8 will be called ib_ehca. 8 will be called ib_ehca.
9 9
10config INFINIBAND_EHCA_SCALING
11 bool "Scaling support (EXPERIMENTAL)"
12 depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
13 default y
14 ---help---
15 eHCA scaling support schedules the CQ callbacks to different CPUs.
16
17 To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index cf95ee474b0f..40404c9e2817 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -42,8 +42,6 @@
42#ifndef __EHCA_CLASSES_H__ 42#ifndef __EHCA_CLASSES_H__
43#define __EHCA_CLASSES_H__ 43#define __EHCA_CLASSES_H__
44 44
45#include "ehca_classes.h"
46#include "ipz_pt_fn.h"
47 45
48struct ehca_module; 46struct ehca_module;
49struct ehca_qp; 47struct ehca_qp;
@@ -54,14 +52,22 @@ struct ehca_mw;
54struct ehca_pd; 52struct ehca_pd;
55struct ehca_av; 53struct ehca_av;
56 54
55#include <rdma/ib_verbs.h>
56#include <rdma/ib_user_verbs.h>
57
57#ifdef CONFIG_PPC64 58#ifdef CONFIG_PPC64
58#include "ehca_classes_pSeries.h" 59#include "ehca_classes_pSeries.h"
59#endif 60#endif
61#include "ipz_pt_fn.h"
62#include "ehca_qes.h"
63#include "ehca_irq.h"
60 64
61#include <rdma/ib_verbs.h> 65#define EHCA_EQE_CACHE_SIZE 20
62#include <rdma/ib_user_verbs.h>
63 66
64#include "ehca_irq.h" 67struct ehca_eqe_cache_entry {
68 struct ehca_eqe *eqe;
69 struct ehca_cq *cq;
70};
65 71
66struct ehca_eq { 72struct ehca_eq {
67 u32 length; 73 u32 length;
@@ -74,6 +80,8 @@ struct ehca_eq {
74 spinlock_t spinlock; 80 spinlock_t spinlock;
75 struct tasklet_struct interrupt_task; 81 struct tasklet_struct interrupt_task;
76 u32 ist; 82 u32 ist;
83 spinlock_t irq_spinlock;
84 struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
77}; 85};
78 86
79struct ehca_sport { 87struct ehca_sport {
@@ -269,6 +277,7 @@ extern struct idr ehca_cq_idr;
269extern int ehca_static_rate; 277extern int ehca_static_rate;
270extern int ehca_port_act_time; 278extern int ehca_port_act_time;
271extern int ehca_use_hp_mr; 279extern int ehca_use_hp_mr;
280extern int ehca_scaling_code;
272 281
273struct ipzu_queue_resp { 282struct ipzu_queue_resp {
274 u32 qe_size; /* queue entry size */ 283 u32 qe_size; /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 24ceab0bae4a..4961eb88827c 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shca,
61 struct ib_device *ib_dev = &shca->ib_device; 61 struct ib_device *ib_dev = &shca->ib_device;
62 62
63 spin_lock_init(&eq->spinlock); 63 spin_lock_init(&eq->spinlock);
64 spin_lock_init(&eq->irq_spinlock);
64 eq->is_initialized = 0; 65 eq->is_initialized = 0;
65 66
66 if (type != EHCA_EQ && type != EHCA_NEQ) { 67 if (type != EHCA_EQ && type != EHCA_NEQ) {
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index b7be950ab47c..30eb45df9f0b 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -162,6 +162,9 @@ int ehca_query_port(struct ib_device *ibdev,
162 props->active_width = IB_WIDTH_12X; 162 props->active_width = IB_WIDTH_12X;
163 props->active_speed = 0x1; 163 props->active_speed = 0x1;
164 164
165 /* at the moment (logical) link state is always LINK_UP */
166 props->phys_state = 0x5;
167
165query_port1: 168query_port1:
166 ehca_free_fw_ctrlblock(rblock); 169 ehca_free_fw_ctrlblock(rblock);
167 170
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 6c4f9f91b15d..3ec53c687d08 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -63,15 +63,11 @@
63#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) 63#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
64#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) 64#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
65 65
66#ifdef CONFIG_INFINIBAND_EHCA_SCALING
67
68static void queue_comp_task(struct ehca_cq *__cq); 66static void queue_comp_task(struct ehca_cq *__cq);
69 67
70static struct ehca_comp_pool* pool; 68static struct ehca_comp_pool* pool;
71static struct notifier_block comp_pool_callback_nb; 69static struct notifier_block comp_pool_callback_nb;
72 70
73#endif
74
75static inline void comp_event_callback(struct ehca_cq *cq) 71static inline void comp_event_callback(struct ehca_cq *cq)
76{ 72{
77 if (!cq->ib_cq.comp_handler) 73 if (!cq->ib_cq.comp_handler)
@@ -206,7 +202,7 @@ static void qp_event_callback(struct ehca_shca *shca,
206} 202}
207 203
208static void cq_event_callback(struct ehca_shca *shca, 204static void cq_event_callback(struct ehca_shca *shca,
209 u64 eqe) 205 u64 eqe)
210{ 206{
211 struct ehca_cq *cq; 207 struct ehca_cq *cq;
212 unsigned long flags; 208 unsigned long flags;
@@ -318,7 +314,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
318 "disruptive port %x configuration change", port); 314 "disruptive port %x configuration change", port);
319 315
320 ehca_info(&shca->ib_device, 316 ehca_info(&shca->ib_device,
321 "port %x is inactive.", port); 317 "port %x is inactive.", port);
322 event.device = &shca->ib_device; 318 event.device = &shca->ib_device;
323 event.event = IB_EVENT_PORT_ERR; 319 event.event = IB_EVENT_PORT_ERR;
324 event.element.port_num = port; 320 event.element.port_num = port;
@@ -326,7 +322,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
326 ib_dispatch_event(&event); 322 ib_dispatch_event(&event);
327 323
328 ehca_info(&shca->ib_device, 324 ehca_info(&shca->ib_device,
329 "port %x is active.", port); 325 "port %x is active.", port);
330 event.device = &shca->ib_device; 326 event.device = &shca->ib_device;
331 event.event = IB_EVENT_PORT_ACTIVE; 327 event.event = IB_EVENT_PORT_ACTIVE;
332 event.element.port_num = port; 328 event.element.port_num = port;
@@ -401,115 +397,170 @@ irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
401 return IRQ_HANDLED; 397 return IRQ_HANDLED;
402} 398}
403 399
404void ehca_tasklet_eq(unsigned long data)
405{
406 struct ehca_shca *shca = (struct ehca_shca*)data;
407 struct ehca_eqe *eqe;
408 int int_state;
409 int query_cnt = 0;
410 400
411 do { 401static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
412 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); 402{
403 u64 eqe_value;
404 u32 token;
405 unsigned long flags;
406 struct ehca_cq *cq;
407 eqe_value = eqe->entry;
408 ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
409 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
410 ehca_dbg(&shca->ib_device, "... completion event");
411 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
412 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
413 cq = idr_find(&ehca_cq_idr, token);
414 if (cq == NULL) {
415 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
416 ehca_err(&shca->ib_device,
417 "Invalid eqe for non-existing cq token=%x",
418 token);
419 return;
420 }
421 reset_eq_pending(cq);
422 if (ehca_scaling_code) {
423 queue_comp_task(cq);
424 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
425 } else {
426 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
427 comp_event_callback(cq);
428 }
429 } else {
430 ehca_dbg(&shca->ib_device,
431 "Got non completion event");
432 parse_identifier(shca, eqe_value);
433 }
434}
413 435
414 if ((shca->hw_level >= 2) && eqe) 436void ehca_process_eq(struct ehca_shca *shca, int is_irq)
415 int_state = 1; 437{
416 else 438 struct ehca_eq *eq = &shca->eq;
417 int_state = 0; 439 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
418 440 u64 eqe_value;
419 while ((int_state == 1) || eqe) { 441 unsigned long flags;
420 while (eqe) { 442 int eqe_cnt, i;
421 u64 eqe_value = eqe->entry; 443 int eq_empty = 0;
422 444
423 ehca_dbg(&shca->ib_device, 445 spin_lock_irqsave(&eq->irq_spinlock, flags);
424 "eqe_value=%lx", eqe_value); 446 if (is_irq) {
425 447 const int max_query_cnt = 100;
426 /* TODO: better structure */ 448 int query_cnt = 0;
427 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, 449 int int_state = 1;
428 eqe_value)) { 450 do {
429 unsigned long flags; 451 int_state = hipz_h_query_int_state(
430 u32 token; 452 shca->ipz_hca_handle, eq->ist);
431 struct ehca_cq *cq; 453 query_cnt++;
432 454 iosync();
433 ehca_dbg(&shca->ib_device, 455 } while (int_state && query_cnt < max_query_cnt);
434 "... completion event"); 456 if (unlikely((query_cnt == max_query_cnt)))
435 token = 457 ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
436 EHCA_BMASK_GET(EQE_CQ_TOKEN, 458 int_state, query_cnt);
437 eqe_value); 459 }
438 spin_lock_irqsave(&ehca_cq_idr_lock,
439 flags);
440 cq = idr_find(&ehca_cq_idr, token);
441
442 if (cq == NULL) {
443 spin_unlock_irqrestore(&ehca_cq_idr_lock,
444 flags);
445 break;
446 }
447
448 reset_eq_pending(cq);
449#ifdef CONFIG_INFINIBAND_EHCA_SCALING
450 queue_comp_task(cq);
451 spin_unlock_irqrestore(&ehca_cq_idr_lock,
452 flags);
453#else
454 spin_unlock_irqrestore(&ehca_cq_idr_lock,
455 flags);
456 comp_event_callback(cq);
457#endif
458 } else {
459 ehca_dbg(&shca->ib_device,
460 "... non completion event");
461 parse_identifier(shca, eqe_value);
462 }
463 eqe =
464 (struct ehca_eqe *)ehca_poll_eq(shca,
465 &shca->eq);
466 }
467 460
468 if (shca->hw_level >= 2) { 461 /* read out all eqes */
469 int_state = 462 eqe_cnt = 0;
470 hipz_h_query_int_state(shca->ipz_hca_handle, 463 do {
471 shca->eq.ist); 464 u32 token;
472 query_cnt++; 465 eqe_cache[eqe_cnt].eqe =
473 iosync(); 466 (struct ehca_eqe *)ehca_poll_eq(shca, eq);
474 if (query_cnt >= 100) { 467 if (!eqe_cache[eqe_cnt].eqe)
475 query_cnt = 0; 468 break;
476 int_state = 0; 469 eqe_value = eqe_cache[eqe_cnt].eqe->entry;
477 } 470 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
471 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
472 spin_lock(&ehca_cq_idr_lock);
473 eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
474 if (!eqe_cache[eqe_cnt].cq) {
475 spin_unlock(&ehca_cq_idr_lock);
476 ehca_err(&shca->ib_device,
477 "Invalid eqe for non-existing cq "
478 "token=%x", token);
479 continue;
478 } 480 }
479 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); 481 spin_unlock(&ehca_cq_idr_lock);
480 482 } else
483 eqe_cache[eqe_cnt].cq = NULL;
484 eqe_cnt++;
485 } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
486 if (!eqe_cnt) {
487 if (is_irq)
488 ehca_dbg(&shca->ib_device,
489 "No eqe found for irq event");
490 goto unlock_irq_spinlock;
491 } else if (!is_irq)
492 ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
493 if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
494 ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
495 /* enable irq for new packets */
496 for (i = 0; i < eqe_cnt; i++) {
497 if (eq->eqe_cache[i].cq)
498 reset_eq_pending(eq->eqe_cache[i].cq);
499 }
500 /* check eq */
501 spin_lock(&eq->spinlock);
502 eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
503 spin_unlock(&eq->spinlock);
504 /* call completion handler for cached eqes */
505 for (i = 0; i < eqe_cnt; i++)
506 if (eq->eqe_cache[i].cq) {
507 if (ehca_scaling_code) {
508 spin_lock(&ehca_cq_idr_lock);
509 queue_comp_task(eq->eqe_cache[i].cq);
510 spin_unlock(&ehca_cq_idr_lock);
511 } else
512 comp_event_callback(eq->eqe_cache[i].cq);
513 } else {
514 ehca_dbg(&shca->ib_device, "Got non completion event");
515 parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
481 } 516 }
482 } while (int_state != 0); 517 /* poll eq if not empty */
483 518 if (eq_empty)
484 return; 519 goto unlock_irq_spinlock;
520 do {
521 struct ehca_eqe *eqe;
522 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
523 if (!eqe)
524 break;
525 process_eqe(shca, eqe);
526 eqe_cnt++;
527 } while (1);
528
529unlock_irq_spinlock:
530 spin_unlock_irqrestore(&eq->irq_spinlock, flags);
485} 531}
486 532
487#ifdef CONFIG_INFINIBAND_EHCA_SCALING 533void ehca_tasklet_eq(unsigned long data)
534{
535 ehca_process_eq((struct ehca_shca*)data, 1);
536}
488 537
489static inline int find_next_online_cpu(struct ehca_comp_pool* pool) 538static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
490{ 539{
491 unsigned long flags_last_cpu; 540 int cpu;
541 unsigned long flags;
492 542
543 WARN_ON_ONCE(!in_interrupt());
493 if (ehca_debug_level) 544 if (ehca_debug_level)
494 ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); 545 ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
495 546
496 spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu); 547 spin_lock_irqsave(&pool->last_cpu_lock, flags);
497 pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map); 548 cpu = next_cpu(pool->last_cpu, cpu_online_map);
498 if (pool->last_cpu == NR_CPUS) 549 if (cpu == NR_CPUS)
499 pool->last_cpu = first_cpu(cpu_online_map); 550 cpu = first_cpu(cpu_online_map);
500 spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu); 551 pool->last_cpu = cpu;
552 spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
501 553
502 return pool->last_cpu; 554 return cpu;
503} 555}
504 556
505static void __queue_comp_task(struct ehca_cq *__cq, 557static void __queue_comp_task(struct ehca_cq *__cq,
506 struct ehca_cpu_comp_task *cct) 558 struct ehca_cpu_comp_task *cct)
507{ 559{
508 unsigned long flags_cct; 560 unsigned long flags;
509 unsigned long flags_cq;
510 561
511 spin_lock_irqsave(&cct->task_lock, flags_cct); 562 spin_lock_irqsave(&cct->task_lock, flags);
512 spin_lock_irqsave(&__cq->task_lock, flags_cq); 563 spin_lock(&__cq->task_lock);
513 564
514 if (__cq->nr_callbacks == 0) { 565 if (__cq->nr_callbacks == 0) {
515 __cq->nr_callbacks++; 566 __cq->nr_callbacks++;
@@ -520,8 +571,8 @@ static void __queue_comp_task(struct ehca_cq *__cq,
520 else 571 else
521 __cq->nr_callbacks++; 572 __cq->nr_callbacks++;
522 573
523 spin_unlock_irqrestore(&__cq->task_lock, flags_cq); 574 spin_unlock(&__cq->task_lock);
524 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 575 spin_unlock_irqrestore(&cct->task_lock, flags);
525} 576}
526 577
527static void queue_comp_task(struct ehca_cq *__cq) 578static void queue_comp_task(struct ehca_cq *__cq)
@@ -532,69 +583,69 @@ static void queue_comp_task(struct ehca_cq *__cq)
532 583
533 cpu = get_cpu(); 584 cpu = get_cpu();
534 cpu_id = find_next_online_cpu(pool); 585 cpu_id = find_next_online_cpu(pool);
535
536 BUG_ON(!cpu_online(cpu_id)); 586 BUG_ON(!cpu_online(cpu_id));
537 587
538 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 588 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
589 BUG_ON(!cct);
539 590
540 if (cct->cq_jobs > 0) { 591 if (cct->cq_jobs > 0) {
541 cpu_id = find_next_online_cpu(pool); 592 cpu_id = find_next_online_cpu(pool);
542 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 593 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
594 BUG_ON(!cct);
543 } 595 }
544 596
545 __queue_comp_task(__cq, cct); 597 __queue_comp_task(__cq, cct);
546
547 put_cpu();
548
549 return;
550} 598}
551 599
552static void run_comp_task(struct ehca_cpu_comp_task* cct) 600static void run_comp_task(struct ehca_cpu_comp_task* cct)
553{ 601{
554 struct ehca_cq *cq; 602 struct ehca_cq *cq;
555 unsigned long flags_cct; 603 unsigned long flags;
556 unsigned long flags_cq;
557 604
558 spin_lock_irqsave(&cct->task_lock, flags_cct); 605 spin_lock_irqsave(&cct->task_lock, flags);
559 606
560 while (!list_empty(&cct->cq_list)) { 607 while (!list_empty(&cct->cq_list)) {
561 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 608 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
562 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 609 spin_unlock_irqrestore(&cct->task_lock, flags);
563 comp_event_callback(cq); 610 comp_event_callback(cq);
564 spin_lock_irqsave(&cct->task_lock, flags_cct); 611 spin_lock_irqsave(&cct->task_lock, flags);
565 612
566 spin_lock_irqsave(&cq->task_lock, flags_cq); 613 spin_lock(&cq->task_lock);
567 cq->nr_callbacks--; 614 cq->nr_callbacks--;
568 if (cq->nr_callbacks == 0) { 615 if (cq->nr_callbacks == 0) {
569 list_del_init(cct->cq_list.next); 616 list_del_init(cct->cq_list.next);
570 cct->cq_jobs--; 617 cct->cq_jobs--;
571 } 618 }
572 spin_unlock_irqrestore(&cq->task_lock, flags_cq); 619 spin_unlock(&cq->task_lock);
573
574 } 620 }
575 621
576 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 622 spin_unlock_irqrestore(&cct->task_lock, flags);
577
578 return;
579} 623}
580 624
581static int comp_task(void *__cct) 625static int comp_task(void *__cct)
582{ 626{
583 struct ehca_cpu_comp_task* cct = __cct; 627 struct ehca_cpu_comp_task* cct = __cct;
628 int cql_empty;
584 DECLARE_WAITQUEUE(wait, current); 629 DECLARE_WAITQUEUE(wait, current);
585 630
586 set_current_state(TASK_INTERRUPTIBLE); 631 set_current_state(TASK_INTERRUPTIBLE);
587 while(!kthread_should_stop()) { 632 while(!kthread_should_stop()) {
588 add_wait_queue(&cct->wait_queue, &wait); 633 add_wait_queue(&cct->wait_queue, &wait);
589 634
590 if (list_empty(&cct->cq_list)) 635 spin_lock_irq(&cct->task_lock);
636 cql_empty = list_empty(&cct->cq_list);
637 spin_unlock_irq(&cct->task_lock);
638 if (cql_empty)
591 schedule(); 639 schedule();
592 else 640 else
593 __set_current_state(TASK_RUNNING); 641 __set_current_state(TASK_RUNNING);
594 642
595 remove_wait_queue(&cct->wait_queue, &wait); 643 remove_wait_queue(&cct->wait_queue, &wait);
596 644
597 if (!list_empty(&cct->cq_list)) 645 spin_lock_irq(&cct->task_lock);
646 cql_empty = list_empty(&cct->cq_list);
647 spin_unlock_irq(&cct->task_lock);
648 if (!cql_empty)
598 run_comp_task(__cct); 649 run_comp_task(__cct);
599 650
600 set_current_state(TASK_INTERRUPTIBLE); 651 set_current_state(TASK_INTERRUPTIBLE);
@@ -637,8 +688,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
637 688
638 if (task) 689 if (task)
639 kthread_stop(task); 690 kthread_stop(task);
640
641 return;
642} 691}
643 692
644static void take_over_work(struct ehca_comp_pool *pool, 693static void take_over_work(struct ehca_comp_pool *pool,
@@ -654,11 +703,11 @@ static void take_over_work(struct ehca_comp_pool *pool,
654 list_splice_init(&cct->cq_list, &list); 703 list_splice_init(&cct->cq_list, &list);
655 704
656 while(!list_empty(&list)) { 705 while(!list_empty(&list)) {
657 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 706 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
658 707
659 list_del(&cq->entry); 708 list_del(&cq->entry);
660 __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, 709 __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
661 smp_processor_id())); 710 smp_processor_id()));
662 } 711 }
663 712
664 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 713 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
@@ -708,14 +757,14 @@ static int comp_pool_callback(struct notifier_block *nfb,
708 return NOTIFY_OK; 757 return NOTIFY_OK;
709} 758}
710 759
711#endif
712
713int ehca_create_comp_pool(void) 760int ehca_create_comp_pool(void)
714{ 761{
715#ifdef CONFIG_INFINIBAND_EHCA_SCALING
716 int cpu; 762 int cpu;
717 struct task_struct *task; 763 struct task_struct *task;
718 764
765 if (!ehca_scaling_code)
766 return 0;
767
719 pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); 768 pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
720 if (pool == NULL) 769 if (pool == NULL)
721 return -ENOMEM; 770 return -ENOMEM;
@@ -740,16 +789,19 @@ int ehca_create_comp_pool(void)
740 comp_pool_callback_nb.notifier_call = comp_pool_callback; 789 comp_pool_callback_nb.notifier_call = comp_pool_callback;
741 comp_pool_callback_nb.priority =0; 790 comp_pool_callback_nb.priority =0;
742 register_cpu_notifier(&comp_pool_callback_nb); 791 register_cpu_notifier(&comp_pool_callback_nb);
743#endif 792
793 printk(KERN_INFO "eHCA scaling code enabled\n");
744 794
745 return 0; 795 return 0;
746} 796}
747 797
748void ehca_destroy_comp_pool(void) 798void ehca_destroy_comp_pool(void)
749{ 799{
750#ifdef CONFIG_INFINIBAND_EHCA_SCALING
751 int i; 800 int i;
752 801
802 if (!ehca_scaling_code)
803 return;
804
753 unregister_cpu_notifier(&comp_pool_callback_nb); 805 unregister_cpu_notifier(&comp_pool_callback_nb);
754 806
755 for (i = 0; i < NR_CPUS; i++) { 807 for (i = 0; i < NR_CPUS; i++) {
@@ -758,7 +810,4 @@ void ehca_destroy_comp_pool(void)
758 } 810 }
759 free_percpu(pool->cpu_comp_tasks); 811 free_percpu(pool->cpu_comp_tasks);
760 kfree(pool); 812 kfree(pool);
761#endif
762
763 return;
764} 813}
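
The ehca_irq.c rework above replaces the compile-time scaling option with the runtime ehca_scaling_code switch and, in comp_task(), samples list_empty() under cct->task_lock before deciding whether to sleep. A generic, runnable condition-variable version of that "check the work list under the producers' lock before sleeping" worker loop; this is only an analogy, since the kernel code uses a kthread, spinlock and waitqueue rather than pthreads, and the work_queue names are invented:

/* Build with: cc -pthread demo.c */
#include <pthread.h>
#include <stdio.h>

struct work_queue {
	pthread_mutex_t lock;
	pthread_cond_t nonempty;
	int pending;		/* stands in for the cq_list */
	int stop;
};

static void *worker(void *arg)
{
	struct work_queue *q = arg;

	pthread_mutex_lock(&q->lock);
	while (!q->stop) {
		while (!q->pending && !q->stop)		/* predicate checked under the lock */
			pthread_cond_wait(&q->nonempty, &q->lock);
		while (q->pending) {
			q->pending--;
			pthread_mutex_unlock(&q->lock);	/* run the callback unlocked */
			puts("completion callback");
			pthread_mutex_lock(&q->lock);
		}
	}
	pthread_mutex_unlock(&q->lock);
	return NULL;
}

int main(void)
{
	struct work_queue q = { .lock = PTHREAD_MUTEX_INITIALIZER,
				.nonempty = PTHREAD_COND_INITIALIZER };
	pthread_t tid;

	pthread_create(&tid, NULL, worker, &q);

	pthread_mutex_lock(&q.lock);
	q.pending = 2;				/* producer: queue work ... */
	pthread_cond_signal(&q.nonempty);	/* ... and wake the worker */
	pthread_mutex_unlock(&q.lock);

	pthread_mutex_lock(&q.lock);
	q.stop = 1;
	pthread_cond_signal(&q.nonempty);
	pthread_mutex_unlock(&q.lock);

	pthread_join(tid, NULL);
	return 0;
}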
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index be579cc0adf6..6ed06ee033ed 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -56,6 +56,7 @@ void ehca_tasklet_neq(unsigned long data);
56 56
57irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); 57irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
58void ehca_tasklet_eq(unsigned long data); 58void ehca_tasklet_eq(unsigned long data);
59void ehca_process_eq(struct ehca_shca *shca, int is_irq);
59 60
60struct ehca_cpu_comp_task { 61struct ehca_cpu_comp_task {
61 wait_queue_head_t wait_queue; 62 wait_queue_head_t wait_queue;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 1155bcf48212..c1835121a822 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
52MODULE_LICENSE("Dual BSD/GPL"); 52MODULE_LICENSE("Dual BSD/GPL");
53MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); 53MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
54MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); 54MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
55MODULE_VERSION("SVNEHCA_0020"); 55MODULE_VERSION("SVNEHCA_0021");
56 56
57int ehca_open_aqp1 = 0; 57int ehca_open_aqp1 = 0;
58int ehca_debug_level = 0; 58int ehca_debug_level = 0;
@@ -62,6 +62,7 @@ int ehca_use_hp_mr = 0;
62int ehca_port_act_time = 30; 62int ehca_port_act_time = 30;
63int ehca_poll_all_eqs = 1; 63int ehca_poll_all_eqs = 1;
64int ehca_static_rate = -1; 64int ehca_static_rate = -1;
65int ehca_scaling_code = 1;
65 66
66module_param_named(open_aqp1, ehca_open_aqp1, int, 0); 67module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
67module_param_named(debug_level, ehca_debug_level, int, 0); 68module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -71,6 +72,7 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
71module_param_named(port_act_time, ehca_port_act_time, int, 0); 72module_param_named(port_act_time, ehca_port_act_time, int, 0);
72module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); 73module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
73module_param_named(static_rate, ehca_static_rate, int, 0); 74module_param_named(static_rate, ehca_static_rate, int, 0);
75module_param_named(scaling_code, ehca_scaling_code, int, 0);
74 76
75MODULE_PARM_DESC(open_aqp1, 77MODULE_PARM_DESC(open_aqp1,
76 "AQP1 on startup (0: no (default), 1: yes)"); 78 "AQP1 on startup (0: no (default), 1: yes)");
@@ -91,6 +93,8 @@ MODULE_PARM_DESC(poll_all_eqs,
91 " (0: no, 1: yes (default))"); 93 " (0: no, 1: yes (default))");
92MODULE_PARM_DESC(static_rate, 94MODULE_PARM_DESC(static_rate,
93 "set permanent static rate (default: disabled)"); 95 "set permanent static rate (default: disabled)");
96MODULE_PARM_DESC(scaling_code,
97 "set scaling code (0: disabled, 1: enabled/default)");
94 98
95spinlock_t ehca_qp_idr_lock; 99spinlock_t ehca_qp_idr_lock;
96spinlock_t ehca_cq_idr_lock; 100spinlock_t ehca_cq_idr_lock;
@@ -432,8 +436,8 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
432 436
433static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) 437static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
434{ 438{
435 return snprintf(buf, PAGE_SIZE, "%d\n", 439 return snprintf(buf, PAGE_SIZE, "%d\n",
436 ehca_debug_level); 440 ehca_debug_level);
437} 441}
438 442
439static ssize_t ehca_store_debug_level(struct device_driver *ddp, 443static ssize_t ehca_store_debug_level(struct device_driver *ddp,
@@ -778,8 +782,24 @@ void ehca_poll_eqs(unsigned long data)
778 782
779 spin_lock(&shca_list_lock); 783 spin_lock(&shca_list_lock);
780 list_for_each_entry(shca, &shca_list, shca_list) { 784 list_for_each_entry(shca, &shca_list, shca_list) {
781 if (shca->eq.is_initialized) 785 if (shca->eq.is_initialized) {
782 ehca_tasklet_eq((unsigned long)(void*)shca); 786 /* call deadman proc only if eq ptr does not change */
787 struct ehca_eq *eq = &shca->eq;
788 int max = 3;
789 volatile u64 q_ofs, q_ofs2;
790 u64 flags;
791 spin_lock_irqsave(&eq->spinlock, flags);
792 q_ofs = eq->ipz_queue.current_q_offset;
793 spin_unlock_irqrestore(&eq->spinlock, flags);
794 do {
795 spin_lock_irqsave(&eq->spinlock, flags);
796 q_ofs2 = eq->ipz_queue.current_q_offset;
797 spin_unlock_irqrestore(&eq->spinlock, flags);
798 max--;
799 } while (q_ofs == q_ofs2 && max > 0);
800 if (q_ofs == q_ofs2)
801 ehca_process_eq(shca, 0);
802 }
783 } 803 }
784 mod_timer(&poll_eqs_timer, jiffies + HZ); 804 mod_timer(&poll_eqs_timer, jiffies + HZ);
785 spin_unlock(&shca_list_lock); 805 spin_unlock(&shca_list_lock);
@@ -790,7 +810,7 @@ int __init ehca_module_init(void)
790 int ret; 810 int ret;
791 811
792 printk(KERN_INFO "eHCA Infiniband Device Driver " 812 printk(KERN_INFO "eHCA Infiniband Device Driver "
793 "(Rel.: SVNEHCA_0020)\n"); 813 "(Rel.: SVNEHCA_0021)\n");
794 idr_init(&ehca_qp_idr); 814 idr_init(&ehca_qp_idr);
795 idr_init(&ehca_cq_idr); 815 idr_init(&ehca_cq_idr);
796 spin_lock_init(&ehca_qp_idr_lock); 816 spin_lock_init(&ehca_qp_idr_lock);
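
ehca_poll_eqs() above gains a deadman check: it samples the EQ consumer offset a few times under the EQ lock and only falls back to ehca_process_eq(shca, 0) when the offset never moves, i.e. when the interrupt-driven path appears stuck. A stripped-down, runnable sketch of that watchdog idea; eq_offset() and recover_eq() are hypothetical stand-ins and the locking is omitted:

#include <stdio.h>

static unsigned long fake_offset;	/* pretend consumer offset; never advances here */

static unsigned long eq_offset(void) { return fake_offset; }
static void recover_eq(void)         { puts("deadman: processing EQ by hand"); }

static void deadman_poll(void)
{
	int tries = 3;
	unsigned long before = eq_offset(), after;

	do {
		after = eq_offset();	/* would normally be re-read under the EQ lock */
	} while (after == before && --tries > 0);

	if (after == before)		/* no progress observed: assume a missed event */
		recover_eq();
}

int main(void)
{
	deadman_poll();			/* offset never moves here, so recovery fires */
	return 0;
}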
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index dc3bda2634b7..8199c45768a3 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -79,7 +79,7 @@ static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
79 if (q_offset >= queue->queue_length) 79 if (q_offset >= queue->queue_length)
80 return NULL; 80 return NULL;
81 current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; 81 current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
82 return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; 82 return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
83} 83}
84 84
85/* 85/*
@@ -247,6 +247,15 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
247 return ret; 247 return ret;
248} 248}
249 249
250static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
251{
252 void *ret = ipz_qeit_get(queue);
253 u32 qe = *(u8 *) ret;
254 if ((qe >> 7) != (queue->toggle_state & 1))
255 return NULL;
256 return ret;
257}
258
250/* returns address (GX) of first queue entry */ 259/* returns address (GX) of first queue entry */
251static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) 260static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
252{ 261{
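
ipz_eqit_eq_peek_valid() above decides whether the next event-queue entry is valid by comparing the entry's toggle bit against the queue's toggle_state, without consuming the entry. A small runnable sketch of that toggle-bit ring convention; the demo_ring layout (bit 7 as the toggle, low bits as payload) and the helper names are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 4

struct demo_ring {
	uint8_t slot[RING_SIZE];	/* bit 7 = toggle, low bits = payload */
	unsigned int head;		/* consumer position */
	unsigned int toggle;		/* consumer's expected toggle state */
};

/* Return the next entry if it is valid, NULL otherwise; does not advance
 * the consumer -- a "peek", like the new helper above. */
static uint8_t *ring_peek(struct demo_ring *r)
{
	uint8_t *entry = &r->slot[r->head];

	if ((*entry >> 7) != (r->toggle & 1))
		return NULL;
	return entry;
}

static void ring_consume(struct demo_ring *r)
{
	if (++r->head == RING_SIZE) {	/* wrapped: expect the flipped bit next lap */
		r->head = 0;
		r->toggle ^= 1;
	}
}

int main(void)
{
	struct demo_ring r = { .toggle = 1 };

	r.slot[0] = 0x80 | 7;		/* producer wrote payload 7 with toggle = 1 */

	uint8_t *e = ring_peek(&r);
	if (e)
		printf("valid entry, payload %u\n", (unsigned)(*e & 0x7f));
	ring_consume(&r);
	printf("next entry valid? %s\n", ring_peek(&r) ? "yes" : "no");
	return 0;
}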
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 6e0f2b8918ce..f6f949040825 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -96,8 +96,8 @@ static void ipath_dma_unmap_page(struct ib_device *dev,
96 BUG_ON(!valid_dma_direction(direction)); 96 BUG_ON(!valid_dma_direction(direction));
97} 97}
98 98
99int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, 99static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
100 enum dma_data_direction direction) 100 enum dma_data_direction direction)
101{ 101{
102 u64 addr; 102 u64 addr;
103 int i; 103 int i;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 7468477ba837..993482545021 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -1534,7 +1534,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1534 * @kbase: ipath_base_info pointer 1534 * @kbase: ipath_base_info pointer
1535 * 1535 *
1536 * We set the PCIE flag because the lower bandwidth on PCIe vs 1536 * We set the PCIE flag because the lower bandwidth on PCIe vs
1537 * HyperTransport can affect some user packet algorithims. 1537 * HyperTransport can affect some user packet algorithms.
1538 */ 1538 */
1539static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) 1539static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
1540{ 1540{
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index ae8bf9950c6d..05918e1e7c36 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -1293,7 +1293,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
1293 * @kbase: ipath_base_info pointer 1293 * @kbase: ipath_base_info pointer
1294 * 1294 *
1295 * We set the PCIE flag because the lower bandwidth on PCIe vs 1295 * We set the PCIE flag because the lower bandwidth on PCIe vs
1296 * HyperTransport can affect some user packet algorithims. 1296 * HyperTransport can affect some user packet algorithms.
1297 */ 1297 */
1298static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) 1298static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
1299{ 1299{
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 0b9d053a599d..48f7c65e9aed 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -175,7 +175,9 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
175 if (!ret) { 175 if (!ret) {
176 ++chunk->npages; 176 ++chunk->npages;
177 177
178 if (!coherent && chunk->npages == MTHCA_ICM_CHUNK_LEN) { 178 if (coherent)
179 ++chunk->nsg;
180 else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
179 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, 181 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
180 chunk->npages, 182 chunk->npages,
181 PCI_DMA_BIDIRECTIONAL); 183 PCI_DMA_BIDIRECTIONAL);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 224c93dd29eb..71dc84bd4254 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -573,6 +573,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
573 goto out; 573 goto out;
574 } 574 }
575 575
576 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
577 err = 0;
578 goto out;
579 }
580
576 if ((attr_mask & IB_QP_PKEY_INDEX) && 581 if ((attr_mask & IB_QP_PKEY_INDEX) &&
577 attr->pkey_index >= dev->limits.pkey_table_len) { 582 attr->pkey_index >= dev->limits.pkey_table_len) {
578 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", 583 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2d483874a589..4d59682f7d4a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -145,7 +145,7 @@ partial_error:
145 for (; i >= 0; --i) 145 for (; i >= 0; --i)
146 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); 146 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
147 147
148 kfree_skb(skb); 148 dev_kfree_skb_any(skb);
149 return -ENOMEM; 149 return -ENOMEM;
150} 150}
151 151
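The first ipoib_cm.c hunk swaps kfree_skb() for dev_kfree_skb_any() in the receive-buffer error path; dev_kfree_skb_any() is safe whether or not the caller runs in hard-IRQ context or with interrupts disabled, which plain kfree_skb() does not guarantee. A small illustrative helper (the name is a placeholder):

#include <linux/skbuff.h>
#include <linux/netdevice.h>

/*
 * Sketch only: drop an skb from a path that might be reached from
 * interrupt context.  dev_kfree_skb_any() picks the appropriate free
 * routine for the current context.
 */
static void example_drop_skb(struct sk_buff *skb)
{
	dev_kfree_skb_any(skb);
}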
@@ -1138,7 +1138,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1138 return -EINVAL; 1138 return -EINVAL;
1139} 1139}
1140 1140
1141static DEVICE_ATTR(mode, S_IWUGO | S_IRUGO, show_mode, set_mode); 1141static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
1142 1142
1143int ipoib_cm_add_mode_attr(struct net_device *dev) 1143int ipoib_cm_add_mode_attr(struct net_device *dev)
1144{ 1144{
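The second ipoib_cm.c hunk tightens the sysfs `mode` attribute from S_IWUGO (writable by any user) to S_IWUSR (writable only by the owner, i.e. root) while keeping it world-readable. The same pattern with placeholder show/store handlers:

#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/stat.h>

/* Sketch only: a root-writable, world-readable device attribute. */
static ssize_t example_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	return sprintf(buf, "datagram\n");
}

static ssize_t example_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t count)
{
	return count;			/* accept and ignore the write */
}

static DEVICE_ATTR(example_mode, S_IWUSR | S_IRUGO, example_show, example_store);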
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fea737f520fd..b303ce6bc21e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex);
60/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ 60/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
61struct ipoib_mcast { 61struct ipoib_mcast {
62 struct ib_sa_mcmember_rec mcmember; 62 struct ib_sa_mcmember_rec mcmember;
63 struct ib_sa_multicast *mc;
63 struct ipoib_ah *ah; 64 struct ipoib_ah *ah;
64 65
65 struct rb_node rb_node; 66 struct rb_node rb_node;
66 struct list_head list; 67 struct list_head list;
67 struct completion done;
68
69 int query_id;
70 struct ib_sa_query *query;
71 68
72 unsigned long created; 69 unsigned long created;
73 unsigned long backoff; 70 unsigned long backoff;
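The struct ipoib_mcast hunk above replaces the per-join SA query bookkeeping (query pointer, query_id, completion) with a single struct ib_sa_multicast handle returned by the new ib_sa_join_multicast() API. Roughly, the state a consumer now carries looks like this (placeholder struct, not the IPoIB one):

#include <rdma/ib_sa.h>

/*
 * Sketch only: one opaque membership handle plus a "busy" bit is
 * enough to track an outstanding or completed join; the old query
 * id/pointer and completion go away because the SA multicast code
 * now owns the query lifetime.
 */
struct example_mcast_state {
	struct ib_sa_multicast *mc;	/* from ib_sa_join_multicast(),
					 * released via ib_sa_free_multicast() */
	unsigned long		flags;	/* e.g. bit 0 = join in flight */
};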
@@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
299 return 0; 296 return 0;
300} 297}
301 298
302static void 299static int
303ipoib_mcast_sendonly_join_complete(int status, 300ipoib_mcast_sendonly_join_complete(int status,
304 struct ib_sa_mcmember_rec *mcmember, 301 struct ib_sa_multicast *multicast)
305 void *mcast_ptr)
306{ 302{
307 struct ipoib_mcast *mcast = mcast_ptr; 303 struct ipoib_mcast *mcast = multicast->context;
308 struct net_device *dev = mcast->dev; 304 struct net_device *dev = mcast->dev;
309 struct ipoib_dev_priv *priv = netdev_priv(dev); 305 struct ipoib_dev_priv *priv = netdev_priv(dev);
310 306
307 /* We trap for port events ourselves. */
308 if (status == -ENETRESET)
309 return 0;
310
311 if (!status) 311 if (!status)
312 ipoib_mcast_join_finish(mcast, mcmember); 312 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
313 else { 313
314 if (status) {
314 if (mcast->logcount++ < 20) 315 if (mcast->logcount++ < 20)
315 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " 316 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
316 IPOIB_GID_FMT ", status %d\n", 317 IPOIB_GID_FMT ", status %d\n",
@@ -325,11 +326,10 @@ ipoib_mcast_sendonly_join_complete(int status,
325 spin_unlock_irq(&priv->tx_lock); 326 spin_unlock_irq(&priv->tx_lock);
326 327
327 /* Clear the busy flag so we try again */ 328 /* Clear the busy flag so we try again */
328 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 329 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
329 mcast->query = NULL; 330 &mcast->flags);
330 } 331 }
331 332 return status;
332 complete(&mcast->done);
333} 333}
334 334
335static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) 335static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
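The two hunks above convert ipoib_mcast_sendonly_join_complete() to the new completion shape: the callback receives the struct ib_sa_multicast handle (the driver's context pointer and, on success, the joined mcmember record hang off it) and returns an int that goes back to the SA multicast code. My reading of this patch, not a documented guarantee here: -ENETRESET is swallowed because port events are handled elsewhere, and a non-zero return on failure (the value of the cleared BUSY bit) signals that the driver no longer wants the membership. A bare-bones callback with placeholder names:

#include <linux/errno.h>
#include <rdma/ib_sa.h>

/* Sketch only: a new-style multicast join completion callback. */
static int example_join_complete(int status, struct ib_sa_multicast *multicast)
{
	void *priv = multicast->context;	/* set at ib_sa_join_multicast() time */

	if (status == -ENETRESET)		/* port event; handled elsewhere */
		return 0;

	if (!status) {
		/* consume multicast->rec here: MLID, MTU, rate, ... */
	}

	(void)priv;
	return status;				/* propagated to the SA layer */
}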
@@ -359,35 +359,33 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
359 rec.port_gid = priv->local_gid; 359 rec.port_gid = priv->local_gid;
360 rec.pkey = cpu_to_be16(priv->pkey); 360 rec.pkey = cpu_to_be16(priv->pkey);
361 361
362 init_completion(&mcast->done); 362 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
363 363 priv->port, &rec,
364 ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec, 364 IB_SA_MCMEMBER_REC_MGID |
365 IB_SA_MCMEMBER_REC_MGID | 365 IB_SA_MCMEMBER_REC_PORT_GID |
366 IB_SA_MCMEMBER_REC_PORT_GID | 366 IB_SA_MCMEMBER_REC_PKEY |
367 IB_SA_MCMEMBER_REC_PKEY | 367 IB_SA_MCMEMBER_REC_JOIN_STATE,
368 IB_SA_MCMEMBER_REC_JOIN_STATE, 368 GFP_ATOMIC,
369 1000, GFP_ATOMIC, 369 ipoib_mcast_sendonly_join_complete,
370 ipoib_mcast_sendonly_join_complete, 370 mcast);
371 mcast, &mcast->query); 371 if (IS_ERR(mcast->mc)) {
372 if (ret < 0) { 372 ret = PTR_ERR(mcast->mc);
373 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", 373 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
374 ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
374 ret); 375 ret);
375 } else { 376 } else {
376 ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT 377 ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
377 ", starting join\n", 378 ", starting join\n",
378 IPOIB_GID_ARG(mcast->mcmember.mgid)); 379 IPOIB_GID_ARG(mcast->mcmember.mgid));
379
380 mcast->query_id = ret;
381 } 380 }
382 381
383 return ret; 382 return ret;
384} 383}
385 384
386static void ipoib_mcast_join_complete(int status, 385static int ipoib_mcast_join_complete(int status,
387 struct ib_sa_mcmember_rec *mcmember, 386 struct ib_sa_multicast *multicast)
388 void *mcast_ptr)
389{ 387{
390 struct ipoib_mcast *mcast = mcast_ptr; 388 struct ipoib_mcast *mcast = multicast->context;
391 struct net_device *dev = mcast->dev; 389 struct net_device *dev = mcast->dev;
392 struct ipoib_dev_priv *priv = netdev_priv(dev); 390 struct ipoib_dev_priv *priv = netdev_priv(dev);
393 391
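The ipoib_mcast_sendonly_join() hunk above shows the new call pattern: ib_sa_join_multicast() takes the SA client, device, port, a partially filled mcmember record plus a component mask, a GFP flag, a completion callback and a context pointer, and returns either an ERR_PTR on immediate failure or a handle that must later be released with ib_sa_free_multicast(). A condensed, hedged sketch; the my_* names are placeholders, and the client is assumed to have been registered with ib_sa_register_client() beforehand:

#include <linux/err.h>
#include <rdma/ib_sa.h>

static struct ib_sa_client my_sa_client;	/* registered elsewhere */

static int my_complete(int status, struct ib_sa_multicast *multicast)
{
	return status;
}

/* Sketch only: issue an SA multicast join and keep the handle. */
static int my_join(struct ib_device *ca, u8 port,
		   struct ib_sa_mcmember_rec *rec, void *ctx,
		   struct ib_sa_multicast **mc_out)
{
	struct ib_sa_multicast *mc;

	mc = ib_sa_join_multicast(&my_sa_client, ca, port, rec,
				  IB_SA_MCMEMBER_REC_MGID |
				  IB_SA_MCMEMBER_REC_PORT_GID |
				  IB_SA_MCMEMBER_REC_PKEY |
				  IB_SA_MCMEMBER_REC_JOIN_STATE,
				  GFP_KERNEL, my_complete, ctx);
	if (IS_ERR(mc))
		return PTR_ERR(mc);

	*mc_out = mc;		/* release later with ib_sa_free_multicast() */
	return 0;
}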
@@ -395,24 +393,25 @@ static void ipoib_mcast_join_complete(int status,
395 " (status %d)\n", 393 " (status %d)\n",
396 IPOIB_GID_ARG(mcast->mcmember.mgid), status); 394 IPOIB_GID_ARG(mcast->mcmember.mgid), status);
397 395
398 if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { 396 /* We trap for port events ourselves. */
397 if (status == -ENETRESET)
398 return 0;
399
400 if (!status)
401 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
402
403 if (!status) {
399 mcast->backoff = 1; 404 mcast->backoff = 1;
400 mutex_lock(&mcast_mutex); 405 mutex_lock(&mcast_mutex);
401 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 406 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
402 queue_delayed_work(ipoib_workqueue, 407 queue_delayed_work(ipoib_workqueue,
403 &priv->mcast_task, 0); 408 &priv->mcast_task, 0);
404 mutex_unlock(&mcast_mutex); 409 mutex_unlock(&mcast_mutex);
405 complete(&mcast->done); 410 return 0;
406 return;
407 }
408
409 if (status == -EINTR) {
410 complete(&mcast->done);
411 return;
412 } 411 }
413 412
414 if (status && mcast->logcount++ < 20) { 413 if (mcast->logcount++ < 20) {
415 if (status == -ETIMEDOUT || status == -EINTR) { 414 if (status == -ETIMEDOUT) {
416 ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT 415 ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
417 ", status %d\n", 416 ", status %d\n",
418 IPOIB_GID_ARG(mcast->mcmember.mgid), 417 IPOIB_GID_ARG(mcast->mcmember.mgid),
@@ -429,24 +428,18 @@ static void ipoib_mcast_join_complete(int status,
429 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 428 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
430 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 429 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
431 430
432 mutex_lock(&mcast_mutex); 431 /* Clear the busy flag so we try again */
432 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
433 433
434 mutex_lock(&mcast_mutex);
434 spin_lock_irq(&priv->lock); 435 spin_lock_irq(&priv->lock);
435 mcast->query = NULL; 436 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
436 437 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
437 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { 438 mcast->backoff * HZ);
438 if (status == -ETIMEDOUT)
439 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
440 0);
441 else
442 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
443 mcast->backoff * HZ);
444 } else
445 complete(&mcast->done);
446 spin_unlock_irq(&priv->lock); 439 spin_unlock_irq(&priv->lock);
447 mutex_unlock(&mcast_mutex); 440 mutex_unlock(&mcast_mutex);
448 441
449 return; 442 return status;
450} 443}
451 444
452static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, 445static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
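The two ipoib_mcast_join_complete() hunks keep the existing retry policy but drive it from the callback's return value instead of a completion: on failure the backoff doubles (capped at IPOIB_MAX_BACKOFF_SECONDS), the join task is requeued that far in the future, and the cleared BUSY bit is returned. The backoff/requeue step on its own, as a hedged helper with placeholder arguments:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

/*
 * Sketch only: exponential backoff for a failed multicast join.
 * wq, work, backoff and max_backoff stand in for the driver's own
 * workqueue, delayed work item and limit.
 */
static void example_requeue_join(struct workqueue_struct *wq,
				 struct delayed_work *work,
				 unsigned long *backoff,
				 unsigned long max_backoff)
{
	*backoff *= 2;
	if (*backoff > max_backoff)
		*backoff = max_backoff;

	queue_delayed_work(wq, work, *backoff * HZ);	/* delay is in jiffies */
}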
@@ -495,15 +488,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
495 rec.hop_limit = priv->broadcast->mcmember.hop_limit; 488 rec.hop_limit = priv->broadcast->mcmember.hop_limit;
496 } 489 }
497 490
498 init_completion(&mcast->done); 491 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
499 492 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
500 ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, 493 &rec, comp_mask, GFP_KERNEL,
501 &rec, comp_mask, mcast->backoff * 1000, 494 ipoib_mcast_join_complete, mcast);
502 GFP_ATOMIC, ipoib_mcast_join_complete, 495 if (IS_ERR(mcast->mc)) {
503 mcast, &mcast->query); 496 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
504 497 ret = PTR_ERR(mcast->mc);
505 if (ret < 0) { 498 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
506 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
507 499
508 mcast->backoff *= 2; 500 mcast->backoff *= 2;
509 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 501 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
@@ -515,8 +507,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
515 &priv->mcast_task, 507 &priv->mcast_task,
516 mcast->backoff * HZ); 508 mcast->backoff * HZ);
517 mutex_unlock(&mcast_mutex); 509 mutex_unlock(&mcast_mutex);
518 } else 510 }
519 mcast->query_id = ret;
520} 511}
521 512
522void ipoib_mcast_join_task(struct work_struct *work) 513void ipoib_mcast_join_task(struct work_struct *work)
@@ -541,7 +532,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
541 priv->local_rate = attr.active_speed * 532 priv->local_rate = attr.active_speed *
542 ib_width_enum_to_int(attr.active_width); 533 ib_width_enum_to_int(attr.active_width);
543 } else 534 } else
544 ipoib_warn(priv, "ib_query_port failed\n"); 535 ipoib_warn(priv, "ib_query_port failed\n");
545 } 536 }
546 537
547 if (!priv->broadcast) { 538 if (!priv->broadcast) {
@@ -568,7 +559,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
568 } 559 }
569 560
570 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 561 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
571 ipoib_mcast_join(dev, priv->broadcast, 0); 562 if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
563 ipoib_mcast_join(dev, priv->broadcast, 0);
572 return; 564 return;
573 } 565 }
574 566
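The ipoib_mcast_join_task() hunk above adds a guard so a broadcast join is only issued when one is not already outstanding, with IPOIB_MCAST_FLAG_BUSY acting as the latch: set before the join is issued, tested here, and cleared (via test_and_clear_bit) in the completion and leave paths. The convention in miniature, with placeholder names:

#include <linux/types.h>
#include <linux/bitops.h>

/* Sketch only: a one-bit "join in flight" latch. */
enum { EXAMPLE_FLAG_BUSY = 0 };

struct example_group {
	unsigned long flags;
};

static bool example_join_needed(struct example_group *g)
{
	/* Don't start a second join while one is outstanding. */
	return !test_bit(EXAMPLE_FLAG_BUSY, &g->flags);
}

static void example_join_started(struct example_group *g)
{
	set_bit(EXAMPLE_FLAG_BUSY, &g->flags);
}

static void example_join_finished(struct example_group *g)
{
	clear_bit(EXAMPLE_FLAG_BUSY, &g->flags);	/* allow a retry */
}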
@@ -625,26 +617,9 @@ int ipoib_mcast_start_thread(struct net_device *dev)
625 return 0; 617 return 0;
626} 618}
627 619
628static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
629 struct ipoib_mcast *mcast)
630{
631 spin_lock_irq(&priv->lock);
632 if (mcast && mcast->query) {
633 ib_sa_cancel_query(mcast->query_id, mcast->query);
634 mcast->query = NULL;
635 spin_unlock_irq(&priv->lock);
636 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
637 IPOIB_GID_ARG(mcast->mcmember.mgid));
638 wait_for_completion(&mcast->done);
639 }
640 else
641 spin_unlock_irq(&priv->lock);
642}
643
644int ipoib_mcast_stop_thread(struct net_device *dev, int flush) 620int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
645{ 621{
646 struct ipoib_dev_priv *priv = netdev_priv(dev); 622 struct ipoib_dev_priv *priv = netdev_priv(dev);
647 struct ipoib_mcast *mcast;
648 623
649 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 624 ipoib_dbg_mcast(priv, "stopping multicast thread\n");
650 625
@@ -660,52 +635,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
660 if (flush) 635 if (flush)
661 flush_workqueue(ipoib_workqueue); 636 flush_workqueue(ipoib_workqueue);
662 637
663 wait_for_mcast_join(priv, priv->broadcast);
664
665 list_for_each_entry(mcast, &priv->multicast_list, list)
666 wait_for_mcast_join(priv, mcast);
667
668 return 0; 638 return 0;
669} 639}
670 640
671static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) 641static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
672{ 642{
673 struct ipoib_dev_priv *priv = netdev_priv(dev); 643 struct ipoib_dev_priv *priv = netdev_priv(dev);
674 struct ib_sa_mcmember_rec rec = {
675 .join_state = 1
676 };
677 int ret = 0; 644 int ret = 0;
678 645
679 if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) 646 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
680 return 0; 647 ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
681 648 IPOIB_GID_ARG(mcast->mcmember.mgid));
682 ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
683 IPOIB_GID_ARG(mcast->mcmember.mgid));
684
685 rec.mgid = mcast->mcmember.mgid;
686 rec.port_gid = priv->local_gid;
687 rec.pkey = cpu_to_be16(priv->pkey);
688 649
689 /* Remove ourselves from the multicast group */ 650 /* Remove ourselves from the multicast group */
690 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), 651 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
691 &mcast->mcmember.mgid); 652 &mcast->mcmember.mgid);
692 if (ret) 653 if (ret)
693 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); 654 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
655 }
694 656
695 /* 657 if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
696 * Just make one shot at leaving and don't wait for a reply; 658 ib_sa_free_multicast(mcast->mc);
697 * if we fail, too bad.
698 */
699 ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
700 IB_SA_MCMEMBER_REC_MGID |
701 IB_SA_MCMEMBER_REC_PORT_GID |
702 IB_SA_MCMEMBER_REC_PKEY |
703 IB_SA_MCMEMBER_REC_JOIN_STATE,
704 0, GFP_ATOMIC, NULL,
705 mcast, &mcast->query);
706 if (ret < 0)
707 ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
708 "for leave (result = %d)\n", ret);
709 659
710 return 0; 660 return 0;
711} 661}
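The ipoib_mcast_leave() hunk above drops the hand-rolled MCMemberRecord delete: leaving is now just detaching the QP from the group (if it was attached) and calling ib_sa_free_multicast() on the handle when a join was issued, with the BUSY bit telling the two cases apart. The SA multicast code is expected to issue the actual SA leave once the last local member of the group is freed, which is the point of the new common infrastructure. A hedged sketch of the handle teardown, with placeholder names:

#include <linux/bitops.h>
#include <rdma/ib_sa.h>

/*
 * Sketch only: release a membership created with ib_sa_join_multicast()
 * exactly once, keyed off the "join in flight / joined" bit.
 */
static void example_mcast_release(struct ib_sa_multicast *mc,
				  unsigned long *flags, int busy_bit)
{
	if (test_and_clear_bit(busy_bit, flags))
		ib_sa_free_multicast(mc);	/* drops our reference to the group */
}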
@@ -758,7 +708,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
758 dev_kfree_skb_any(skb); 708 dev_kfree_skb_any(skb);
759 } 709 }
760 710
761 if (mcast->query) 711 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
762 ipoib_dbg_mcast(priv, "no address vector, " 712 ipoib_dbg_mcast(priv, "no address vector, "
763 "but multicast join already started\n"); 713 "but multicast join already started\n");
764 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 714 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -916,7 +866,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
916 866
917 /* We have to cancel outside of the spinlock */ 867 /* We have to cancel outside of the spinlock */
918 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 868 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
919 wait_for_mcast_join(priv, mcast);
920 ipoib_mcast_leave(mcast->dev, mcast); 869 ipoib_mcast_leave(mcast->dev, mcast);
921 ipoib_mcast_free(mcast); 870 ipoib_mcast_free(mcast);
922 } 871 }