aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/ulp
diff options
context:
space:
mode:
authorSean Hefty <sean.hefty@intel.com>2007-02-15 20:00:17 -0500
committerRoland Dreier <rolandd@cisco.com>2007-02-16 17:20:02 -0500
commitfaec2f7b96b555055d0aa6cc6b83a537270bed52 (patch)
tree0916cb780a1a5d5fe2ac98364917c79f25d57fcf /drivers/infiniband/ulp
parent8a2e65f87c66ab1e720f49378750cdd800f9e9cf (diff)
IB/sa: Track multicast join/leave requests
The IB SA tracks multicast join/leave requests on a per port basis and does not do any reference counting: if two users of the same port join the same group, and one leaves that group, then the SA will remove the port from the group even though one user who wants to remain a member is still left. Therefore, in order to support multiple users of the same multicast group from the same port, we need to perform reference counting locally. To do this, add a multicast submodule to ib_sa to perform reference counting of multicast join/leave operations. Modify ib_ipoib (the only in-kernel user of multicast) to use the new interface. Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c195
1 files changed, 72 insertions, 123 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fea737f520fd..b303ce6bc21e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex);
60/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ 60/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
61struct ipoib_mcast { 61struct ipoib_mcast {
62 struct ib_sa_mcmember_rec mcmember; 62 struct ib_sa_mcmember_rec mcmember;
63 struct ib_sa_multicast *mc;
63 struct ipoib_ah *ah; 64 struct ipoib_ah *ah;
64 65
65 struct rb_node rb_node; 66 struct rb_node rb_node;
66 struct list_head list; 67 struct list_head list;
67 struct completion done;
68
69 int query_id;
70 struct ib_sa_query *query;
71 68
72 unsigned long created; 69 unsigned long created;
73 unsigned long backoff; 70 unsigned long backoff;
@@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
299 return 0; 296 return 0;
300} 297}
301 298
302static void 299static int
303ipoib_mcast_sendonly_join_complete(int status, 300ipoib_mcast_sendonly_join_complete(int status,
304 struct ib_sa_mcmember_rec *mcmember, 301 struct ib_sa_multicast *multicast)
305 void *mcast_ptr)
306{ 302{
307 struct ipoib_mcast *mcast = mcast_ptr; 303 struct ipoib_mcast *mcast = multicast->context;
308 struct net_device *dev = mcast->dev; 304 struct net_device *dev = mcast->dev;
309 struct ipoib_dev_priv *priv = netdev_priv(dev); 305 struct ipoib_dev_priv *priv = netdev_priv(dev);
310 306
307 /* We trap for port events ourselves. */
308 if (status == -ENETRESET)
309 return 0;
310
311 if (!status) 311 if (!status)
312 ipoib_mcast_join_finish(mcast, mcmember); 312 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
313 else { 313
314 if (status) {
314 if (mcast->logcount++ < 20) 315 if (mcast->logcount++ < 20)
315 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " 316 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
316 IPOIB_GID_FMT ", status %d\n", 317 IPOIB_GID_FMT ", status %d\n",
@@ -325,11 +326,10 @@ ipoib_mcast_sendonly_join_complete(int status,
325 spin_unlock_irq(&priv->tx_lock); 326 spin_unlock_irq(&priv->tx_lock);
326 327
327 /* Clear the busy flag so we try again */ 328 /* Clear the busy flag so we try again */
328 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 329 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
329 mcast->query = NULL; 330 &mcast->flags);
330 } 331 }
331 332 return status;
332 complete(&mcast->done);
333} 333}
334 334
335static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) 335static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
@@ -359,35 +359,33 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
359 rec.port_gid = priv->local_gid; 359 rec.port_gid = priv->local_gid;
360 rec.pkey = cpu_to_be16(priv->pkey); 360 rec.pkey = cpu_to_be16(priv->pkey);
361 361
362 init_completion(&mcast->done); 362 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
363 363 priv->port, &rec,
364 ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec, 364 IB_SA_MCMEMBER_REC_MGID |
365 IB_SA_MCMEMBER_REC_MGID | 365 IB_SA_MCMEMBER_REC_PORT_GID |
366 IB_SA_MCMEMBER_REC_PORT_GID | 366 IB_SA_MCMEMBER_REC_PKEY |
367 IB_SA_MCMEMBER_REC_PKEY | 367 IB_SA_MCMEMBER_REC_JOIN_STATE,
368 IB_SA_MCMEMBER_REC_JOIN_STATE, 368 GFP_ATOMIC,
369 1000, GFP_ATOMIC, 369 ipoib_mcast_sendonly_join_complete,
370 ipoib_mcast_sendonly_join_complete, 370 mcast);
371 mcast, &mcast->query); 371 if (IS_ERR(mcast->mc)) {
372 if (ret < 0) { 372 ret = PTR_ERR(mcast->mc);
373 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", 373 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
374 ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
374 ret); 375 ret);
375 } else { 376 } else {
376 ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT 377 ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
377 ", starting join\n", 378 ", starting join\n",
378 IPOIB_GID_ARG(mcast->mcmember.mgid)); 379 IPOIB_GID_ARG(mcast->mcmember.mgid));
379
380 mcast->query_id = ret;
381 } 380 }
382 381
383 return ret; 382 return ret;
384} 383}
385 384
386static void ipoib_mcast_join_complete(int status, 385static int ipoib_mcast_join_complete(int status,
387 struct ib_sa_mcmember_rec *mcmember, 386 struct ib_sa_multicast *multicast)
388 void *mcast_ptr)
389{ 387{
390 struct ipoib_mcast *mcast = mcast_ptr; 388 struct ipoib_mcast *mcast = multicast->context;
391 struct net_device *dev = mcast->dev; 389 struct net_device *dev = mcast->dev;
392 struct ipoib_dev_priv *priv = netdev_priv(dev); 390 struct ipoib_dev_priv *priv = netdev_priv(dev);
393 391
@@ -395,24 +393,25 @@ static void ipoib_mcast_join_complete(int status,
395 " (status %d)\n", 393 " (status %d)\n",
396 IPOIB_GID_ARG(mcast->mcmember.mgid), status); 394 IPOIB_GID_ARG(mcast->mcmember.mgid), status);
397 395
398 if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { 396 /* We trap for port events ourselves. */
397 if (status == -ENETRESET)
398 return 0;
399
400 if (!status)
401 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
402
403 if (!status) {
399 mcast->backoff = 1; 404 mcast->backoff = 1;
400 mutex_lock(&mcast_mutex); 405 mutex_lock(&mcast_mutex);
401 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 406 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
402 queue_delayed_work(ipoib_workqueue, 407 queue_delayed_work(ipoib_workqueue,
403 &priv->mcast_task, 0); 408 &priv->mcast_task, 0);
404 mutex_unlock(&mcast_mutex); 409 mutex_unlock(&mcast_mutex);
405 complete(&mcast->done); 410 return 0;
406 return;
407 }
408
409 if (status == -EINTR) {
410 complete(&mcast->done);
411 return;
412 } 411 }
413 412
414 if (status && mcast->logcount++ < 20) { 413 if (mcast->logcount++ < 20) {
415 if (status == -ETIMEDOUT || status == -EINTR) { 414 if (status == -ETIMEDOUT) {
416 ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT 415 ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
417 ", status %d\n", 416 ", status %d\n",
418 IPOIB_GID_ARG(mcast->mcmember.mgid), 417 IPOIB_GID_ARG(mcast->mcmember.mgid),
@@ -429,24 +428,18 @@ static void ipoib_mcast_join_complete(int status,
429 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 428 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
430 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 429 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
431 430
432 mutex_lock(&mcast_mutex); 431 /* Clear the busy flag so we try again */
432 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
433 433
434 mutex_lock(&mcast_mutex);
434 spin_lock_irq(&priv->lock); 435 spin_lock_irq(&priv->lock);
435 mcast->query = NULL; 436 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
436 437 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
437 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { 438 mcast->backoff * HZ);
438 if (status == -ETIMEDOUT)
439 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
440 0);
441 else
442 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
443 mcast->backoff * HZ);
444 } else
445 complete(&mcast->done);
446 spin_unlock_irq(&priv->lock); 439 spin_unlock_irq(&priv->lock);
447 mutex_unlock(&mcast_mutex); 440 mutex_unlock(&mcast_mutex);
448 441
449 return; 442 return status;
450} 443}
451 444
452static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, 445static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
@@ -495,15 +488,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
495 rec.hop_limit = priv->broadcast->mcmember.hop_limit; 488 rec.hop_limit = priv->broadcast->mcmember.hop_limit;
496 } 489 }
497 490
498 init_completion(&mcast->done); 491 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
499 492 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
500 ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, 493 &rec, comp_mask, GFP_KERNEL,
501 &rec, comp_mask, mcast->backoff * 1000, 494 ipoib_mcast_join_complete, mcast);
502 GFP_ATOMIC, ipoib_mcast_join_complete, 495 if (IS_ERR(mcast->mc)) {
503 mcast, &mcast->query); 496 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
504 497 ret = PTR_ERR(mcast->mc);
505 if (ret < 0) { 498 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
506 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
507 499
508 mcast->backoff *= 2; 500 mcast->backoff *= 2;
509 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 501 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
@@ -515,8 +507,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
515 &priv->mcast_task, 507 &priv->mcast_task,
516 mcast->backoff * HZ); 508 mcast->backoff * HZ);
517 mutex_unlock(&mcast_mutex); 509 mutex_unlock(&mcast_mutex);
518 } else 510 }
519 mcast->query_id = ret;
520} 511}
521 512
522void ipoib_mcast_join_task(struct work_struct *work) 513void ipoib_mcast_join_task(struct work_struct *work)
@@ -541,7 +532,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
541 priv->local_rate = attr.active_speed * 532 priv->local_rate = attr.active_speed *
542 ib_width_enum_to_int(attr.active_width); 533 ib_width_enum_to_int(attr.active_width);
543 } else 534 } else
544 ipoib_warn(priv, "ib_query_port failed\n"); 535 ipoib_warn(priv, "ib_query_port failed\n");
545 } 536 }
546 537
547 if (!priv->broadcast) { 538 if (!priv->broadcast) {
@@ -568,7 +559,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
568 } 559 }
569 560
570 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 561 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
571 ipoib_mcast_join(dev, priv->broadcast, 0); 562 if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
563 ipoib_mcast_join(dev, priv->broadcast, 0);
572 return; 564 return;
573 } 565 }
574 566
@@ -625,26 +617,9 @@ int ipoib_mcast_start_thread(struct net_device *dev)
625 return 0; 617 return 0;
626} 618}
627 619
628static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
629 struct ipoib_mcast *mcast)
630{
631 spin_lock_irq(&priv->lock);
632 if (mcast && mcast->query) {
633 ib_sa_cancel_query(mcast->query_id, mcast->query);
634 mcast->query = NULL;
635 spin_unlock_irq(&priv->lock);
636 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
637 IPOIB_GID_ARG(mcast->mcmember.mgid));
638 wait_for_completion(&mcast->done);
639 }
640 else
641 spin_unlock_irq(&priv->lock);
642}
643
644int ipoib_mcast_stop_thread(struct net_device *dev, int flush) 620int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
645{ 621{
646 struct ipoib_dev_priv *priv = netdev_priv(dev); 622 struct ipoib_dev_priv *priv = netdev_priv(dev);
647 struct ipoib_mcast *mcast;
648 623
649 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 624 ipoib_dbg_mcast(priv, "stopping multicast thread\n");
650 625
@@ -660,52 +635,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
660 if (flush) 635 if (flush)
661 flush_workqueue(ipoib_workqueue); 636 flush_workqueue(ipoib_workqueue);
662 637
663 wait_for_mcast_join(priv, priv->broadcast);
664
665 list_for_each_entry(mcast, &priv->multicast_list, list)
666 wait_for_mcast_join(priv, mcast);
667
668 return 0; 638 return 0;
669} 639}
670 640
671static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) 641static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
672{ 642{
673 struct ipoib_dev_priv *priv = netdev_priv(dev); 643 struct ipoib_dev_priv *priv = netdev_priv(dev);
674 struct ib_sa_mcmember_rec rec = {
675 .join_state = 1
676 };
677 int ret = 0; 644 int ret = 0;
678 645
679 if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) 646 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
680 return 0; 647 ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
681 648 IPOIB_GID_ARG(mcast->mcmember.mgid));
682 ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
683 IPOIB_GID_ARG(mcast->mcmember.mgid));
684
685 rec.mgid = mcast->mcmember.mgid;
686 rec.port_gid = priv->local_gid;
687 rec.pkey = cpu_to_be16(priv->pkey);
688 649
689 /* Remove ourselves from the multicast group */ 650 /* Remove ourselves from the multicast group */
690 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), 651 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
691 &mcast->mcmember.mgid); 652 &mcast->mcmember.mgid);
692 if (ret) 653 if (ret)
693 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); 654 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
655 }
694 656
695 /* 657 if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
696 * Just make one shot at leaving and don't wait for a reply; 658 ib_sa_free_multicast(mcast->mc);
697 * if we fail, too bad.
698 */
699 ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
700 IB_SA_MCMEMBER_REC_MGID |
701 IB_SA_MCMEMBER_REC_PORT_GID |
702 IB_SA_MCMEMBER_REC_PKEY |
703 IB_SA_MCMEMBER_REC_JOIN_STATE,
704 0, GFP_ATOMIC, NULL,
705 mcast, &mcast->query);
706 if (ret < 0)
707 ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
708 "for leave (result = %d)\n", ret);
709 659
710 return 0; 660 return 0;
711} 661}
@@ -758,7 +708,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
758 dev_kfree_skb_any(skb); 708 dev_kfree_skb_any(skb);
759 } 709 }
760 710
761 if (mcast->query) 711 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
762 ipoib_dbg_mcast(priv, "no address vector, " 712 ipoib_dbg_mcast(priv, "no address vector, "
763 "but multicast join already started\n"); 713 "but multicast join already started\n");
764 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 714 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -916,7 +866,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
916 866
917 /* We have to cancel outside of the spinlock */ 867 /* We have to cancel outside of the spinlock */
918 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 868 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
919 wait_for_mcast_join(priv, mcast);
920 ipoib_mcast_leave(mcast->dev, mcast); 869 ipoib_mcast_leave(mcast->dev, mcast);
921 ipoib_mcast_free(mcast); 870 ipoib_mcast_free(mcast);
922 } 871 }