-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_multicast.c	250
1 file changed, 82 insertions, 168 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 277e7ac7c4db..c670d9c2cda7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -307,111 +307,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 	return 0;
 }
 
-static int
-ipoib_mcast_sendonly_join_complete(int status,
-				   struct ib_sa_multicast *multicast)
-{
-	struct ipoib_mcast *mcast = multicast->context;
-	struct net_device *dev = mcast->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	/*
-	 * We have to take the mutex to force mcast_sendonly_join to
-	 * return from ib_sa_multicast_join and set mcast->mc to a
-	 * valid value. Otherwise we were racing with ourselves in
-	 * that we might fail here, but get a valid return from
-	 * ib_sa_multicast_join after we had cleared mcast->mc here,
-	 * resulting in mis-matched joins and leaves and a deadlock
-	 */
-	mutex_lock(&mcast_mutex);
-
-	/* We trap for port events ourselves. */
-	if (status == -ENETRESET) {
-		status = 0;
-		goto out;
-	}
-
-	if (!status)
-		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
-
-	if (status) {
-		if (mcast->logcount++ < 20)
-			ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
-					"join failed for %pI6, status %d\n",
-					mcast->mcmember.mgid.raw, status);
-
-		/* Flush out any queued packets */
-		netif_tx_lock_bh(dev);
-		while (!skb_queue_empty(&mcast->pkt_queue)) {
-			++dev->stats.tx_dropped;
-			dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
-		}
-		netif_tx_unlock_bh(dev);
-		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
-	} else {
-		mcast->backoff = 1;
-		mcast->delay_until = jiffies;
-		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
-	}
-out:
-	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-	if (status)
-		mcast->mc = NULL;
-	complete(&mcast->done);
-	mutex_unlock(&mcast_mutex);
-	return status;
-}
-
-static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
-{
-	struct net_device *dev = mcast->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ib_sa_mcmember_rec rec = {
-#if 0	/* Some SMs don't support send-only yet */
-		.join_state = 4
-#else
-		.join_state = 1
-#endif
-	};
-	int ret = 0;
-
-	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-		ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
-				"multicast joins\n");
-		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-		complete(&mcast->done);
-		return -ENODEV;
-	}
-
-	rec.mgid     = mcast->mcmember.mgid;
-	rec.port_gid = priv->local_gid;
-	rec.pkey     = cpu_to_be16(priv->pkey);
-
-	mutex_lock(&mcast_mutex);
-	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
-					 priv->port, &rec,
-					 IB_SA_MCMEMBER_REC_MGID |
-					 IB_SA_MCMEMBER_REC_PORT_GID |
-					 IB_SA_MCMEMBER_REC_PKEY |
-					 IB_SA_MCMEMBER_REC_JOIN_STATE,
-					 GFP_ATOMIC,
-					 ipoib_mcast_sendonly_join_complete,
-					 mcast);
-	if (IS_ERR(mcast->mc)) {
-		ret = PTR_ERR(mcast->mc);
-		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-		ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
-			   "failed (ret = %d)\n", ret);
-		complete(&mcast->done);
-	} else {
-		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
-				"sendonly join\n", mcast->mcmember.mgid.raw);
-	}
-	mutex_unlock(&mcast_mutex);
-
-	return ret;
-}
-
 void ipoib_mcast_carrier_on_task(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
@@ -452,7 +347,9 @@ static int ipoib_mcast_join_complete(int status,
 	struct net_device *dev = mcast->dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
+	ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
+			test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
+			"sendonly " : "",
 			mcast->mcmember.mgid.raw, status);
 
 	/*
@@ -477,27 +374,52 @@ static int ipoib_mcast_join_complete(int status,
 	if (!status) {
 		mcast->backoff = 1;
 		mcast->delay_until = jiffies;
-		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
 
 		/*
 		 * Defer carrier on work to priv->wq to avoid a
-		 * deadlock on rtnl_lock here.
+		 * deadlock on rtnl_lock here. Requeue our multicast
+		 * work too, which will end up happening right after
+		 * our carrier on task work and will allow us to
+		 * send out all of the non-broadcast joins
 		 */
-		if (mcast == priv->broadcast)
+		if (mcast == priv->broadcast) {
 			queue_work(priv->wq, &priv->carrier_on_task);
+			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+		}
 	} else {
 		if (mcast->logcount++ < 20) {
 			if (status == -ETIMEDOUT || status == -EAGAIN) {
-				ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
+				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
+						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
 						mcast->mcmember.mgid.raw, status);
 			} else {
-				ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
+				ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
+					   test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
 					   mcast->mcmember.mgid.raw, status);
 			}
 		}
 
-		/* Requeue this join task with a backoff delay */
-		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
+		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+		    mcast->backoff >= 2) {
+			/*
+			 * We only retry sendonly joins once before we drop
+			 * the packet and quit trying to deal with the
+			 * group. However, we leave the group in the
+			 * mcast list as an unjoined group. If we want to
+			 * try joining again, we simply queue up a packet
+			 * and restart the join thread. The empty queue
+			 * is why the join thread ignores this group.
+			 */
+			mcast->backoff = 1;
+			netif_tx_lock_bh(dev);
+			while (!skb_queue_empty(&mcast->pkt_queue)) {
+				++dev->stats.tx_dropped;
+				dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+			}
+			netif_tx_unlock_bh(dev);
+		} else
+			/* Requeue this join task with a backoff delay */
+			__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
 	}
 out:
 	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
@@ -650,45 +572,45 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	list_for_each_entry(mcast, &priv->multicast_list, list) {
 		if (IS_ERR_OR_NULL(mcast->mc) &&
 		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
-		    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+		    (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
+		     !skb_queue_empty(&mcast->pkt_queue))) {
 			if (mcast->backoff == 1 ||
-			    time_after_eq(jiffies, mcast->delay_until))
+			    time_after_eq(jiffies, mcast->delay_until)) {
 				/* Found the next unjoined group */
-				break;
-			else if (!delay_until ||
+				init_completion(&mcast->done);
+				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+				if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+					create = 0;
+				else
+					create = 1;
+				spin_unlock_irq(&priv->lock);
+				mutex_unlock(&mcast_mutex);
+				ipoib_mcast_join(dev, mcast, create);
+				mutex_lock(&mcast_mutex);
+				spin_lock_irq(&priv->lock);
+			} else if (!delay_until ||
 				   time_before(mcast->delay_until, delay_until))
 				delay_until = mcast->delay_until;
 		}
 	}
 
-	if (&mcast->list == &priv->multicast_list) {
-		/*
-		 * All done, unless we have delayed work from
-		 * backoff retransmissions, but we will get
-		 * restarted when the time is right, so we are
-		 * done for now
-		 */
-		mcast = NULL;
-		ipoib_dbg_mcast(priv, "successfully joined all "
-				"multicast groups\n");
-	}
+	mcast = NULL;
+	ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
 
 out:
+	if (delay_until) {
+		cancel_delayed_work(&priv->mcast_task);
+		queue_delayed_work(priv->wq, &priv->mcast_task,
+				   delay_until - jiffies);
+	}
 	if (mcast) {
 		init_completion(&mcast->done);
 		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 	}
 	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
-	if (mcast) {
-		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-			ipoib_mcast_sendonly_join(mcast);
-		else
-			ipoib_mcast_join(dev, mcast, create);
-	}
-	if (delay_until)
-		queue_delayed_work(priv->wq, &priv->mcast_task,
-				   delay_until - jiffies);
+	if (mcast)
+		ipoib_mcast_join(dev, mcast, create);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)
@@ -731,8 +653,6 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 
 	if (!IS_ERR_OR_NULL(mcast->mc))
 		ib_sa_free_multicast(mcast->mc);
-	else
-		ipoib_dbg(priv, "ipoib_mcast_leave with mcast->mc invalid\n");
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
 		ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
@@ -768,43 +688,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 	}
 
 	mcast = __ipoib_mcast_find(dev, mgid);
-	if (!mcast) {
-		/* Let's create a new send only group now */
-		ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
-				mgid);
-
-		mcast = ipoib_mcast_alloc(dev, 0);
-		if (!mcast) {
-			ipoib_warn(priv, "unable to allocate memory for "
-				   "multicast structure\n");
-			++dev->stats.tx_dropped;
-			dev_kfree_skb_any(skb);
-			goto out;
-		}
-
-		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
-		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
-		__ipoib_mcast_add(dev, mcast);
-		list_add_tail(&mcast->list, &priv->multicast_list);
-		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
-	}
+	if (!mcast || !mcast->ah) {
+		if (!mcast) {
+			/* Let's create a new send only group now */
+			ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
+					mgid);
+
+			mcast = ipoib_mcast_alloc(dev, 0);
+			if (!mcast) {
+				ipoib_warn(priv, "unable to allocate memory "
+					   "for multicast structure\n");
+				++dev->stats.tx_dropped;
+				dev_kfree_skb_any(skb);
+				goto unlock;
+			}
 
-	if (!mcast->ah) {
+			set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
+			memcpy(mcast->mcmember.mgid.raw, mgid,
+			       sizeof (union ib_gid));
+			__ipoib_mcast_add(dev, mcast);
+			list_add_tail(&mcast->list, &priv->multicast_list);
+		}
 		if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
 			skb_queue_tail(&mcast->pkt_queue, skb);
 		else {
 			++dev->stats.tx_dropped;
 			dev_kfree_skb_any(skb);
 		}
-		/*
-		 * If lookup completes between here and out:, don't
-		 * want to send packet twice.
-		 */
-		mcast = NULL;
-	}
-
-out:
-	if (mcast && mcast->ah) {
+		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+		}
+	} else {
 		struct ipoib_neigh *neigh;
 
 		spin_unlock_irqrestore(&priv->lock, flags);