aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Doug Ledford <dledford@redhat.com> 2015-02-21 19:27:06 -0500
committer Doug Ledford <dledford@redhat.com> 2015-04-15 16:06:18 -0400
commit d2fe937ce6ce23daf5fb214e45432dbb631581b7 (patch)
tree c09ebad9541189444910a8e7e9e8e51b8a43c26b
parent 69911416d87d6673c48d23a9fbc060e85f41fc73 (diff)
IB/ipoib: deserialize multicast joins
Allow the ipoib layer to attempt to join all outstanding multicast groups at once. The ib_sa layer will serialize multiple attempts to join the same group, but will process attempts to join different groups in parallel. Take advantage of that. In order to make this happen, change the mcast_join_thread to loop through all needed joins, sending a join request for each one that we still need to join. There are a few special cases we handle though: 1) Don't attempt to join anything but the broadcast group until the join of the broadcast group has succeeded. 2) No longer restart the join task at the end of completion handling. If we completed successfully, we are done. The join task now needs to be kicked by mcast_send, mcast_restart_task, or mcast_start_thread, and should not need to be started at any other time except when scheduling a backoff attempt to rejoin. 3) No longer use separate join/completion routines for regular and sendonly joins; pass them all through the same routine and just do the right thing based on the SENDONLY join flag. 4) Only try to join a SENDONLY join twice, then drop the packets and quit trying. We leave the mcast group in the list so that if we get a new packet, all that we have to do is queue up the packet and restart the join task, and it will automatically try to join twice and then either send or flush the queue again. Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c 250
1 files changed, 82 insertions, 168 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 277e7ac7c4db..c670d9c2cda7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -307,111 +307,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
307 return 0; 307 return 0;
308} 308}
309 309
310static int
311ipoib_mcast_sendonly_join_complete(int status,
312 struct ib_sa_multicast *multicast)
313{
314 struct ipoib_mcast *mcast = multicast->context;
315 struct net_device *dev = mcast->dev;
316 struct ipoib_dev_priv *priv = netdev_priv(dev);
317
318 /*
319 * We have to take the mutex to force mcast_sendonly_join to
320 * return from ib_sa_multicast_join and set mcast->mc to a
321 * valid value. Otherwise we were racing with ourselves in
322 * that we might fail here, but get a valid return from
323 * ib_sa_multicast_join after we had cleared mcast->mc here,
324 * resulting in mis-matched joins and leaves and a deadlock
325 */
326 mutex_lock(&mcast_mutex);
327
328 /* We trap for port events ourselves. */
329 if (status == -ENETRESET) {
330 status = 0;
331 goto out;
332 }
333
334 if (!status)
335 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
336
337 if (status) {
338 if (mcast->logcount++ < 20)
339 ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
340 "join failed for %pI6, status %d\n",
341 mcast->mcmember.mgid.raw, status);
342
343 /* Flush out any queued packets */
344 netif_tx_lock_bh(dev);
345 while (!skb_queue_empty(&mcast->pkt_queue)) {
346 ++dev->stats.tx_dropped;
347 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
348 }
349 netif_tx_unlock_bh(dev);
350 __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
351 } else {
352 mcast->backoff = 1;
353 mcast->delay_until = jiffies;
354 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
355 }
356out:
357 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
358 if (status)
359 mcast->mc = NULL;
360 complete(&mcast->done);
361 mutex_unlock(&mcast_mutex);
362 return status;
363}
364
365static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
366{
367 struct net_device *dev = mcast->dev;
368 struct ipoib_dev_priv *priv = netdev_priv(dev);
369 struct ib_sa_mcmember_rec rec = {
370#if 0 /* Some SMs don't support send-only yet */
371 .join_state = 4
372#else
373 .join_state = 1
374#endif
375 };
376 int ret = 0;
377
378 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
379 ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
380 "multicast joins\n");
381 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
382 complete(&mcast->done);
383 return -ENODEV;
384 }
385
386 rec.mgid = mcast->mcmember.mgid;
387 rec.port_gid = priv->local_gid;
388 rec.pkey = cpu_to_be16(priv->pkey);
389
390 mutex_lock(&mcast_mutex);
391 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
392 priv->port, &rec,
393 IB_SA_MCMEMBER_REC_MGID |
394 IB_SA_MCMEMBER_REC_PORT_GID |
395 IB_SA_MCMEMBER_REC_PKEY |
396 IB_SA_MCMEMBER_REC_JOIN_STATE,
397 GFP_ATOMIC,
398 ipoib_mcast_sendonly_join_complete,
399 mcast);
400 if (IS_ERR(mcast->mc)) {
401 ret = PTR_ERR(mcast->mc);
402 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
403 ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
404 "failed (ret = %d)\n", ret);
405 complete(&mcast->done);
406 } else {
407 ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
408 "sendonly join\n", mcast->mcmember.mgid.raw);
409 }
410 mutex_unlock(&mcast_mutex);
411
412 return ret;
413}
414
415void ipoib_mcast_carrier_on_task(struct work_struct *work) 310void ipoib_mcast_carrier_on_task(struct work_struct *work)
416{ 311{
417 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 312 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
@@ -452,7 +347,9 @@ static int ipoib_mcast_join_complete(int status,
452 struct net_device *dev = mcast->dev; 347 struct net_device *dev = mcast->dev;
453 struct ipoib_dev_priv *priv = netdev_priv(dev); 348 struct ipoib_dev_priv *priv = netdev_priv(dev);
454 349
455 ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", 350 ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
351 test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
352 "sendonly " : "",
456 mcast->mcmember.mgid.raw, status); 353 mcast->mcmember.mgid.raw, status);
457 354
458 /* 355 /*
@@ -477,27 +374,52 @@ static int ipoib_mcast_join_complete(int status,
477 if (!status) { 374 if (!status) {
478 mcast->backoff = 1; 375 mcast->backoff = 1;
479 mcast->delay_until = jiffies; 376 mcast->delay_until = jiffies;
480 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
481 377
482 /* 378 /*
483 * Defer carrier on work to priv->wq to avoid a 379 * Defer carrier on work to priv->wq to avoid a
484 * deadlock on rtnl_lock here. 380 * deadlock on rtnl_lock here. Requeue our multicast
381 * work too, which will end up happening right after
382 * our carrier on task work and will allow us to
383 * send out all of the non-broadcast joins
485 */ 384 */
486 if (mcast == priv->broadcast) 385 if (mcast == priv->broadcast) {
487 queue_work(priv->wq, &priv->carrier_on_task); 386 queue_work(priv->wq, &priv->carrier_on_task);
387 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
388 }
488 } else { 389 } else {
489 if (mcast->logcount++ < 20) { 390 if (mcast->logcount++ < 20) {
490 if (status == -ETIMEDOUT || status == -EAGAIN) { 391 if (status == -ETIMEDOUT || status == -EAGAIN) {
491 ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", 392 ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
393 test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
492 mcast->mcmember.mgid.raw, status); 394 mcast->mcmember.mgid.raw, status);
493 } else { 395 } else {
494 ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", 396 ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
397 test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
495 mcast->mcmember.mgid.raw, status); 398 mcast->mcmember.mgid.raw, status);
496 } 399 }
497 } 400 }
498 401
499 /* Requeue this join task with a backoff delay */ 402 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
500 __ipoib_mcast_schedule_join_thread(priv, mcast, 1); 403 mcast->backoff >= 2) {
404 /*
405 * We only retry sendonly joins once before we drop
406 * the packet and quit trying to deal with the
407 * group. However, we leave the group in the
408 * mcast list as an unjoined group. If we want to
409 * try joining again, we simply queue up a packet
410 * and restart the join thread. The empty queue
411 * is why the join thread ignores this group.
412 */
413 mcast->backoff = 1;
414 netif_tx_lock_bh(dev);
415 while (!skb_queue_empty(&mcast->pkt_queue)) {
416 ++dev->stats.tx_dropped;
417 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
418 }
419 netif_tx_unlock_bh(dev);
420 } else
421 /* Requeue this join task with a backoff delay */
422 __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
501 } 423 }
502out: 424out:
503 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 425 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
@@ -650,45 +572,45 @@ void ipoib_mcast_join_task(struct work_struct *work)
650 list_for_each_entry(mcast, &priv->multicast_list, list) { 572 list_for_each_entry(mcast, &priv->multicast_list, list) {
651 if (IS_ERR_OR_NULL(mcast->mc) && 573 if (IS_ERR_OR_NULL(mcast->mc) &&
652 !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) && 574 !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
653 !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 575 (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
576 !skb_queue_empty(&mcast->pkt_queue))) {
654 if (mcast->backoff == 1 || 577 if (mcast->backoff == 1 ||
655 time_after_eq(jiffies, mcast->delay_until)) 578 time_after_eq(jiffies, mcast->delay_until)) {
656 /* Found the next unjoined group */ 579 /* Found the next unjoined group */
657 break; 580 init_completion(&mcast->done);
658 else if (!delay_until || 581 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
582 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
583 create = 0;
584 else
585 create = 1;
586 spin_unlock_irq(&priv->lock);
587 mutex_unlock(&mcast_mutex);
588 ipoib_mcast_join(dev, mcast, create);
589 mutex_lock(&mcast_mutex);
590 spin_lock_irq(&priv->lock);
591 } else if (!delay_until ||
659 time_before(mcast->delay_until, delay_until)) 592 time_before(mcast->delay_until, delay_until))
660 delay_until = mcast->delay_until; 593 delay_until = mcast->delay_until;
661 } 594 }
662 } 595 }
663 596
664 if (&mcast->list == &priv->multicast_list) { 597 mcast = NULL;
665 /* 598 ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
666 * All done, unless we have delayed work from
667 * backoff retransmissions, but we will get
668 * restarted when the time is right, so we are
669 * done for now
670 */
671 mcast = NULL;
672 ipoib_dbg_mcast(priv, "successfully joined all "
673 "multicast groups\n");
674 }
675 599
676out: 600out:
601 if (delay_until) {
602 cancel_delayed_work(&priv->mcast_task);
603 queue_delayed_work(priv->wq, &priv->mcast_task,
604 delay_until - jiffies);
605 }
677 if (mcast) { 606 if (mcast) {
678 init_completion(&mcast->done); 607 init_completion(&mcast->done);
679 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 608 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
680 } 609 }
681 spin_unlock_irq(&priv->lock); 610 spin_unlock_irq(&priv->lock);
682 mutex_unlock(&mcast_mutex); 611 mutex_unlock(&mcast_mutex);
683 if (mcast) { 612 if (mcast)
684 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 613 ipoib_mcast_join(dev, mcast, create);
685 ipoib_mcast_sendonly_join(mcast);
686 else
687 ipoib_mcast_join(dev, mcast, create);
688 }
689 if (delay_until)
690 queue_delayed_work(priv->wq, &priv->mcast_task,
691 delay_until - jiffies);
692} 614}
693 615
694int ipoib_mcast_start_thread(struct net_device *dev) 616int ipoib_mcast_start_thread(struct net_device *dev)
@@ -731,8 +653,6 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
731 653
732 if (!IS_ERR_OR_NULL(mcast->mc)) 654 if (!IS_ERR_OR_NULL(mcast->mc))
733 ib_sa_free_multicast(mcast->mc); 655 ib_sa_free_multicast(mcast->mc);
734 else
735 ipoib_dbg(priv, "ipoib_mcast_leave with mcast->mc invalid\n");
736 656
737 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 657 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
738 ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", 658 ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
@@ -768,43 +688,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
768 } 688 }
769 689
770 mcast = __ipoib_mcast_find(dev, mgid); 690 mcast = __ipoib_mcast_find(dev, mgid);
771 if (!mcast) { 691 if (!mcast || !mcast->ah) {
772 /* Let's create a new send only group now */
773 ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
774 mgid);
775
776 mcast = ipoib_mcast_alloc(dev, 0);
777 if (!mcast) { 692 if (!mcast) {
778 ipoib_warn(priv, "unable to allocate memory for " 693 /* Let's create a new send only group now */
779 "multicast structure\n"); 694 ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
780 ++dev->stats.tx_dropped; 695 mgid);
781 dev_kfree_skb_any(skb); 696
782 goto out; 697 mcast = ipoib_mcast_alloc(dev, 0);
783 } 698 if (!mcast) {
784 699 ipoib_warn(priv, "unable to allocate memory "
785 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); 700 "for multicast structure\n");
786 memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); 701 ++dev->stats.tx_dropped;
787 __ipoib_mcast_add(dev, mcast); 702 dev_kfree_skb_any(skb);
788 list_add_tail(&mcast->list, &priv->multicast_list); 703 goto unlock;
789 __ipoib_mcast_schedule_join_thread(priv, NULL, 0); 704 }
790 }
791 705
792 if (!mcast->ah) { 706 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
707 memcpy(mcast->mcmember.mgid.raw, mgid,
708 sizeof (union ib_gid));
709 __ipoib_mcast_add(dev, mcast);
710 list_add_tail(&mcast->list, &priv->multicast_list);
711 }
793 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) 712 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
794 skb_queue_tail(&mcast->pkt_queue, skb); 713 skb_queue_tail(&mcast->pkt_queue, skb);
795 else { 714 else {
796 ++dev->stats.tx_dropped; 715 ++dev->stats.tx_dropped;
797 dev_kfree_skb_any(skb); 716 dev_kfree_skb_any(skb);
798 } 717 }
799 /* 718 if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
800 * If lookup completes between here and out:, don't 719 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
801 * want to send packet twice. 720 }
802 */ 721 } else {
803 mcast = NULL;
804 }
805
806out:
807 if (mcast && mcast->ah) {
808 struct ipoib_neigh *neigh; 722 struct ipoib_neigh *neigh;
809 723
810 spin_unlock_irqrestore(&priv->lock, flags); 724 spin_unlock_irqrestore(&priv->lock, flags);