diff options
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 250 |
1 files changed, 82 insertions, 168 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 277e7ac7c4db..c670d9c2cda7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -307,111 +307,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |||
307 | return 0; | 307 | return 0; |
308 | } | 308 | } |
309 | 309 | ||
310 | static int | ||
311 | ipoib_mcast_sendonly_join_complete(int status, | ||
312 | struct ib_sa_multicast *multicast) | ||
313 | { | ||
314 | struct ipoib_mcast *mcast = multicast->context; | ||
315 | struct net_device *dev = mcast->dev; | ||
316 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
317 | |||
318 | /* | ||
319 | * We have to take the mutex to force mcast_sendonly_join to | ||
320 | * return from ib_sa_multicast_join and set mcast->mc to a | ||
321 | * valid value. Otherwise we were racing with ourselves in | ||
322 | * that we might fail here, but get a valid return from | ||
323 | * ib_sa_multicast_join after we had cleared mcast->mc here, | ||
324 | * resulting in mis-matched joins and leaves and a deadlock | ||
325 | */ | ||
326 | mutex_lock(&mcast_mutex); | ||
327 | |||
328 | /* We trap for port events ourselves. */ | ||
329 | if (status == -ENETRESET) { | ||
330 | status = 0; | ||
331 | goto out; | ||
332 | } | ||
333 | |||
334 | if (!status) | ||
335 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); | ||
336 | |||
337 | if (status) { | ||
338 | if (mcast->logcount++ < 20) | ||
339 | ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast " | ||
340 | "join failed for %pI6, status %d\n", | ||
341 | mcast->mcmember.mgid.raw, status); | ||
342 | |||
343 | /* Flush out any queued packets */ | ||
344 | netif_tx_lock_bh(dev); | ||
345 | while (!skb_queue_empty(&mcast->pkt_queue)) { | ||
346 | ++dev->stats.tx_dropped; | ||
347 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); | ||
348 | } | ||
349 | netif_tx_unlock_bh(dev); | ||
350 | __ipoib_mcast_schedule_join_thread(priv, mcast, 1); | ||
351 | } else { | ||
352 | mcast->backoff = 1; | ||
353 | mcast->delay_until = jiffies; | ||
354 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); | ||
355 | } | ||
356 | out: | ||
357 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
358 | if (status) | ||
359 | mcast->mc = NULL; | ||
360 | complete(&mcast->done); | ||
361 | mutex_unlock(&mcast_mutex); | ||
362 | return status; | ||
363 | } | ||
364 | |||
365 | static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | ||
366 | { | ||
367 | struct net_device *dev = mcast->dev; | ||
368 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
369 | struct ib_sa_mcmember_rec rec = { | ||
370 | #if 0 /* Some SMs don't support send-only yet */ | ||
371 | .join_state = 4 | ||
372 | #else | ||
373 | .join_state = 1 | ||
374 | #endif | ||
375 | }; | ||
376 | int ret = 0; | ||
377 | |||
378 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { | ||
379 | ipoib_dbg_mcast(priv, "device shutting down, no sendonly " | ||
380 | "multicast joins\n"); | ||
381 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
382 | complete(&mcast->done); | ||
383 | return -ENODEV; | ||
384 | } | ||
385 | |||
386 | rec.mgid = mcast->mcmember.mgid; | ||
387 | rec.port_gid = priv->local_gid; | ||
388 | rec.pkey = cpu_to_be16(priv->pkey); | ||
389 | |||
390 | mutex_lock(&mcast_mutex); | ||
391 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, | ||
392 | priv->port, &rec, | ||
393 | IB_SA_MCMEMBER_REC_MGID | | ||
394 | IB_SA_MCMEMBER_REC_PORT_GID | | ||
395 | IB_SA_MCMEMBER_REC_PKEY | | ||
396 | IB_SA_MCMEMBER_REC_JOIN_STATE, | ||
397 | GFP_ATOMIC, | ||
398 | ipoib_mcast_sendonly_join_complete, | ||
399 | mcast); | ||
400 | if (IS_ERR(mcast->mc)) { | ||
401 | ret = PTR_ERR(mcast->mc); | ||
402 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
403 | ipoib_warn(priv, "ib_sa_join_multicast for sendonly join " | ||
404 | "failed (ret = %d)\n", ret); | ||
405 | complete(&mcast->done); | ||
406 | } else { | ||
407 | ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting " | ||
408 | "sendonly join\n", mcast->mcmember.mgid.raw); | ||
409 | } | ||
410 | mutex_unlock(&mcast_mutex); | ||
411 | |||
412 | return ret; | ||
413 | } | ||
414 | |||
415 | void ipoib_mcast_carrier_on_task(struct work_struct *work) | 310 | void ipoib_mcast_carrier_on_task(struct work_struct *work) |
416 | { | 311 | { |
417 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, | 312 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, |
@@ -452,7 +347,9 @@ static int ipoib_mcast_join_complete(int status, | |||
452 | struct net_device *dev = mcast->dev; | 347 | struct net_device *dev = mcast->dev; |
453 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 348 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
454 | 349 | ||
455 | ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", | 350 | ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n", |
351 | test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? | ||
352 | "sendonly " : "", | ||
456 | mcast->mcmember.mgid.raw, status); | 353 | mcast->mcmember.mgid.raw, status); |
457 | 354 | ||
458 | /* | 355 | /* |
@@ -477,27 +374,52 @@ static int ipoib_mcast_join_complete(int status, | |||
477 | if (!status) { | 374 | if (!status) { |
478 | mcast->backoff = 1; | 375 | mcast->backoff = 1; |
479 | mcast->delay_until = jiffies; | 376 | mcast->delay_until = jiffies; |
480 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); | ||
481 | 377 | ||
482 | /* | 378 | /* |
483 | * Defer carrier on work to priv->wq to avoid a | 379 | * Defer carrier on work to priv->wq to avoid a |
484 | * deadlock on rtnl_lock here. | 380 | * deadlock on rtnl_lock here. Requeue our multicast |
381 | * work too, which will end up happening right after | ||
382 | * our carrier on task work and will allow us to | ||
383 | * send out all of the non-broadcast joins | ||
485 | */ | 384 | */ |
486 | if (mcast == priv->broadcast) | 385 | if (mcast == priv->broadcast) { |
487 | queue_work(priv->wq, &priv->carrier_on_task); | 386 | queue_work(priv->wq, &priv->carrier_on_task); |
387 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); | ||
388 | } | ||
488 | } else { | 389 | } else { |
489 | if (mcast->logcount++ < 20) { | 390 | if (mcast->logcount++ < 20) { |
490 | if (status == -ETIMEDOUT || status == -EAGAIN) { | 391 | if (status == -ETIMEDOUT || status == -EAGAIN) { |
491 | ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", | 392 | ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n", |
393 | test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "", | ||
492 | mcast->mcmember.mgid.raw, status); | 394 | mcast->mcmember.mgid.raw, status); |
493 | } else { | 395 | } else { |
494 | ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", | 396 | ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n", |
397 | test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "", | ||
495 | mcast->mcmember.mgid.raw, status); | 398 | mcast->mcmember.mgid.raw, status); |
496 | } | 399 | } |
497 | } | 400 | } |
498 | 401 | ||
499 | /* Requeue this join task with a backoff delay */ | 402 | if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && |
500 | __ipoib_mcast_schedule_join_thread(priv, mcast, 1); | 403 | mcast->backoff >= 2) { |
404 | /* | ||
405 | * We only retry sendonly joins once before we drop | ||
406 | * the packet and quit trying to deal with the | ||
407 | * group. However, we leave the group in the | ||
408 | * mcast list as an unjoined group. If we want to | ||
409 | * try joining again, we simply queue up a packet | ||
410 | * and restart the join thread. The empty queue | ||
411 | * is why the join thread ignores this group. | ||
412 | */ | ||
413 | mcast->backoff = 1; | ||
414 | netif_tx_lock_bh(dev); | ||
415 | while (!skb_queue_empty(&mcast->pkt_queue)) { | ||
416 | ++dev->stats.tx_dropped; | ||
417 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); | ||
418 | } | ||
419 | netif_tx_unlock_bh(dev); | ||
420 | } else | ||
421 | /* Requeue this join task with a backoff delay */ | ||
422 | __ipoib_mcast_schedule_join_thread(priv, mcast, 1); | ||
501 | } | 423 | } |
502 | out: | 424 | out: |
503 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | 425 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
@@ -650,45 +572,45 @@ void ipoib_mcast_join_task(struct work_struct *work) | |||
650 | list_for_each_entry(mcast, &priv->multicast_list, list) { | 572 | list_for_each_entry(mcast, &priv->multicast_list, list) { |
651 | if (IS_ERR_OR_NULL(mcast->mc) && | 573 | if (IS_ERR_OR_NULL(mcast->mc) && |
652 | !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) && | 574 | !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) && |
653 | !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | 575 | (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) || |
576 | !skb_queue_empty(&mcast->pkt_queue))) { | ||
654 | if (mcast->backoff == 1 || | 577 | if (mcast->backoff == 1 || |
655 | time_after_eq(jiffies, mcast->delay_until)) | 578 | time_after_eq(jiffies, mcast->delay_until)) { |
656 | /* Found the next unjoined group */ | 579 | /* Found the next unjoined group */ |
657 | break; | 580 | init_completion(&mcast->done); |
658 | else if (!delay_until || | 581 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
582 | if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) | ||
583 | create = 0; | ||
584 | else | ||
585 | create = 1; | ||
586 | spin_unlock_irq(&priv->lock); | ||
587 | mutex_unlock(&mcast_mutex); | ||
588 | ipoib_mcast_join(dev, mcast, create); | ||
589 | mutex_lock(&mcast_mutex); | ||
590 | spin_lock_irq(&priv->lock); | ||
591 | } else if (!delay_until || | ||
659 | time_before(mcast->delay_until, delay_until)) | 592 | time_before(mcast->delay_until, delay_until)) |
660 | delay_until = mcast->delay_until; | 593 | delay_until = mcast->delay_until; |
661 | } | 594 | } |
662 | } | 595 | } |
663 | 596 | ||
664 | if (&mcast->list == &priv->multicast_list) { | 597 | mcast = NULL; |
665 | /* | 598 | ipoib_dbg_mcast(priv, "successfully started all multicast joins\n"); |
666 | * All done, unless we have delayed work from | ||
667 | * backoff retransmissions, but we will get | ||
668 | * restarted when the time is right, so we are | ||
669 | * done for now | ||
670 | */ | ||
671 | mcast = NULL; | ||
672 | ipoib_dbg_mcast(priv, "successfully joined all " | ||
673 | "multicast groups\n"); | ||
674 | } | ||
675 | 599 | ||
676 | out: | 600 | out: |
601 | if (delay_until) { | ||
602 | cancel_delayed_work(&priv->mcast_task); | ||
603 | queue_delayed_work(priv->wq, &priv->mcast_task, | ||
604 | delay_until - jiffies); | ||
605 | } | ||
677 | if (mcast) { | 606 | if (mcast) { |
678 | init_completion(&mcast->done); | 607 | init_completion(&mcast->done); |
679 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | 608 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
680 | } | 609 | } |
681 | spin_unlock_irq(&priv->lock); | 610 | spin_unlock_irq(&priv->lock); |
682 | mutex_unlock(&mcast_mutex); | 611 | mutex_unlock(&mcast_mutex); |
683 | if (mcast) { | 612 | if (mcast) |
684 | if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) | 613 | ipoib_mcast_join(dev, mcast, create); |
685 | ipoib_mcast_sendonly_join(mcast); | ||
686 | else | ||
687 | ipoib_mcast_join(dev, mcast, create); | ||
688 | } | ||
689 | if (delay_until) | ||
690 | queue_delayed_work(priv->wq, &priv->mcast_task, | ||
691 | delay_until - jiffies); | ||
692 | } | 614 | } |
693 | 615 | ||
694 | int ipoib_mcast_start_thread(struct net_device *dev) | 616 | int ipoib_mcast_start_thread(struct net_device *dev) |
@@ -731,8 +653,6 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) | |||
731 | 653 | ||
732 | if (!IS_ERR_OR_NULL(mcast->mc)) | 654 | if (!IS_ERR_OR_NULL(mcast->mc)) |
733 | ib_sa_free_multicast(mcast->mc); | 655 | ib_sa_free_multicast(mcast->mc); |
734 | else | ||
735 | ipoib_dbg(priv, "ipoib_mcast_leave with mcast->mc invalid\n"); | ||
736 | 656 | ||
737 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | 657 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { |
738 | ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", | 658 | ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", |
@@ -768,43 +688,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) | |||
768 | } | 688 | } |
769 | 689 | ||
770 | mcast = __ipoib_mcast_find(dev, mgid); | 690 | mcast = __ipoib_mcast_find(dev, mgid); |
771 | if (!mcast) { | 691 | if (!mcast || !mcast->ah) { |
772 | /* Let's create a new send only group now */ | ||
773 | ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", | ||
774 | mgid); | ||
775 | |||
776 | mcast = ipoib_mcast_alloc(dev, 0); | ||
777 | if (!mcast) { | 692 | if (!mcast) { |
778 | ipoib_warn(priv, "unable to allocate memory for " | 693 | /* Let's create a new send only group now */ |
779 | "multicast structure\n"); | 694 | ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", |
780 | ++dev->stats.tx_dropped; | 695 | mgid); |
781 | dev_kfree_skb_any(skb); | 696 | |
782 | goto out; | 697 | mcast = ipoib_mcast_alloc(dev, 0); |
783 | } | 698 | if (!mcast) { |
784 | 699 | ipoib_warn(priv, "unable to allocate memory " | |
785 | set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); | 700 | "for multicast structure\n"); |
786 | memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); | 701 | ++dev->stats.tx_dropped; |
787 | __ipoib_mcast_add(dev, mcast); | 702 | dev_kfree_skb_any(skb); |
788 | list_add_tail(&mcast->list, &priv->multicast_list); | 703 | goto unlock; |
789 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); | 704 | } |
790 | } | ||
791 | 705 | ||
792 | if (!mcast->ah) { | 706 | set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); |
707 | memcpy(mcast->mcmember.mgid.raw, mgid, | ||
708 | sizeof (union ib_gid)); | ||
709 | __ipoib_mcast_add(dev, mcast); | ||
710 | list_add_tail(&mcast->list, &priv->multicast_list); | ||
711 | } | ||
793 | if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) | 712 | if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) |
794 | skb_queue_tail(&mcast->pkt_queue, skb); | 713 | skb_queue_tail(&mcast->pkt_queue, skb); |
795 | else { | 714 | else { |
796 | ++dev->stats.tx_dropped; | 715 | ++dev->stats.tx_dropped; |
797 | dev_kfree_skb_any(skb); | 716 | dev_kfree_skb_any(skb); |
798 | } | 717 | } |
799 | /* | 718 | if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { |
800 | * If lookup completes between here and out:, don't | 719 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); |
801 | * want to send packet twice. | 720 | } |
802 | */ | 721 | } else { |
803 | mcast = NULL; | ||
804 | } | ||
805 | |||
806 | out: | ||
807 | if (mcast && mcast->ah) { | ||
808 | struct ipoib_neigh *neigh; | 722 | struct ipoib_neigh *neigh; |
809 | 723 | ||
810 | spin_unlock_irqrestore(&priv->lock, flags); | 724 | spin_unlock_irqrestore(&priv->lock, flags); |