diff options
author | Doug Ledford <dledford@redhat.com> | 2015-02-21 19:27:07 -0500 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2015-04-15 16:06:18 -0400 |
commit | 1c0453d64a341909bbf89cb68c9edaa6cff93850 (patch) | |
tree | c287b5518d572e6be2b634fcf32084f8837f0dab /drivers/infiniband/ulp/ipoib | |
parent | d2fe937ce6ce23daf5fb214e45432dbb631581b7 (diff) |
IB/ipoib: drop mcast_mutex usage
We needed the mcast_mutex when we had to prevent the join completion
callback from having the value it stored in mcast->mc overwritten
by a delayed return from ib_sa_join_multicast. By storing the return
of ib_sa_join_multicast in an intermediate variable, we prevent a
delayed return from ib_sa_join_multicast overwriting the valid
contents of mcast->mc, and we no longer need a mutex to force the
join callback to run after the return of ib_sa_join_multicast. This
allows us to do away with the mutex entirely and protect our critical
sections with just a spinlock instead. This is highly desirable
as there were some places where we couldn't use a mutex because the
code was not allowed to sleep, and so we were currently using a mix
of mutex and spinlock to protect what we needed to protect. Now we
only have a spinlock and the locking complexity is greatly reduced.
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 70 |
1 file changed, 32 insertions, 38 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index c670d9c2cda7..3203ebe9b100 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level, | |||
55 | "Enable multicast debug tracing if > 0"); | 55 | "Enable multicast debug tracing if > 0"); |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | static DEFINE_MUTEX(mcast_mutex); | ||
59 | |||
60 | struct ipoib_mcast_iter { | 58 | struct ipoib_mcast_iter { |
61 | struct net_device *dev; | 59 | struct net_device *dev; |
62 | union ib_gid mgid; | 60 | union ib_gid mgid; |
@@ -67,7 +65,7 @@ struct ipoib_mcast_iter { | |||
67 | }; | 65 | }; |
68 | 66 | ||
69 | /* | 67 | /* |
70 | * This should be called with the mcast_mutex held | 68 | * This should be called with the priv->lock held |
71 | */ | 69 | */ |
72 | static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv, | 70 | static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv, |
73 | struct ipoib_mcast *mcast, | 71 | struct ipoib_mcast *mcast, |
@@ -352,16 +350,6 @@ static int ipoib_mcast_join_complete(int status, | |||
352 | "sendonly " : "", | 350 | "sendonly " : "", |
353 | mcast->mcmember.mgid.raw, status); | 351 | mcast->mcmember.mgid.raw, status); |
354 | 352 | ||
355 | /* | ||
356 | * We have to take the mutex to force mcast_join to | ||
357 | * return from ib_sa_multicast_join and set mcast->mc to a | ||
358 | * valid value. Otherwise we were racing with ourselves in | ||
359 | * that we might fail here, but get a valid return from | ||
360 | * ib_sa_multicast_join after we had cleared mcast->mc here, | ||
361 | * resulting in mis-matched joins and leaves and a deadlock | ||
362 | */ | ||
363 | mutex_lock(&mcast_mutex); | ||
364 | |||
365 | /* We trap for port events ourselves. */ | 353 | /* We trap for port events ourselves. */ |
366 | if (status == -ENETRESET) { | 354 | if (status == -ENETRESET) { |
367 | status = 0; | 355 | status = 0; |
@@ -383,8 +371,10 @@ static int ipoib_mcast_join_complete(int status, | |||
383 | * send out all of the non-broadcast joins | 371 | * send out all of the non-broadcast joins |
384 | */ | 372 | */ |
385 | if (mcast == priv->broadcast) { | 373 | if (mcast == priv->broadcast) { |
374 | spin_lock_irq(&priv->lock); | ||
386 | queue_work(priv->wq, &priv->carrier_on_task); | 375 | queue_work(priv->wq, &priv->carrier_on_task); |
387 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); | 376 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); |
377 | goto out_locked; | ||
388 | } | 378 | } |
389 | } else { | 379 | } else { |
390 | if (mcast->logcount++ < 20) { | 380 | if (mcast->logcount++ < 20) { |
@@ -417,16 +407,28 @@ static int ipoib_mcast_join_complete(int status, | |||
417 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); | 407 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
418 | } | 408 | } |
419 | netif_tx_unlock_bh(dev); | 409 | netif_tx_unlock_bh(dev); |
420 | } else | 410 | } else { |
411 | spin_lock_irq(&priv->lock); | ||
421 | /* Requeue this join task with a backoff delay */ | 412 | /* Requeue this join task with a backoff delay */ |
422 | __ipoib_mcast_schedule_join_thread(priv, mcast, 1); | 413 | __ipoib_mcast_schedule_join_thread(priv, mcast, 1); |
414 | goto out_locked; | ||
415 | } | ||
423 | } | 416 | } |
424 | out: | 417 | out: |
425 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | 418 | spin_lock_irq(&priv->lock); |
419 | out_locked: | ||
420 | /* | ||
421 | * Make sure to set mcast->mc before we clear the busy flag to avoid | ||
422 | * racing with code that checks for BUSY before checking mcast->mc | ||
423 | */ | ||
426 | if (status) | 424 | if (status) |
427 | mcast->mc = NULL; | 425 | mcast->mc = NULL; |
426 | else | ||
427 | mcast->mc = multicast; | ||
428 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
429 | spin_unlock_irq(&priv->lock); | ||
428 | complete(&mcast->done); | 430 | complete(&mcast->done); |
429 | mutex_unlock(&mcast_mutex); | 431 | |
430 | return status; | 432 | return status; |
431 | } | 433 | } |
432 | 434 | ||
@@ -434,6 +436,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |||
434 | int create) | 436 | int create) |
435 | { | 437 | { |
436 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 438 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
439 | struct ib_sa_multicast *multicast; | ||
437 | struct ib_sa_mcmember_rec rec = { | 440 | struct ib_sa_mcmember_rec rec = { |
438 | .join_state = 1 | 441 | .join_state = 1 |
439 | }; | 442 | }; |
@@ -475,18 +478,19 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |||
475 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; | 478 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; |
476 | } | 479 | } |
477 | 480 | ||
478 | mutex_lock(&mcast_mutex); | 481 | multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, |
479 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, | ||
480 | &rec, comp_mask, GFP_KERNEL, | 482 | &rec, comp_mask, GFP_KERNEL, |
481 | ipoib_mcast_join_complete, mcast); | 483 | ipoib_mcast_join_complete, mcast); |
482 | if (IS_ERR(mcast->mc)) { | 484 | if (IS_ERR(multicast)) { |
483 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | 485 | ret = PTR_ERR(multicast); |
484 | ret = PTR_ERR(mcast->mc); | ||
485 | ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); | 486 | ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); |
487 | spin_lock_irq(&priv->lock); | ||
488 | /* Requeue this join task with a backoff delay */ | ||
486 | __ipoib_mcast_schedule_join_thread(priv, mcast, 1); | 489 | __ipoib_mcast_schedule_join_thread(priv, mcast, 1); |
490 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
491 | spin_unlock_irq(&priv->lock); | ||
487 | complete(&mcast->done); | 492 | complete(&mcast->done); |
488 | } | 493 | } |
489 | mutex_unlock(&mcast_mutex); | ||
490 | } | 494 | } |
491 | 495 | ||
492 | void ipoib_mcast_join_task(struct work_struct *work) | 496 | void ipoib_mcast_join_task(struct work_struct *work) |
@@ -515,15 +519,6 @@ void ipoib_mcast_join_task(struct work_struct *work) | |||
515 | else | 519 | else |
516 | memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); | 520 | memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); |
517 | 521 | ||
518 | /* | ||
519 | * We have to hold the mutex to keep from racing with the join | ||
520 | * completion threads on setting flags on mcasts, and we have | ||
521 | * to hold the priv->lock because dev_flush will remove entries | ||
522 | * out from underneath us, so at a minimum we need the lock | ||
523 | * through the time that we do the for_each loop of the mcast | ||
524 | * list or else dev_flush can make us oops. | ||
525 | */ | ||
526 | mutex_lock(&mcast_mutex); | ||
527 | spin_lock_irq(&priv->lock); | 522 | spin_lock_irq(&priv->lock); |
528 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) | 523 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) |
529 | goto out; | 524 | goto out; |
@@ -584,9 +579,7 @@ void ipoib_mcast_join_task(struct work_struct *work) | |||
584 | else | 579 | else |
585 | create = 1; | 580 | create = 1; |
586 | spin_unlock_irq(&priv->lock); | 581 | spin_unlock_irq(&priv->lock); |
587 | mutex_unlock(&mcast_mutex); | ||
588 | ipoib_mcast_join(dev, mcast, create); | 582 | ipoib_mcast_join(dev, mcast, create); |
589 | mutex_lock(&mcast_mutex); | ||
590 | spin_lock_irq(&priv->lock); | 583 | spin_lock_irq(&priv->lock); |
591 | } else if (!delay_until || | 584 | } else if (!delay_until || |
592 | time_before(mcast->delay_until, delay_until)) | 585 | time_before(mcast->delay_until, delay_until)) |
@@ -608,7 +601,6 @@ out: | |||
608 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | 601 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
609 | } | 602 | } |
610 | spin_unlock_irq(&priv->lock); | 603 | spin_unlock_irq(&priv->lock); |
611 | mutex_unlock(&mcast_mutex); | ||
612 | if (mcast) | 604 | if (mcast) |
613 | ipoib_mcast_join(dev, mcast, create); | 605 | ipoib_mcast_join(dev, mcast, create); |
614 | } | 606 | } |
@@ -616,13 +608,14 @@ out: | |||
616 | int ipoib_mcast_start_thread(struct net_device *dev) | 608 | int ipoib_mcast_start_thread(struct net_device *dev) |
617 | { | 609 | { |
618 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 610 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
611 | unsigned long flags; | ||
619 | 612 | ||
620 | ipoib_dbg_mcast(priv, "starting multicast thread\n"); | 613 | ipoib_dbg_mcast(priv, "starting multicast thread\n"); |
621 | 614 | ||
622 | mutex_lock(&mcast_mutex); | 615 | spin_lock_irqsave(&priv->lock, flags); |
623 | set_bit(IPOIB_MCAST_RUN, &priv->flags); | 616 | set_bit(IPOIB_MCAST_RUN, &priv->flags); |
624 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); | 617 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); |
625 | mutex_unlock(&mcast_mutex); | 618 | spin_unlock_irqrestore(&priv->lock, flags); |
626 | 619 | ||
627 | return 0; | 620 | return 0; |
628 | } | 621 | } |
@@ -630,13 +623,14 @@ int ipoib_mcast_start_thread(struct net_device *dev) | |||
630 | int ipoib_mcast_stop_thread(struct net_device *dev) | 623 | int ipoib_mcast_stop_thread(struct net_device *dev) |
631 | { | 624 | { |
632 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 625 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
626 | unsigned long flags; | ||
633 | 627 | ||
634 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); | 628 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); |
635 | 629 | ||
636 | mutex_lock(&mcast_mutex); | 630 | spin_lock_irqsave(&priv->lock, flags); |
637 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); | 631 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); |
638 | cancel_delayed_work(&priv->mcast_task); | 632 | cancel_delayed_work(&priv->mcast_task); |
639 | mutex_unlock(&mcast_mutex); | 633 | spin_unlock_irqrestore(&priv->lock, flags); |
640 | 634 | ||
641 | flush_workqueue(priv->wq); | 635 | flush_workqueue(priv->wq); |
642 | 636 | ||