diff options
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 10 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 148 |
2 files changed, 101 insertions, 57 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index d7562beb5423..f4c1b20b23b2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h | |||
@@ -98,9 +98,15 @@ enum { | |||
98 | 98 | ||
99 | IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ | 99 | IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ |
100 | IPOIB_MCAST_FLAG_SENDONLY = 1, | 100 | IPOIB_MCAST_FLAG_SENDONLY = 1, |
101 | IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ | 101 | /* |
102 | * For IPOIB_MCAST_FLAG_BUSY | ||
103 | * When set, in flight join and mcast->mc is unreliable | ||
104 | * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or | ||
105 | * haven't started yet | ||
106 | * When clear and mcast->mc is valid pointer, join was successful | ||
107 | */ | ||
108 | IPOIB_MCAST_FLAG_BUSY = 2, | ||
102 | IPOIB_MCAST_FLAG_ATTACHED = 3, | 109 | IPOIB_MCAST_FLAG_ATTACHED = 3, |
103 | IPOIB_MCAST_JOIN_STARTED = 4, | ||
104 | 110 | ||
105 | MAX_SEND_CQE = 16, | 111 | MAX_SEND_CQE = 16, |
106 | IPOIB_CM_COPYBREAK = 256, | 112 | IPOIB_CM_COPYBREAK = 256, |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 9862c76a83f7..a52c9f3f7e42 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -271,16 +271,27 @@ ipoib_mcast_sendonly_join_complete(int status, | |||
271 | struct ipoib_mcast *mcast = multicast->context; | 271 | struct ipoib_mcast *mcast = multicast->context; |
272 | struct net_device *dev = mcast->dev; | 272 | struct net_device *dev = mcast->dev; |
273 | 273 | ||
274 | /* | ||
275 | * We have to take the mutex to force mcast_sendonly_join to | ||
276 | * return from ib_sa_join_multicast and set mcast->mc to a | ||
277 | * valid value. Otherwise we were racing with ourselves in | ||
278 | * that we might fail here, but get a valid return from | ||
279 | * ib_sa_join_multicast after we had cleared mcast->mc here, | ||
280 | * resulting in mis-matched joins and leaves and a deadlock | ||
281 | */ | ||
282 | mutex_lock(&mcast_mutex); | ||
283 | |||
274 | /* We trap for port events ourselves. */ | 284 | /* We trap for port events ourselves. */ |
275 | if (status == -ENETRESET) | 285 | if (status == -ENETRESET) |
276 | return 0; | 286 | goto out; |
277 | 287 | ||
278 | if (!status) | 288 | if (!status) |
279 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); | 289 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); |
280 | 290 | ||
281 | if (status) { | 291 | if (status) { |
282 | if (mcast->logcount++ < 20) | 292 | if (mcast->logcount++ < 20) |
283 | ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n", | 293 | ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast " |
294 | "join failed for %pI6, status %d\n", | ||
284 | mcast->mcmember.mgid.raw, status); | 295 | mcast->mcmember.mgid.raw, status); |
285 | 296 | ||
286 | /* Flush out any queued packets */ | 297 | /* Flush out any queued packets */ |
@@ -290,11 +301,15 @@ ipoib_mcast_sendonly_join_complete(int status, | |||
290 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); | 301 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
291 | } | 302 | } |
292 | netif_tx_unlock_bh(dev); | 303 | netif_tx_unlock_bh(dev); |
293 | |||
294 | /* Clear the busy flag so we try again */ | ||
295 | status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, | ||
296 | &mcast->flags); | ||
297 | } | 304 | } |
305 | out: | ||
306 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
307 | if (status) | ||
308 | mcast->mc = NULL; | ||
309 | complete(&mcast->done); | ||
310 | if (status == -ENETRESET) | ||
311 | status = 0; | ||
312 | mutex_unlock(&mcast_mutex); | ||
298 | return status; | 313 | return status; |
299 | } | 314 | } |
300 | 315 | ||
@@ -312,12 +327,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | |||
312 | int ret = 0; | 327 | int ret = 0; |
313 | 328 | ||
314 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { | 329 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { |
315 | ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); | 330 | ipoib_dbg_mcast(priv, "device shutting down, no sendonly " |
331 | "multicast joins\n"); | ||
316 | return -ENODEV; | 332 | return -ENODEV; |
317 | } | 333 | } |
318 | 334 | ||
319 | if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { | 335 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { |
320 | ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); | 336 | ipoib_dbg_mcast(priv, "multicast entry busy, skipping " |
337 | "sendonly join\n"); | ||
321 | return -EBUSY; | 338 | return -EBUSY; |
322 | } | 339 | } |
323 | 340 | ||
@@ -325,6 +342,9 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | |||
325 | rec.port_gid = priv->local_gid; | 342 | rec.port_gid = priv->local_gid; |
326 | rec.pkey = cpu_to_be16(priv->pkey); | 343 | rec.pkey = cpu_to_be16(priv->pkey); |
327 | 344 | ||
345 | mutex_lock(&mcast_mutex); | ||
346 | init_completion(&mcast->done); | ||
347 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
328 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, | 348 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, |
329 | priv->port, &rec, | 349 | priv->port, &rec, |
330 | IB_SA_MCMEMBER_REC_MGID | | 350 | IB_SA_MCMEMBER_REC_MGID | |
@@ -337,12 +357,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | |||
337 | if (IS_ERR(mcast->mc)) { | 357 | if (IS_ERR(mcast->mc)) { |
338 | ret = PTR_ERR(mcast->mc); | 358 | ret = PTR_ERR(mcast->mc); |
339 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | 359 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
340 | ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", | 360 | complete(&mcast->done); |
341 | ret); | 361 | ipoib_warn(priv, "ib_sa_join_multicast for sendonly join " |
362 | "failed (ret = %d)\n", ret); | ||
342 | } else { | 363 | } else { |
343 | ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n", | 364 | ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting " |
344 | mcast->mcmember.mgid.raw); | 365 | "sendonly join\n", mcast->mcmember.mgid.raw); |
345 | } | 366 | } |
367 | mutex_unlock(&mcast_mutex); | ||
346 | 368 | ||
347 | return ret; | 369 | return ret; |
348 | } | 370 | } |
@@ -390,22 +412,28 @@ static int ipoib_mcast_join_complete(int status, | |||
390 | ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", | 412 | ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", |
391 | mcast->mcmember.mgid.raw, status); | 413 | mcast->mcmember.mgid.raw, status); |
392 | 414 | ||
415 | /* | ||
416 | * We have to take the mutex to force mcast_join to | ||
417 | * return from ib_sa_join_multicast and set mcast->mc to a | ||
418 | * valid value. Otherwise we were racing with ourselves in | ||
419 | * that we might fail here, but get a valid return from | ||
420 | * ib_sa_join_multicast after we had cleared mcast->mc here, | ||
421 | * resulting in mis-matched joins and leaves and a deadlock | ||
422 | */ | ||
423 | mutex_lock(&mcast_mutex); | ||
424 | |||
393 | /* We trap for port events ourselves. */ | 425 | /* We trap for port events ourselves. */ |
394 | if (status == -ENETRESET) { | 426 | if (status == -ENETRESET) |
395 | status = 0; | ||
396 | goto out; | 427 | goto out; |
397 | } | ||
398 | 428 | ||
399 | if (!status) | 429 | if (!status) |
400 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); | 430 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); |
401 | 431 | ||
402 | if (!status) { | 432 | if (!status) { |
403 | mcast->backoff = 1; | 433 | mcast->backoff = 1; |
404 | mutex_lock(&mcast_mutex); | ||
405 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) | 434 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
406 | queue_delayed_work(ipoib_workqueue, | 435 | queue_delayed_work(ipoib_workqueue, |
407 | &priv->mcast_task, 0); | 436 | &priv->mcast_task, 0); |
408 | mutex_unlock(&mcast_mutex); | ||
409 | 437 | ||
410 | /* | 438 | /* |
411 | * Defer carrier on work to ipoib_workqueue to avoid a | 439 | * Defer carrier on work to ipoib_workqueue to avoid a |
@@ -413,37 +441,35 @@ static int ipoib_mcast_join_complete(int status, | |||
413 | */ | 441 | */ |
414 | if (mcast == priv->broadcast) | 442 | if (mcast == priv->broadcast) |
415 | queue_work(ipoib_workqueue, &priv->carrier_on_task); | 443 | queue_work(ipoib_workqueue, &priv->carrier_on_task); |
416 | 444 | } else { | |
417 | status = 0; | 445 | if (mcast->logcount++ < 20) { |
418 | goto out; | 446 | if (status == -ETIMEDOUT || status == -EAGAIN) { |
419 | } | 447 | ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", |
420 | 448 | mcast->mcmember.mgid.raw, status); | |
421 | if (mcast->logcount++ < 20) { | 449 | } else { |
422 | if (status == -ETIMEDOUT || status == -EAGAIN) { | 450 | ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", |
423 | ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", | 451 | mcast->mcmember.mgid.raw, status); |
424 | mcast->mcmember.mgid.raw, status); | 452 | } |
425 | } else { | ||
426 | ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", | ||
427 | mcast->mcmember.mgid.raw, status); | ||
428 | } | 453 | } |
429 | } | ||
430 | |||
431 | mcast->backoff *= 2; | ||
432 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | ||
433 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | ||
434 | |||
435 | /* Clear the busy flag so we try again */ | ||
436 | status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
437 | 454 | ||
438 | mutex_lock(&mcast_mutex); | 455 | mcast->backoff *= 2; |
456 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | ||
457 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | ||
458 | } | ||
459 | out: | ||
439 | spin_lock_irq(&priv->lock); | 460 | spin_lock_irq(&priv->lock); |
440 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) | 461 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
462 | if (status) | ||
463 | mcast->mc = NULL; | ||
464 | complete(&mcast->done); | ||
465 | if (status == -ENETRESET) | ||
466 | status = 0; | ||
467 | if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags)) | ||
441 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, | 468 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, |
442 | mcast->backoff * HZ); | 469 | mcast->backoff * HZ); |
443 | spin_unlock_irq(&priv->lock); | 470 | spin_unlock_irq(&priv->lock); |
444 | mutex_unlock(&mcast_mutex); | 471 | mutex_unlock(&mcast_mutex); |
445 | out: | 472 | |
446 | complete(&mcast->done); | ||
447 | return status; | 473 | return status; |
448 | } | 474 | } |
449 | 475 | ||
@@ -492,10 +518,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |||
492 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; | 518 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; |
493 | } | 519 | } |
494 | 520 | ||
495 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | 521 | mutex_lock(&mcast_mutex); |
496 | init_completion(&mcast->done); | 522 | init_completion(&mcast->done); |
497 | set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags); | 523 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
498 | |||
499 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, | 524 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, |
500 | &rec, comp_mask, GFP_KERNEL, | 525 | &rec, comp_mask, GFP_KERNEL, |
501 | ipoib_mcast_join_complete, mcast); | 526 | ipoib_mcast_join_complete, mcast); |
@@ -509,13 +534,12 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |||
509 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | 534 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) |
510 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | 535 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; |
511 | 536 | ||
512 | mutex_lock(&mcast_mutex); | ||
513 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) | 537 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
514 | queue_delayed_work(ipoib_workqueue, | 538 | queue_delayed_work(ipoib_workqueue, |
515 | &priv->mcast_task, | 539 | &priv->mcast_task, |
516 | mcast->backoff * HZ); | 540 | mcast->backoff * HZ); |
517 | mutex_unlock(&mcast_mutex); | ||
518 | } | 541 | } |
542 | mutex_unlock(&mcast_mutex); | ||
519 | } | 543 | } |
520 | 544 | ||
521 | void ipoib_mcast_join_task(struct work_struct *work) | 545 | void ipoib_mcast_join_task(struct work_struct *work) |
@@ -568,7 +592,8 @@ void ipoib_mcast_join_task(struct work_struct *work) | |||
568 | } | 592 | } |
569 | 593 | ||
570 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | 594 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { |
571 | if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) | 595 | if (IS_ERR_OR_NULL(priv->broadcast->mc) && |
596 | !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) | ||
572 | ipoib_mcast_join(dev, priv->broadcast, 0); | 597 | ipoib_mcast_join(dev, priv->broadcast, 0); |
573 | return; | 598 | return; |
574 | } | 599 | } |
@@ -576,23 +601,33 @@ void ipoib_mcast_join_task(struct work_struct *work) | |||
576 | while (1) { | 601 | while (1) { |
577 | struct ipoib_mcast *mcast = NULL; | 602 | struct ipoib_mcast *mcast = NULL; |
578 | 603 | ||
604 | /* | ||
605 | * Need the mutex so our flags are consistent, need the | ||
606 | * priv->lock so we don't race with list removals in either | ||
607 | * mcast_dev_flush or mcast_restart_task | ||
608 | */ | ||
609 | mutex_lock(&mcast_mutex); | ||
579 | spin_lock_irq(&priv->lock); | 610 | spin_lock_irq(&priv->lock); |
580 | list_for_each_entry(mcast, &priv->multicast_list, list) { | 611 | list_for_each_entry(mcast, &priv->multicast_list, list) { |
581 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) | 612 | if (IS_ERR_OR_NULL(mcast->mc) && |
582 | && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) | 613 | !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) && |
583 | && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | 614 | !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { |
584 | /* Found the next unjoined group */ | 615 | /* Found the next unjoined group */ |
585 | break; | 616 | break; |
586 | } | 617 | } |
587 | } | 618 | } |
588 | spin_unlock_irq(&priv->lock); | 619 | spin_unlock_irq(&priv->lock); |
620 | mutex_unlock(&mcast_mutex); | ||
589 | 621 | ||
590 | if (&mcast->list == &priv->multicast_list) { | 622 | if (&mcast->list == &priv->multicast_list) { |
591 | /* All done */ | 623 | /* All done */ |
592 | break; | 624 | break; |
593 | } | 625 | } |
594 | 626 | ||
595 | ipoib_mcast_join(dev, mcast, 1); | 627 | if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) |
628 | ipoib_mcast_sendonly_join(mcast); | ||
629 | else | ||
630 | ipoib_mcast_join(dev, mcast, 1); | ||
596 | return; | 631 | return; |
597 | } | 632 | } |
598 | 633 | ||
@@ -638,6 +673,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) | |||
638 | int ret = 0; | 673 | int ret = 0; |
639 | 674 | ||
640 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) | 675 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
676 | ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n"); | ||
677 | |||
678 | if (!IS_ERR_OR_NULL(mcast->mc)) | ||
641 | ib_sa_free_multicast(mcast->mc); | 679 | ib_sa_free_multicast(mcast->mc); |
642 | 680 | ||
643 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | 681 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { |
@@ -690,6 +728,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) | |||
690 | memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); | 728 | memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); |
691 | __ipoib_mcast_add(dev, mcast); | 729 | __ipoib_mcast_add(dev, mcast); |
692 | list_add_tail(&mcast->list, &priv->multicast_list); | 730 | list_add_tail(&mcast->list, &priv->multicast_list); |
731 | if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) | ||
732 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); | ||
693 | } | 733 | } |
694 | 734 | ||
695 | if (!mcast->ah) { | 735 | if (!mcast->ah) { |
@@ -703,8 +743,6 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) | |||
703 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) | 743 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
704 | ipoib_dbg_mcast(priv, "no address vector, " | 744 | ipoib_dbg_mcast(priv, "no address vector, " |
705 | "but multicast join already started\n"); | 745 | "but multicast join already started\n"); |
706 | else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) | ||
707 | ipoib_mcast_sendonly_join(mcast); | ||
708 | 746 | ||
709 | /* | 747 | /* |
710 | * If lookup completes between here and out:, don't | 748 | * If lookup completes between here and out:, don't |
@@ -766,7 +804,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev) | |||
766 | 804 | ||
767 | /* separate the wait from the leave */ | 805 | /* separate the wait from the leave */ |
768 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) | 806 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) |
769 | if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags)) | 807 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
770 | wait_for_completion(&mcast->done); | 808 | wait_for_completion(&mcast->done); |
771 | 809 | ||
772 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | 810 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { |