about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/infiniband/ulp/ipoib
diff options
context:
space:
mode:
author: Roland Dreier <roland@purestorage.com> 2015-01-30 18:39:20 -0500
committer: Roland Dreier <roland@purestorage.com> 2015-01-30 18:39:20 -0500
commit: e7a623d2df28a477efb6cf60471a4e2225e2e8e4 (patch)
tree: 6f1b66ba26913ccaba9b94537ee4934e5d9fce27 /drivers/infiniband/ulp/ipoib
parent: 962121b4fcd3e022c276104aec50b56d7ed1f71b (diff)
Revert "IPoIB: fix MCAST_FLAG_BUSY usage"
This reverts commit 016d9fb25cd9817ea9c723f4f7ecd978636b4489.

The series of IPoIB bug fixes that went into 3.19-rc1 introduced regressions, and after trying to sort things out, we decided to revert to 3.18's IPoIB driver and get things right for 3.20.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h 10
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c 148
2 files changed, 57 insertions, 101 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index f4c1b20b23b2..d7562beb5423 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -98,15 +98,9 @@ enum {
98 98
99 IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ 99 IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
100 IPOIB_MCAST_FLAG_SENDONLY = 1, 100 IPOIB_MCAST_FLAG_SENDONLY = 1,
101 /* 101 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
102 * For IPOIB_MCAST_FLAG_BUSY
103 * When set, in flight join and mcast->mc is unreliable
104 * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
105 * haven't started yet
106 * When clear and mcast->mc is valid pointer, join was successful
107 */
108 IPOIB_MCAST_FLAG_BUSY = 2,
109 IPOIB_MCAST_FLAG_ATTACHED = 3, 102 IPOIB_MCAST_FLAG_ATTACHED = 3,
103 IPOIB_MCAST_JOIN_STARTED = 4,
110 104
111 MAX_SEND_CQE = 16, 105 MAX_SEND_CQE = 16,
112 IPOIB_CM_COPYBREAK = 256, 106 IPOIB_CM_COPYBREAK = 256,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index a52c9f3f7e42..9862c76a83f7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -271,27 +271,16 @@ ipoib_mcast_sendonly_join_complete(int status,
271 struct ipoib_mcast *mcast = multicast->context; 271 struct ipoib_mcast *mcast = multicast->context;
272 struct net_device *dev = mcast->dev; 272 struct net_device *dev = mcast->dev;
273 273
274 /*
275 * We have to take the mutex to force mcast_sendonly_join to
276 * return from ib_sa_multicast_join and set mcast->mc to a
277 * valid value. Otherwise we were racing with ourselves in
278 * that we might fail here, but get a valid return from
279 * ib_sa_multicast_join after we had cleared mcast->mc here,
280 * resulting in mis-matched joins and leaves and a deadlock
281 */
282 mutex_lock(&mcast_mutex);
283
284 /* We trap for port events ourselves. */ 274 /* We trap for port events ourselves. */
285 if (status == -ENETRESET) 275 if (status == -ENETRESET)
286 goto out; 276 return 0;
287 277
288 if (!status) 278 if (!status)
289 status = ipoib_mcast_join_finish(mcast, &multicast->rec); 279 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
290 280
291 if (status) { 281 if (status) {
292 if (mcast->logcount++ < 20) 282 if (mcast->logcount++ < 20)
293 ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast " 283 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
294 "join failed for %pI6, status %d\n",
295 mcast->mcmember.mgid.raw, status); 284 mcast->mcmember.mgid.raw, status);
296 285
297 /* Flush out any queued packets */ 286 /* Flush out any queued packets */
@@ -301,15 +290,11 @@ ipoib_mcast_sendonly_join_complete(int status,
301 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); 290 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
302 } 291 }
303 netif_tx_unlock_bh(dev); 292 netif_tx_unlock_bh(dev);
293
294 /* Clear the busy flag so we try again */
295 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
296 &mcast->flags);
304 } 297 }
305out:
306 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
307 if (status)
308 mcast->mc = NULL;
309 complete(&mcast->done);
310 if (status == -ENETRESET)
311 status = 0;
312 mutex_unlock(&mcast_mutex);
313 return status; 298 return status;
314} 299}
315 300
@@ -327,14 +312,12 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
327 int ret = 0; 312 int ret = 0;
328 313
329 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { 314 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
330 ipoib_dbg_mcast(priv, "device shutting down, no sendonly " 315 ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
331 "multicast joins\n");
332 return -ENODEV; 316 return -ENODEV;
333 } 317 }
334 318
335 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { 319 if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
336 ipoib_dbg_mcast(priv, "multicast entry busy, skipping " 320 ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
337 "sendonly join\n");
338 return -EBUSY; 321 return -EBUSY;
339 } 322 }
340 323
@@ -342,9 +325,6 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
342 rec.port_gid = priv->local_gid; 325 rec.port_gid = priv->local_gid;
343 rec.pkey = cpu_to_be16(priv->pkey); 326 rec.pkey = cpu_to_be16(priv->pkey);
344 327
345 mutex_lock(&mcast_mutex);
346 init_completion(&mcast->done);
347 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
348 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, 328 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
349 priv->port, &rec, 329 priv->port, &rec,
350 IB_SA_MCMEMBER_REC_MGID | 330 IB_SA_MCMEMBER_REC_MGID |
@@ -357,14 +337,12 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
357 if (IS_ERR(mcast->mc)) { 337 if (IS_ERR(mcast->mc)) {
358 ret = PTR_ERR(mcast->mc); 338 ret = PTR_ERR(mcast->mc);
359 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 339 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
360 complete(&mcast->done); 340 ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
361 ipoib_warn(priv, "ib_sa_join_multicast for sendonly join " 341 ret);
362 "failed (ret = %d)\n", ret);
363 } else { 342 } else {
364 ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting " 343 ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
365 "sendonly join\n", mcast->mcmember.mgid.raw); 344 mcast->mcmember.mgid.raw);
366 } 345 }
367 mutex_unlock(&mcast_mutex);
368 346
369 return ret; 347 return ret;
370} 348}
@@ -412,28 +390,22 @@ static int ipoib_mcast_join_complete(int status,
412 ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", 390 ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
413 mcast->mcmember.mgid.raw, status); 391 mcast->mcmember.mgid.raw, status);
414 392
415 /*
416 * We have to take the mutex to force mcast_join to
417 * return from ib_sa_multicast_join and set mcast->mc to a
418 * valid value. Otherwise we were racing with ourselves in
419 * that we might fail here, but get a valid return from
420 * ib_sa_multicast_join after we had cleared mcast->mc here,
421 * resulting in mis-matched joins and leaves and a deadlock
422 */
423 mutex_lock(&mcast_mutex);
424
425 /* We trap for port events ourselves. */ 393 /* We trap for port events ourselves. */
426 if (status == -ENETRESET) 394 if (status == -ENETRESET) {
395 status = 0;
427 goto out; 396 goto out;
397 }
428 398
429 if (!status) 399 if (!status)
430 status = ipoib_mcast_join_finish(mcast, &multicast->rec); 400 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
431 401
432 if (!status) { 402 if (!status) {
433 mcast->backoff = 1; 403 mcast->backoff = 1;
404 mutex_lock(&mcast_mutex);
434 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 405 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
435 queue_delayed_work(ipoib_workqueue, 406 queue_delayed_work(ipoib_workqueue,
436 &priv->mcast_task, 0); 407 &priv->mcast_task, 0);
408 mutex_unlock(&mcast_mutex);
437 409
438 /* 410 /*
439 * Defer carrier on work to ipoib_workqueue to avoid a 411 * Defer carrier on work to ipoib_workqueue to avoid a
@@ -441,35 +413,37 @@ static int ipoib_mcast_join_complete(int status,
441 */ 413 */
442 if (mcast == priv->broadcast) 414 if (mcast == priv->broadcast)
443 queue_work(ipoib_workqueue, &priv->carrier_on_task); 415 queue_work(ipoib_workqueue, &priv->carrier_on_task);
444 } else {
445 if (mcast->logcount++ < 20) {
446 if (status == -ETIMEDOUT || status == -EAGAIN) {
447 ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
448 mcast->mcmember.mgid.raw, status);
449 } else {
450 ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
451 mcast->mcmember.mgid.raw, status);
452 }
453 }
454 416
455 mcast->backoff *= 2; 417 status = 0;
456 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 418 goto out;
457 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
458 } 419 }
459out: 420
421 if (mcast->logcount++ < 20) {
422 if (status == -ETIMEDOUT || status == -EAGAIN) {
423 ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
424 mcast->mcmember.mgid.raw, status);
425 } else {
426 ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
427 mcast->mcmember.mgid.raw, status);
428 }
429 }
430
431 mcast->backoff *= 2;
432 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
433 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
434
435 /* Clear the busy flag so we try again */
436 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
437
438 mutex_lock(&mcast_mutex);
460 spin_lock_irq(&priv->lock); 439 spin_lock_irq(&priv->lock);
461 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 440 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
462 if (status)
463 mcast->mc = NULL;
464 complete(&mcast->done);
465 if (status == -ENETRESET)
466 status = 0;
467 if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
468 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 441 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
469 mcast->backoff * HZ); 442 mcast->backoff * HZ);
470 spin_unlock_irq(&priv->lock); 443 spin_unlock_irq(&priv->lock);
471 mutex_unlock(&mcast_mutex); 444 mutex_unlock(&mcast_mutex);
472 445out:
446 complete(&mcast->done);
473 return status; 447 return status;
474} 448}
475 449
@@ -518,9 +492,10 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
518 rec.hop_limit = priv->broadcast->mcmember.hop_limit; 492 rec.hop_limit = priv->broadcast->mcmember.hop_limit;
519 } 493 }
520 494
521 mutex_lock(&mcast_mutex);
522 init_completion(&mcast->done);
523 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 495 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
496 init_completion(&mcast->done);
497 set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
498
524 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, 499 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
525 &rec, comp_mask, GFP_KERNEL, 500 &rec, comp_mask, GFP_KERNEL,
526 ipoib_mcast_join_complete, mcast); 501 ipoib_mcast_join_complete, mcast);
@@ -534,12 +509,13 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
534 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 509 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
535 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 510 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
536 511
512 mutex_lock(&mcast_mutex);
537 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 513 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
538 queue_delayed_work(ipoib_workqueue, 514 queue_delayed_work(ipoib_workqueue,
539 &priv->mcast_task, 515 &priv->mcast_task,
540 mcast->backoff * HZ); 516 mcast->backoff * HZ);
517 mutex_unlock(&mcast_mutex);
541 } 518 }
542 mutex_unlock(&mcast_mutex);
543} 519}
544 520
545void ipoib_mcast_join_task(struct work_struct *work) 521void ipoib_mcast_join_task(struct work_struct *work)
@@ -592,8 +568,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
592 } 568 }
593 569
594 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 570 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
595 if (IS_ERR_OR_NULL(priv->broadcast->mc) && 571 if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
596 !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
597 ipoib_mcast_join(dev, priv->broadcast, 0); 572 ipoib_mcast_join(dev, priv->broadcast, 0);
598 return; 573 return;
599 } 574 }
@@ -601,33 +576,23 @@ void ipoib_mcast_join_task(struct work_struct *work)
601 while (1) { 576 while (1) {
602 struct ipoib_mcast *mcast = NULL; 577 struct ipoib_mcast *mcast = NULL;
603 578
604 /*
605 * Need the mutex so our flags are consistent, need the
606 * priv->lock so we don't race with list removals in either
607 * mcast_dev_flush or mcast_restart_task
608 */
609 mutex_lock(&mcast_mutex);
610 spin_lock_irq(&priv->lock); 579 spin_lock_irq(&priv->lock);
611 list_for_each_entry(mcast, &priv->multicast_list, list) { 580 list_for_each_entry(mcast, &priv->multicast_list, list) {
612 if (IS_ERR_OR_NULL(mcast->mc) && 581 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
613 !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) && 582 && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
614 !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 583 && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
615 /* Found the next unjoined group */ 584 /* Found the next unjoined group */
616 break; 585 break;
617 } 586 }
618 } 587 }
619 spin_unlock_irq(&priv->lock); 588 spin_unlock_irq(&priv->lock);
620 mutex_unlock(&mcast_mutex);
621 589
622 if (&mcast->list == &priv->multicast_list) { 590 if (&mcast->list == &priv->multicast_list) {
623 /* All done */ 591 /* All done */
624 break; 592 break;
625 } 593 }
626 594
627 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 595 ipoib_mcast_join(dev, mcast, 1);
628 ipoib_mcast_sendonly_join(mcast);
629 else
630 ipoib_mcast_join(dev, mcast, 1);
631 return; 596 return;
632 } 597 }
633 598
@@ -673,9 +638,6 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
673 int ret = 0; 638 int ret = 0;
674 639
675 if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 640 if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
676 ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
677
678 if (!IS_ERR_OR_NULL(mcast->mc))
679 ib_sa_free_multicast(mcast->mc); 641 ib_sa_free_multicast(mcast->mc);
680 642
681 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 643 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -728,8 +690,6 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
728 memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); 690 memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
729 __ipoib_mcast_add(dev, mcast); 691 __ipoib_mcast_add(dev, mcast);
730 list_add_tail(&mcast->list, &priv->multicast_list); 692 list_add_tail(&mcast->list, &priv->multicast_list);
731 if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
732 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
733 } 693 }
734 694
735 if (!mcast->ah) { 695 if (!mcast->ah) {
@@ -743,6 +703,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
743 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 703 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
744 ipoib_dbg_mcast(priv, "no address vector, " 704 ipoib_dbg_mcast(priv, "no address vector, "
745 "but multicast join already started\n"); 705 "but multicast join already started\n");
706 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
707 ipoib_mcast_sendonly_join(mcast);
746 708
747 /* 709 /*
748 * If lookup completes between here and out:, don't 710 * If lookup completes between here and out:, don't
@@ -804,7 +766,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
804 766
805 /* seperate between the wait to the leave*/ 767 /* seperate between the wait to the leave*/
806 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) 768 list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
807 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 769 if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
808 wait_for_completion(&mcast->done); 770 wait_for_completion(&mcast->done);
809 771
810 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 772 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {