author    Moni Shoua <monis@Voltaire.COM>    2008-07-15 02:48:49 -0400
committer Roland Dreier <rolandd@cisco.com>  2008-07-15 02:48:49 -0400
commit    ee1e2c82c245a5fb2864e9dbcdaab3390fde3fcc
tree      2bd6686dcee9524352c1afce3cb772373ec83d5f /drivers/infiniband/ulp/ipoib/ipoib_main.c
parent    038919f29682b00ea95506e959210fc72d1aaf64
IPoIB: Refresh paths instead of flushing them on SM change events
The patch tries to solve the problem of the device going down and paths being flushed on an SM change event. The method is to mark the paths as candidates for refresh (by setting the new valid flag to 0) and to wait for an ARP probe to trigger a new path record query.

The solution requires a different and less intrusive handling of the SM change event. For that, the second argument of the flush function changes its meaning from a boolean flag to a level. In most cases SM failover doesn't cause a LID change, so traffic won't stop. In the rare case of a LID change, the remote host (the one that hadn't changed its LID) will lose connectivity until paths are refreshed. This is no worse than the current state; in fact, preventing the device from going down saves packets that would otherwise be lost.

Signed-off-by: Moni Levy <monil@voltaire.com>
Signed-off-by: Moni Shoua <monis@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
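As context for the level-based flush described above: the enum and the event dispatch live outside ipoib_main.c (and thus outside this diffstat), so the following is only a minimal sketch of how an SM change event could map to the lightest flush level. The IPOIB_FLUSH_* names and the handler body are assumptions inferred from the flush_light/flush_normal/flush_heavy work items initialized in ipoib_setup() below.

/*
 * Sketch only: the second argument of the flush path becomes a level
 * rather than a boolean. Everything here that is not visible in the
 * diff below (the enum, ipoib_event(), the event-to-level mapping)
 * is an assumption about code outside this file.
 */
enum ipoib_flush_level {
	IPOIB_FLUSH_LIGHT,	/* SM change: only mark paths invalid */
	IPOIB_FLUSH_NORMAL,	/* e.g. LID change: flush paths and mcast groups */
	IPOIB_FLUSH_HEAVY,	/* e.g. P_Key change: reinitialize the interface */
};

static void ipoib_event(struct ib_event_handler *handler,
			struct ib_event *record)
{
	struct ipoib_dev_priv *priv =
		container_of(handler, struct ipoib_dev_priv, event_handler);

	if (record->event == IB_EVENT_SM_CHANGE ||
	    record->event == IB_EVENT_CLIENT_REREGISTER)
		/* Light flush: the device stays up and paths are only
		 * marked !valid, so they refresh lazily on the next ARP. */
		queue_work(ipoib_workqueue, &priv->flush_light);
	else if (record->event == IB_EVENT_PKEY_CHANGE)
		queue_work(ipoib_workqueue, &priv->flush_heavy);
}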
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_main.c')
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 44
1 file changed, 40 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index fead88f7fb17..b3fd7e8333cf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -357,6 +357,23 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 
 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
 
+void ipoib_mark_paths_invalid(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_path *path, *tp;
+
+	spin_lock_irq(&priv->lock);
+
+	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
+		ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n",
+			  be16_to_cpu(path->pathrec.dlid),
+			  IPOIB_GID_ARG(path->pathrec.dgid));
+		path->valid = 0;
+	}
+
+	spin_unlock_irq(&priv->lock);
+}
+
 void ipoib_flush_paths(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -393,6 +410,7 @@ static void path_rec_completion(int status,
 	struct net_device *dev = path->dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_ah *ah = NULL;
+	struct ipoib_ah *old_ah;
 	struct ipoib_neigh *neigh, *tn;
 	struct sk_buff_head skqueue;
 	struct sk_buff *skb;
@@ -416,6 +434,7 @@ static void path_rec_completion(int status,
 
 	spin_lock_irqsave(&priv->lock, flags);
 
+	old_ah = path->ah;
 	path->ah = ah;
 
 	if (ah) {
@@ -428,6 +447,17 @@ static void path_rec_completion(int status,
 			__skb_queue_tail(&skqueue, skb);
 
 		list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
+			if (neigh->ah) {
+				WARN_ON(neigh->ah != old_ah);
+				/*
+				 * Dropping the ah reference inside
+				 * priv->lock is safe here, because we
+				 * will hold one more reference from
+				 * the original value of path->ah (ie
+				 * old_ah).
+				 */
+				ipoib_put_ah(neigh->ah);
+			}
 			kref_get(&path->ah->ref);
 			neigh->ah = path->ah;
 			memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
@@ -450,6 +480,7 @@ static void path_rec_completion(int status,
 			while ((skb = __skb_dequeue(&neigh->queue)))
 				__skb_queue_tail(&skqueue, skb);
 		}
+		path->valid = 1;
 	}
 
 	path->query = NULL;
@@ -457,6 +488,9 @@ static void path_rec_completion(int status,
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
+	if (old_ah)
+		ipoib_put_ah(old_ah);
+
 	while ((skb = __skb_dequeue(&skqueue))) {
 		skb->dev = dev;
 		if (dev_queue_xmit(skb))
@@ -630,8 +664,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 	spin_lock(&priv->lock);
 
 	path = __path_find(dev, phdr->hwaddr + 4);
-	if (!path) {
-		path = path_rec_create(dev, phdr->hwaddr + 4);
+	if (!path || !path->valid) {
+		if (!path)
+			path = path_rec_create(dev, phdr->hwaddr + 4);
 		if (path) {
 			/* put pseudoheader back on for next time */
 			skb_push(skb, sizeof *phdr);
@@ -1046,9 +1081,10 @@ static void ipoib_setup(struct net_device *dev)
 	INIT_LIST_HEAD(&priv->multicast_list);
 
 	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
-	INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
 	INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
-	INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
+	INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
+	INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
+	INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
 	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
 }
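Tying the pieces together: the three work items initialized above are expected to funnel into a single flush routine in ipoib_ib.c that switches on the level. That file is outside this diffstat, so the sketch below is an assumption about how the lightest level would use the new ipoib_mark_paths_invalid() rather than ipoib_flush_paths().

/*
 * Sketch, not part of this diff: a level-dispatched flush in which
 * IPOIB_FLUSH_LIGHT invalidates paths without taking the device down.
 * ipoib_mcast_dev_flush() and the down/up handling for the heavier
 * levels are assumptions about code in ipoib_ib.c.
 */
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
				 enum ipoib_flush_level level)
{
	if (level == IPOIB_FLUSH_LIGHT) {
		/* Keep the interface up; cached paths are merely marked
		 * invalid and will be re-queried on the next ARP probe. */
		ipoib_mark_paths_invalid(priv->dev);
		ipoib_mcast_dev_flush(priv->dev);
		return;
	}
	/* NORMAL would additionally take the data path down and back up;
	 * HEAVY would also re-check the P_Key before reinitializing. */
}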