diff options
author | Faisal Latif <faisal.latif@intel.com> | 2009-04-08 17:23:55 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2009-04-08 17:23:55 -0400 |
commit | 5962c2c8036b4dcf10ec6c481be656ae4700b664 (patch) | |
tree | a20bfcbb93e52f7a1dc161d7c6333ceadc5ba046 /drivers/infiniband/hw/nes/nes_cm.c | |
parent | 79fc3d7410c861c8ced5b81a5c3759f6bbf891dc (diff) |
RDMA/nes: Fix nes_nic_cm_xmit() error handling
We are seeing crashes or hangs when running network cable pull tests
during RDMA traffic.
In schedule_nes_timer(), we return an error if nes_nic_cm_xmit()
returns failure. This is changed to return success, because the skb is
handed to the timer routines to be processed later. In the send_syn()
case, we were indicating connect failure twice: once from nes_connect()
and again when the rexmit retries expire.
The other issue is skb->users, which we increment before calling
nes_nic_cm_xmit(), which in turn calls dev_queue_xmit(). On failure we
were decrementing skb->users while at the same time putting the skb on
the rexmit path. However, even if dev_queue_xmit() fails, skb->users
has already been decremented. We therefore remove the decrement of
skb->users on failure from both schedule_nes_timer() and
nes_cm_timer_tick().
There was also an extra check in nes_cm_timer_tick() for rexmit failure
that broke out of the loop; this check is removed. The break caused
problems because the other nodes have already had their
cm_node->ref_count incremented and were then never processed.
Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/nes/nes_cm.c')
-rw-r--r-- | drivers/infiniband/hw/nes/nes_cm.c | 8 |
1 files changed, 1 insertions, 7 deletions
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index a09caf5b387d..dbd9a75474e3 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c | |||
@@ -446,8 +446,8 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, | |||
446 | if (ret != NETDEV_TX_OK) { | 446 | if (ret != NETDEV_TX_OK) { |
447 | nes_debug(NES_DBG_CM, "Error sending packet %p " | 447 | nes_debug(NES_DBG_CM, "Error sending packet %p " |
448 | "(jiffies = %lu)\n", new_send, jiffies); | 448 | "(jiffies = %lu)\n", new_send, jiffies); |
449 | atomic_dec(&new_send->skb->users); | ||
450 | new_send->timetosend = jiffies; | 449 | new_send->timetosend = jiffies; |
450 | ret = NETDEV_TX_OK; | ||
451 | } else { | 451 | } else { |
452 | cm_packets_sent++; | 452 | cm_packets_sent++; |
453 | if (!send_retrans) { | 453 | if (!send_retrans) { |
@@ -631,7 +631,6 @@ static void nes_cm_timer_tick(unsigned long pass) | |||
631 | nes_debug(NES_DBG_CM, "rexmit failed for " | 631 | nes_debug(NES_DBG_CM, "rexmit failed for " |
632 | "node=%p\n", cm_node); | 632 | "node=%p\n", cm_node); |
633 | cm_packets_bounced++; | 633 | cm_packets_bounced++; |
634 | atomic_dec(&send_entry->skb->users); | ||
635 | send_entry->retrycount--; | 634 | send_entry->retrycount--; |
636 | nexttimeout = jiffies + NES_SHORT_TIME; | 635 | nexttimeout = jiffies + NES_SHORT_TIME; |
637 | settimer = 1; | 636 | settimer = 1; |
@@ -667,11 +666,6 @@ static void nes_cm_timer_tick(unsigned long pass) | |||
667 | 666 | ||
668 | spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); | 667 | spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); |
669 | rem_ref_cm_node(cm_node->cm_core, cm_node); | 668 | rem_ref_cm_node(cm_node->cm_core, cm_node); |
670 | if (ret != NETDEV_TX_OK) { | ||
671 | nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n", | ||
672 | cm_node); | ||
673 | break; | ||
674 | } | ||
675 | } | 669 | } |
676 | 670 | ||
677 | if (settimer) { | 671 | if (settimer) { |