aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author: Faisal Latif <faisal.latif@intel.com> 2009-04-08 17:23:55 -0400
committer: Roland Dreier <rolandd@cisco.com> 2009-04-08 17:23:55 -0400
commit: 5962c2c8036b4dcf10ec6c481be656ae4700b664 (patch)
tree: a20bfcbb93e52f7a1dc161d7c6333ceadc5ba046 /drivers/infiniband
parent: 79fc3d7410c861c8ced5b81a5c3759f6bbf891dc (diff)
RDMA/nes: Fix nes_nic_cm_xmit() error handling
We are seeing crashes or hangs when running network cable pull tests during RDMA traffic. In schedule_nes_timer(), we returned an error if nes_nic_cm_xmit() returned failure. This is changed to return success, because the skb is put on the timer routines to be processed later. With the error return, in the send_syn() case we were indicating connect failure twice: once from nes_connect() and again when the rexmit retries expire.
The other issue is skb->users, which we increment before calling nes_nic_cm_xmit() (which in turn calls dev_queue_xmit()); in case of failure we were decrementing skb->users while at the same time putting the skb on the rexmit path. Even if dev_queue_xmit() fails, skb->users has already been decremented. We are therefore removing the decrement of skb->users on failure from both schedule_nes_timer() and nes_cm_timer_tick().
The extra check in nes_cm_timer_tick() that breaks out of the loop on rexmit failure is also removed. That break caused problems because the other nodes have their cm_node->ref_count incremented and were then never processed.
Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.c 8
1 file changed, 1 insertion, 7 deletions
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index a09caf5b387d..dbd9a75474e3 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -446,8 +446,8 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
446 if (ret != NETDEV_TX_OK) { 446 if (ret != NETDEV_TX_OK) {
447 nes_debug(NES_DBG_CM, "Error sending packet %p " 447 nes_debug(NES_DBG_CM, "Error sending packet %p "
448 "(jiffies = %lu)\n", new_send, jiffies); 448 "(jiffies = %lu)\n", new_send, jiffies);
449 atomic_dec(&new_send->skb->users);
450 new_send->timetosend = jiffies; 449 new_send->timetosend = jiffies;
450 ret = NETDEV_TX_OK;
451 } else { 451 } else {
452 cm_packets_sent++; 452 cm_packets_sent++;
453 if (!send_retrans) { 453 if (!send_retrans) {
@@ -631,7 +631,6 @@ static void nes_cm_timer_tick(unsigned long pass)
631 nes_debug(NES_DBG_CM, "rexmit failed for " 631 nes_debug(NES_DBG_CM, "rexmit failed for "
632 "node=%p\n", cm_node); 632 "node=%p\n", cm_node);
633 cm_packets_bounced++; 633 cm_packets_bounced++;
634 atomic_dec(&send_entry->skb->users);
635 send_entry->retrycount--; 634 send_entry->retrycount--;
636 nexttimeout = jiffies + NES_SHORT_TIME; 635 nexttimeout = jiffies + NES_SHORT_TIME;
637 settimer = 1; 636 settimer = 1;
@@ -667,11 +666,6 @@ static void nes_cm_timer_tick(unsigned long pass)
667 666
668 spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); 667 spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
669 rem_ref_cm_node(cm_node->cm_core, cm_node); 668 rem_ref_cm_node(cm_node->cm_core, cm_node);
670 if (ret != NETDEV_TX_OK) {
671 nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n",
672 cm_node);
673 break;
674 }
675 } 669 }
676 670
677 if (settimer) { 671 if (settimer) {