aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ixgbe
diff options
context:
space:
mode:
authorJohn Fastabend <john.r.fastabend@intel.com>2010-11-16 22:27:12 -0500
committerJeff Kirsher <jeffrey.t.kirsher@intel.com>2010-11-16 22:27:12 -0500
commitc84d324c770dc81acebc1042163da33c8ded2364 (patch)
treebed8d4eace15895f9a194f9b114ba6f2b6ca5578 /drivers/net/ixgbe
parente3de4b7bdfd2c06884c95cfb4ad4d64be046595e (diff)
ixgbe: rework Tx hang detection to fix recurring false Tx hangs
The Tx hang logic has been known to detect false hangs when the device is receiving pause frames or has delayed processing for some other reason. This patch makes the logic more robust and resolves these known issues. The old logic checked to see if the device was paused by querying the HW, then the hang logic was aborted if the device was currently paused. This check was racy because the device could have been in the pause state any time up to this check. The other operation of the hang logic is to verify the Tx ring is still advancing. The old logic checked the EOP timestamp. This is not sufficient to determine the ring is not advancing, but only infers that it may be moving slowly. Here we add logic to track the number of completed Tx descriptors and use the adapter stats to check if any pause frames have been received since the previous Tx hang check. This way we avoid racing with the HW register and do not detect false hangs if the ring is advancing slowly. This patch is primarily the work of Jesse Brandeburg. I cleaned it up some and fixed the PFC checking. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Tested-by: Ross Brattain <ross.b.brattain@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Diffstat (limited to 'drivers/net/ixgbe')
-rw-r--r--drivers/net/ixgbe/ixgbe.h4
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c250
2 files changed, 175 insertions, 79 deletions
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index ce43c9352681..2b8cbb3a81fa 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -149,6 +149,8 @@ struct ixgbe_queue_stats {
149struct ixgbe_tx_queue_stats { 149struct ixgbe_tx_queue_stats {
150 u64 restart_queue; 150 u64 restart_queue;
151 u64 tx_busy; 151 u64 tx_busy;
152 u64 completed;
153 u64 tx_done_old;
152}; 154};
153 155
154struct ixgbe_rx_queue_stats { 156struct ixgbe_rx_queue_stats {
@@ -162,6 +164,7 @@ struct ixgbe_rx_queue_stats {
162enum ixbge_ring_state_t { 164enum ixbge_ring_state_t {
163 __IXGBE_TX_FDIR_INIT_DONE, 165 __IXGBE_TX_FDIR_INIT_DONE,
164 __IXGBE_TX_DETECT_HANG, 166 __IXGBE_TX_DETECT_HANG,
167 __IXGBE_HANG_CHECK_ARMED,
165 __IXGBE_RX_PS_ENABLED, 168 __IXGBE_RX_PS_ENABLED,
166 __IXGBE_RX_RSC_ENABLED, 169 __IXGBE_RX_RSC_ENABLED,
167}; 170};
@@ -514,6 +517,7 @@ extern void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *,
514extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16); 517extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16);
515extern void ixgbe_write_eitr(struct ixgbe_q_vector *); 518extern void ixgbe_write_eitr(struct ixgbe_q_vector *);
516extern int ethtool_ioctl(struct ifreq *ifr); 519extern int ethtool_ioctl(struct ifreq *ifr);
520extern u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 index);
517extern s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw); 521extern s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
518extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc); 522extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc);
519extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc); 523extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc);
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index def5c6e047cf..6e56f7b7c8fd 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -630,93 +630,166 @@ void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *tx_ring,
630} 630}
631 631
632/** 632/**
633 * ixgbe_tx_xon_state - check the tx ring xon state 633 * ixgbe_dcb_txq_to_tc - convert a reg index to a traffic class
634 * @adapter: the ixgbe adapter 634 * @adapter: driver private struct
635 * @tx_ring: the corresponding tx_ring 635 * @index: reg idx of queue to query (0-127)
636 * 636 *
637 * If not in DCB mode, checks TFCS.TXOFF, otherwise, find out the 637 * Helper function to determine the traffic index for a particular
638 * corresponding TC of this tx_ring when checking TFCS. 638 * register index.
639 * 639 *
640 * Returns : true if in xon state (currently not paused) 640 * Returns : a tc index for use in range 0-7, or 0-3
641 */ 641 */
642static inline bool ixgbe_tx_xon_state(struct ixgbe_adapter *adapter, 642u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 reg_idx)
643 struct ixgbe_ring *tx_ring)
644{ 643{
645 u32 txoff = IXGBE_TFCS_TXOFF; 644 int tc = -1;
645 int dcb_i = adapter->ring_feature[RING_F_DCB].indices;
646 646
647#ifdef CONFIG_IXGBE_DCB 647 /* if DCB is not enabled the queues have no TC */
648 if (adapter->dcb_cfg.pfc_mode_enable) { 648 if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
649 int tc; 649 return tc;
650 int dcb_i = adapter->ring_feature[RING_F_DCB].indices;
651 u8 reg_idx = tx_ring->reg_idx;
652 650
653 switch (adapter->hw.mac.type) { 651 /* check valid range */
654 case ixgbe_mac_82598EB: 652 if (reg_idx >= adapter->hw.mac.max_tx_queues)
655 tc = reg_idx >> 2; 653 return tc;
656 txoff = IXGBE_TFCS_TXOFF0; 654
655 switch (adapter->hw.mac.type) {
656 case ixgbe_mac_82598EB:
657 tc = reg_idx >> 2;
658 break;
659 default:
660 if (dcb_i != 4 && dcb_i != 8)
657 break; 661 break;
658 case ixgbe_mac_82599EB: 662
659 tc = 0; 663 /* if VMDq is enabled the lowest order bits determine TC */
660 txoff = IXGBE_TFCS_TXOFF; 664 if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED |
661 if (dcb_i == 8) { 665 IXGBE_FLAG_VMDQ_ENABLED)) {
662 /* TC0, TC1 */ 666 tc = reg_idx & (dcb_i - 1);
663 tc = reg_idx >> 5; 667 break;
664 if (tc == 2) /* TC2, TC3 */ 668 }
665 tc += (reg_idx - 64) >> 4; 669
666 else if (tc == 3) /* TC4, TC5, TC6, TC7 */ 670 /*
667 tc += 1 + ((reg_idx - 96) >> 3); 671 * Convert the reg_idx into the correct TC. This bitmask
668 } else if (dcb_i == 4) { 672 * targets the last full 32 ring traffic class and assigns
669 /* TC0, TC1 */ 673 * it a value of 1. From there the rest of the rings are
670 tc = reg_idx >> 6; 674 * based on shifting the mask further up to include the
671 if (tc == 1) { 675 * reg_idx / 16 and then reg_idx / 8. It assumes dcB_i
672 tc += (reg_idx - 64) >> 5; 676 * will only ever be 8 or 4 and that reg_idx will never
673 if (tc == 2) /* TC2, TC3 */ 677 * be greater then 128. The code without the power of 2
674 tc += (reg_idx - 96) >> 4; 678 * optimizations would be:
675 } 679 * (((reg_idx % 32) + 32) * dcb_i) >> (9 - reg_idx / 32)
676 } 680 */
681 tc = ((reg_idx & 0X1F) + 0x20) * dcb_i;
682 tc >>= 9 - (reg_idx >> 5);
683 }
684
685 return tc;
686}
687
688static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
689{
690 struct ixgbe_hw *hw = &adapter->hw;
691 struct ixgbe_hw_stats *hwstats = &adapter->stats;
692 u32 data = 0;
693 u32 xoff[8] = {0};
694 int i;
695
696 if ((hw->fc.current_mode == ixgbe_fc_full) ||
697 (hw->fc.current_mode == ixgbe_fc_rx_pause)) {
698 switch (hw->mac.type) {
699 case ixgbe_mac_82598EB:
700 data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
677 break; 701 break;
678 default: 702 default:
679 tc = 0; 703 data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
704 }
705 hwstats->lxoffrxc += data;
706
707 /* refill credits (no tx hang) if we received xoff */
708 if (!data)
709 return;
710
711 for (i = 0; i < adapter->num_tx_queues; i++)
712 clear_bit(__IXGBE_HANG_CHECK_ARMED,
713 &adapter->tx_ring[i]->state);
714 return;
715 } else if (!(adapter->dcb_cfg.pfc_mode_enable))
716 return;
717
718 /* update stats for each tc, only valid with PFC enabled */
719 for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
720 switch (hw->mac.type) {
721 case ixgbe_mac_82598EB:
722 xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
680 break; 723 break;
724 default:
725 xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
681 } 726 }
682 txoff <<= tc; 727 hwstats->pxoffrxc[i] += xoff[i];
728 }
729
730 /* disarm tx queues that have received xoff frames */
731 for (i = 0; i < adapter->num_tx_queues; i++) {
732 struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
733 u32 tc = ixgbe_dcb_txq_to_tc(adapter, tx_ring->reg_idx);
734
735 if (xoff[tc])
736 clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
683 } 737 }
684#endif
685 return IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & txoff;
686} 738}
687 739
688static inline bool ixgbe_check_tx_hang(struct ixgbe_adapter *adapter, 740static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring)
689 struct ixgbe_ring *tx_ring,
690 unsigned int eop)
691{ 741{
742 return ring->tx_stats.completed;
743}
744
745static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring)
746{
747 struct ixgbe_adapter *adapter = netdev_priv(ring->netdev);
692 struct ixgbe_hw *hw = &adapter->hw; 748 struct ixgbe_hw *hw = &adapter->hw;
693 749
694 /* Detect a transmit hang in hardware, this serializes the 750 u32 head = IXGBE_READ_REG(hw, IXGBE_TDH(ring->reg_idx));
695 * check with the clearing of time_stamp and movement of eop */ 751 u32 tail = IXGBE_READ_REG(hw, IXGBE_TDT(ring->reg_idx));
752
753 if (head != tail)
754 return (head < tail) ?
755 tail - head : (tail + ring->count - head);
756
757 return 0;
758}
759
760static inline bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring)
761{
762 u32 tx_done = ixgbe_get_tx_completed(tx_ring);
763 u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
764 u32 tx_pending = ixgbe_get_tx_pending(tx_ring);
765 bool ret = false;
766
696 clear_check_for_tx_hang(tx_ring); 767 clear_check_for_tx_hang(tx_ring);
697 if (tx_ring->tx_buffer_info[eop].time_stamp && 768
698 time_after(jiffies, tx_ring->tx_buffer_info[eop].time_stamp + HZ) && 769 /*
699 ixgbe_tx_xon_state(adapter, tx_ring)) { 770 * Check for a hung queue, but be thorough. This verifies
700 /* detected Tx unit hang */ 771 * that a transmit has been completed since the previous
701 union ixgbe_adv_tx_desc *tx_desc; 772 * check AND there is at least one packet pending. The
702 tx_desc = IXGBE_TX_DESC_ADV(tx_ring, eop); 773 * ARMED bit is set to indicate a potential hang. The
703 e_err(drv, "Detected Tx Unit Hang\n" 774 * bit is cleared if a pause frame is received to remove
704 " Tx Queue <%d>\n" 775 * false hang detection due to PFC or 802.3x frames. By
705 " TDH, TDT <%x>, <%x>\n" 776 * requiring this to fail twice we avoid races with
706 " next_to_use <%x>\n" 777 * pfc clearing the ARMED bit and conditions where we
707 " next_to_clean <%x>\n" 778 * run the check_tx_hang logic with a transmit completion
708 "tx_buffer_info[next_to_clean]\n" 779 * pending but without time to complete it yet.
709 " time_stamp <%lx>\n" 780 */
710 " jiffies <%lx>\n", 781 if ((tx_done_old == tx_done) && tx_pending) {
711 tx_ring->queue_index, 782 /* make sure it is true for two checks in a row */
712 IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)), 783 ret = test_and_set_bit(__IXGBE_HANG_CHECK_ARMED,
713 IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)), 784 &tx_ring->state);
714 tx_ring->next_to_use, eop, 785 } else {
715 tx_ring->tx_buffer_info[eop].time_stamp, jiffies); 786 /* update completed stats and continue */
716 return true; 787 tx_ring->tx_stats.tx_done_old = tx_done;
788 /* reset the countdown */
789 clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
717 } 790 }
718 791
719 return false; 792 return ret;
720} 793}
721 794
722#define IXGBE_MAX_TXD_PWR 14 795#define IXGBE_MAX_TXD_PWR 14
@@ -772,6 +845,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
772 tx_buffer_info); 845 tx_buffer_info);
773 } 846 }
774 847
848 tx_ring->tx_stats.completed++;
775 eop = tx_ring->tx_buffer_info[i].next_to_watch; 849 eop = tx_ring->tx_buffer_info[i].next_to_watch;
776 eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop); 850 eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
777 } 851 }
@@ -784,11 +858,31 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
784 tx_ring->stats.bytes += total_bytes; 858 tx_ring->stats.bytes += total_bytes;
785 u64_stats_update_end(&tx_ring->syncp); 859 u64_stats_update_end(&tx_ring->syncp);
786 860
787 if (check_for_tx_hang(tx_ring) && 861 if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
788 ixgbe_check_tx_hang(adapter, tx_ring, i)) { 862 /* schedule immediate reset if we believe we hung */
863 struct ixgbe_hw *hw = &adapter->hw;
864 tx_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
865 e_err(drv, "Detected Tx Unit Hang\n"
866 " Tx Queue <%d>\n"
867 " TDH, TDT <%x>, <%x>\n"
868 " next_to_use <%x>\n"
869 " next_to_clean <%x>\n"
870 "tx_buffer_info[next_to_clean]\n"
871 " time_stamp <%lx>\n"
872 " jiffies <%lx>\n",
873 tx_ring->queue_index,
874 IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)),
875 IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)),
876 tx_ring->next_to_use, eop,
877 tx_ring->tx_buffer_info[eop].time_stamp, jiffies);
878
879 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
880
881 e_info(probe,
882 "tx hang %d detected on queue %d, resetting adapter\n",
883 adapter->tx_timeout_count + 1, tx_ring->queue_index);
884
789 /* schedule immediate reset if we believe we hung */ 885 /* schedule immediate reset if we believe we hung */
790 e_info(probe, "tx hang %d detected, resetting "
791 "adapter\n", adapter->tx_timeout_count + 1);
792 ixgbe_tx_timeout(adapter->netdev); 886 ixgbe_tx_timeout(adapter->netdev);
793 887
794 /* the adapter is about to reset, no point in enabling stuff */ 888 /* the adapter is about to reset, no point in enabling stuff */
@@ -2599,6 +2693,8 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
2599 ring->atr_sample_rate = 0; 2693 ring->atr_sample_rate = 0;
2600 } 2694 }
2601 2695
2696 clear_bit(__IXGBE_HANG_CHECK_ARMED, &ring->state);
2697
2602 /* enable queue */ 2698 /* enable queue */
2603 txdctl |= IXGBE_TXDCTL_ENABLE; 2699 txdctl |= IXGBE_TXDCTL_ENABLE;
2604 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl); 2700 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl);
@@ -4034,6 +4130,8 @@ static void ixgbe_tx_timeout(struct net_device *netdev)
4034{ 4130{
4035 struct ixgbe_adapter *adapter = netdev_priv(netdev); 4131 struct ixgbe_adapter *adapter = netdev_priv(netdev);
4036 4132
4133 adapter->tx_timeout_count++;
4134
4037 /* Do the reset outside of interrupt context */ 4135 /* Do the reset outside of interrupt context */
4038 schedule_work(&adapter->reset_task); 4136 schedule_work(&adapter->reset_task);
4039} 4137}
@@ -4048,8 +4146,6 @@ static void ixgbe_reset_task(struct work_struct *work)
4048 test_bit(__IXGBE_RESETTING, &adapter->state)) 4146 test_bit(__IXGBE_RESETTING, &adapter->state))
4049 return; 4147 return;
4050 4148
4051 adapter->tx_timeout_count++;
4052
4053 ixgbe_dump(adapter); 4149 ixgbe_dump(adapter);
4054 netdev_err(adapter->netdev, "Reset adapter\n"); 4150 netdev_err(adapter->netdev, "Reset adapter\n");
4055 ixgbe_reinit_locked(adapter); 4151 ixgbe_reinit_locked(adapter);
@@ -5597,14 +5693,10 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
5597 case ixgbe_mac_82598EB: 5693 case ixgbe_mac_82598EB:
5598 hwstats->pxonrxc[i] += 5694 hwstats->pxonrxc[i] +=
5599 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); 5695 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5600 hwstats->pxoffrxc[i] +=
5601 IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5602 break; 5696 break;
5603 case ixgbe_mac_82599EB: 5697 case ixgbe_mac_82599EB:
5604 hwstats->pxonrxc[i] += 5698 hwstats->pxonrxc[i] +=
5605 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); 5699 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5606 hwstats->pxoffrxc[i] +=
5607 IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
5608 break; 5700 break;
5609 default: 5701 default:
5610 break; 5702 break;
@@ -5616,11 +5708,12 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
5616 /* work around hardware counting issue */ 5708 /* work around hardware counting issue */
5617 hwstats->gprc -= missed_rx; 5709 hwstats->gprc -= missed_rx;
5618 5710
5711 ixgbe_update_xoff_received(adapter);
5712
5619 /* 82598 hardware only has a 32 bit counter in the high register */ 5713 /* 82598 hardware only has a 32 bit counter in the high register */
5620 switch (hw->mac.type) { 5714 switch (hw->mac.type) {
5621 case ixgbe_mac_82598EB: 5715 case ixgbe_mac_82598EB:
5622 hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); 5716 hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5623 hwstats->lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5624 hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); 5717 hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5625 hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); 5718 hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5626 hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH); 5719 hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
@@ -5633,7 +5726,6 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
5633 hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL); 5726 hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL);
5634 IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */ 5727 IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */
5635 hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); 5728 hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5636 hwstats->lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5637 hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); 5729 hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
5638 hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS); 5730 hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
5639#ifdef IXGBE_FCOE 5731#ifdef IXGBE_FCOE