aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bruce Allan <bruce.w.allan@intel.com> 2012-03-19 23:47:52 -0400
committer Jeff Kirsher <jeffrey.t.kirsher@intel.com> 2012-04-27 05:25:13 -0400
commit bdc125f73f3c810754e858b942d54faf4ba6bffe (patch)
tree ad071aff2b90fc56efcb9552bc503e515a7dff42
parent 36ceeb43cecaf98a488b94bb318a1f3dd5a87033 (diff)
e1000e: 82579 potential system hang on stress when ME enabled
Previously, a workaround was added to address a hardware bug in the PCIm2PCI arbiter where a write by the driver of the Transmit/Receive Descriptor Tail register could happen concurrently with a write of any MAC CSR register by the Manageability Engine (ME) which could cause the Tail register to have an incorrect value. The arbiter is supposed to prevent the concurrent writes but there is a bug that can cause the Host (driver) access to be acknowledged later than it should.

After further investigation, it was discovered that a driver write access of any MAC CSR register after being idle for some time can be lost when ME is accessing a MAC CSR register. When this happens, no further target access is claimed by the MAC which could hang the system.

The workaround to check bit 24 in the FWSM register (set only when ME is accessing a MAC CSR register) and delay for a limited amount of time until it is cleared is now done for all driver writes of MAC CSR registers on 82579 with ME enabled. In the rare case when the driver is writing the Tail register and ME is accessing any MAC CSR register for a duration longer than the maximum delay, write the register and verify it has the correct value before continuing, otherwise reset the device.

This patch also moves some pre-existing macros from the hardware-specific header file to the more appropriate generic driver header file.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
-rw-r--r-- drivers/net/ethernet/intel/e1000e/e1000.h 37
-rw-r--r-- drivers/net/ethernet/intel/e1000e/hw.h 10
-rw-r--r-- drivers/net/ethernet/intel/e1000e/netdev.c 51
3 files changed, 53 insertions, 45 deletions
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index b83897f76ee3..1dc2067d3f28 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -735,9 +735,46 @@ static inline u32 __er32(struct e1000_hw *hw, unsigned long reg)
735 return readl(hw->hw_addr + reg); 735 return readl(hw->hw_addr + reg);
736} 736}
737 737
738#define er32(reg) __er32(hw, E1000_##reg)
739
740/**
741 * __ew32_prepare - prepare to write to MAC CSR register on certain parts
742 * @hw: pointer to the HW structure
743 *
744 * When updating the MAC CSR registers, the Manageability Engine (ME) could
745 * be accessing the registers at the same time. Normally, this is handled in
746 * h/w by an arbiter but on some parts there is a bug that acknowledges Host
747 * accesses later than it should which could result in the register to have
748 * an incorrect value. Workaround this by checking the FWSM register which
749 * has bit 24 set while ME is accessing MAC CSR registers, wait if it is set
750 * and try again a number of times.
751 **/
752static inline s32 __ew32_prepare(struct e1000_hw *hw)
753{
754 s32 i = E1000_ICH_FWSM_PCIM2PCI_COUNT;
755
756 while ((er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI) && --i)
757 udelay(50);
758
759 return i;
760}
761
738static inline void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val) 762static inline void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val)
739{ 763{
764 if (hw->adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
765 __ew32_prepare(hw);
766
740 writel(val, hw->hw_addr + reg); 767 writel(val, hw->hw_addr + reg);
741} 768}
742 769
770#define ew32(reg, val) __ew32(hw, E1000_##reg, (val))
771
772#define e1e_flush() er32(STATUS)
773
774#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) \
775 (__ew32((a), (reg + ((offset) << 2)), (value)))
776
777#define E1000_READ_REG_ARRAY(a, reg, offset) \
778 (readl((a)->hw_addr + reg + ((offset) << 2)))
779
743#endif /* _E1000_H_ */ 780#endif /* _E1000_H_ */
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 923d3fd6ce11..7ca1b68e2e3d 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -36,16 +36,6 @@ struct e1000_adapter;
36 36
37#include "defines.h" 37#include "defines.h"
38 38
39#define er32(reg) __er32(hw, E1000_##reg)
40#define ew32(reg,val) __ew32(hw, E1000_##reg, (val))
41#define e1e_flush() er32(STATUS)
42
43#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) \
44 (writel((value), ((a)->hw_addr + reg + ((offset) << 2))))
45
46#define E1000_READ_REG_ARRAY(a, reg, offset) \
47 (readl((a)->hw_addr + reg + ((offset) << 2)))
48
49enum e1e_registers { 39enum e1e_registers {
50 E1000_CTRL = 0x00000, /* Device Control - RW */ 40 E1000_CTRL = 0x00000, /* Device Control - RW */
51 E1000_STATUS = 0x00008, /* Device Status - RO */ 41 E1000_STATUS = 0x00008, /* Device Status - RO */
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 851f7937db29..cdfb1d68fbd8 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -538,43 +538,15 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
538 adapter->hw_csum_good++; 538 adapter->hw_csum_good++;
539} 539}
540 540
541/**
542 * e1000e_update_tail_wa - helper function for e1000e_update_[rt]dt_wa()
543 * @hw: pointer to the HW structure
544 * @tail: address of tail descriptor register
545 * @i: value to write to tail descriptor register
546 *
547 * When updating the tail register, the ME could be accessing Host CSR
548 * registers at the same time. Normally, this is handled in h/w by an
549 * arbiter but on some parts there is a bug that acknowledges Host accesses
550 * later than it should which could result in the descriptor register to
551 * have an incorrect value. Workaround this by checking the FWSM register
552 * which has bit 24 set while ME is accessing Host CSR registers, wait
553 * if it is set and try again a number of times.
554 **/
555static inline s32 e1000e_update_tail_wa(struct e1000_hw *hw, void __iomem *tail,
556 unsigned int i)
557{
558 unsigned int j = 0;
559
560 while ((j++ < E1000_ICH_FWSM_PCIM2PCI_COUNT) &&
561 (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI))
562 udelay(50);
563
564 writel(i, tail);
565
566 if ((j == E1000_ICH_FWSM_PCIM2PCI_COUNT) && (i != readl(tail)))
567 return E1000_ERR_SWFW_SYNC;
568
569 return 0;
570}
571
572static void e1000e_update_rdt_wa(struct e1000_ring *rx_ring, unsigned int i) 541static void e1000e_update_rdt_wa(struct e1000_ring *rx_ring, unsigned int i)
573{ 542{
574 struct e1000_adapter *adapter = rx_ring->adapter; 543 struct e1000_adapter *adapter = rx_ring->adapter;
575 struct e1000_hw *hw = &adapter->hw; 544 struct e1000_hw *hw = &adapter->hw;
545 s32 ret_val = __ew32_prepare(hw);
546
547 writel(i, rx_ring->tail);
576 548
577 if (e1000e_update_tail_wa(hw, rx_ring->tail, i)) { 549 if (unlikely(!ret_val && (i != readl(rx_ring->tail)))) {
578 u32 rctl = er32(RCTL); 550 u32 rctl = er32(RCTL);
579 ew32(RCTL, rctl & ~E1000_RCTL_EN); 551 ew32(RCTL, rctl & ~E1000_RCTL_EN);
580 e_err("ME firmware caused invalid RDT - resetting\n"); 552 e_err("ME firmware caused invalid RDT - resetting\n");
@@ -586,8 +558,11 @@ static void e1000e_update_tdt_wa(struct e1000_ring *tx_ring, unsigned int i)
586{ 558{
587 struct e1000_adapter *adapter = tx_ring->adapter; 559 struct e1000_adapter *adapter = tx_ring->adapter;
588 struct e1000_hw *hw = &adapter->hw; 560 struct e1000_hw *hw = &adapter->hw;
561 s32 ret_val = __ew32_prepare(hw);
589 562
590 if (e1000e_update_tail_wa(hw, tx_ring->tail, i)) { 563 writel(i, tx_ring->tail);
564
565 if (unlikely(!ret_val && (i != readl(tx_ring->tail)))) {
591 u32 tctl = er32(TCTL); 566 u32 tctl = er32(TCTL);
592 ew32(TCTL, tctl & ~E1000_TCTL_EN); 567 ew32(TCTL, tctl & ~E1000_TCTL_EN);
593 e_err("ME firmware caused invalid TDT - resetting\n"); 568 e_err("ME firmware caused invalid TDT - resetting\n");
@@ -1646,7 +1621,10 @@ static void e1000_clean_rx_ring(struct e1000_ring *rx_ring)
1646 adapter->flags2 &= ~FLAG2_IS_DISCARDING; 1621 adapter->flags2 &= ~FLAG2_IS_DISCARDING;
1647 1622
1648 writel(0, rx_ring->head); 1623 writel(0, rx_ring->head);
1649 writel(0, rx_ring->tail); 1624 if (rx_ring->adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
1625 e1000e_update_rdt_wa(rx_ring, 0);
1626 else
1627 writel(0, rx_ring->tail);
1650} 1628}
1651 1629
1652static void e1000e_downshift_workaround(struct work_struct *work) 1630static void e1000e_downshift_workaround(struct work_struct *work)
@@ -2319,7 +2297,10 @@ static void e1000_clean_tx_ring(struct e1000_ring *tx_ring)
2319 tx_ring->next_to_clean = 0; 2297 tx_ring->next_to_clean = 0;
2320 2298
2321 writel(0, tx_ring->head); 2299 writel(0, tx_ring->head);
2322 writel(0, tx_ring->tail); 2300 if (tx_ring->adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
2301 e1000e_update_tdt_wa(tx_ring, 0);
2302 else
2303 writel(0, tx_ring->tail);
2323} 2304}
2324 2305
2325/** 2306/**