author     Alexander Duyck <alexander.h.duyck@intel.com>    2013-01-31 02:15:51 -0500
committer  Jeff Kirsher <jeffrey.t.kirsher@intel.com>       2013-02-16 00:46:51 -0500
commit     3eb1a40f4b6d0c41feb60e55b53d11f70fc8ee8e
tree       832de933f9b809bb53ec4071442ebb9d7e300aeb  /drivers/net/ethernet/intel/igbvf/netdev.c
parent     e792cd916cf74315bddb2c8b2323ef498cd7bfde
igbvf: Make next_to_watch a pointer and adjust memory barriers to avoid races
This change addresses several races that become possible because next_to_watch
could be set to a value indicating that the descriptor is done when it is not.
To correct that, next_to_watch is made a pointer that is set to NULL during
cleanup and set to the eop_desc only after the descriptor ring has been written.
To enforce proper ordering, the next_to_watch pointer is not set until after the
wmb() that orders the writes of the last descriptor in a transmit. To guarantee
that the descriptor is not read before eop_desc, cleanup uses
read_barrier_depends(), which is only really necessary on the Alpha architecture.
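
As a rough illustration of the ordering described above, here is a minimal
userspace C11 sketch of the same publish/consume pattern. All names
(fake_tx_desc, fake_tx_queue, fake_clean_tx) are hypothetical, and C11
release/acquire atomics stand in for the driver's wmb() and
read_barrier_depends(); this is an analogy for the pattern, not the driver code.

/* sketch only: models the next_to_watch publication pattern in userspace C11 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct fake_tx_desc {
	uint32_t status;		/* bit 0 stands in for the DD bit */
};

struct fake_buffer_info {
	/* pointer to the end-of-packet descriptor, NULL = no work pending */
	_Atomic(struct fake_tx_desc *) next_to_watch;
};

/* transmit side: write the descriptor first, then publish the pointer */
static void fake_tx_queue(struct fake_buffer_info *bi, struct fake_tx_desc *eop)
{
	eop->status = 0;		/* descriptor contents written first */
	/* release ordering plays the role of the wmb() before the store */
	atomic_store_explicit(&bi->next_to_watch, eop, memory_order_release);
}

/* cleanup side: read the pointer first, then the descriptor it names */
static int fake_clean_tx(struct fake_buffer_info *bi)
{
	/* acquire ordering plays the role of read_barrier_depends() */
	struct fake_tx_desc *eop =
		atomic_load_explicit(&bi->next_to_watch, memory_order_acquire);

	if (!eop)
		return 0;		/* no work pending */
	if (!(eop->status & 1))
		return 0;		/* DD not set: hardware not done yet */

	/* clear next_to_watch to prevent false hangs */
	atomic_store_explicit(&bi->next_to_watch, NULL, memory_order_relaxed);
	return 1;
}

int main(void)
{
	struct fake_tx_desc desc;
	struct fake_buffer_info bi = { .next_to_watch = NULL };

	fake_tx_queue(&bi, &desc);
	desc.status |= 1;		/* pretend the hardware set the DD bit */
	printf("cleaned: %d\n", fake_clean_tx(&bi));
	return 0;
}

The key point is the pairing: the transmit side publishes the eop_desc pointer
only after the descriptor writes, and the cleanup side orders its read of the
descriptor after its read of the pointer, so a non-NULL next_to_watch always
names a fully written descriptor.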
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Diffstat (limited to 'drivers/net/ethernet/intel/igbvf/netdev.c')
-rw-r--r--  drivers/net/ethernet/intel/igbvf/netdev.c | 52
1 file changed, 30 insertions(+), 22 deletions(-)
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index f53f7136e508..d60cd4393415 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -797,20 +797,31 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
 	struct sk_buff *skb;
 	union e1000_adv_tx_desc *tx_desc, *eop_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
-	unsigned int i, eop, count = 0;
+	unsigned int i, count = 0;
 	bool cleaned = false;
 
 	i = tx_ring->next_to_clean;
-	eop = tx_ring->buffer_info[i].next_to_watch;
-	eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
+	buffer_info = &tx_ring->buffer_info[i];
+	eop_desc = buffer_info->next_to_watch;
+
+	do {
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		read_barrier_depends();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		buffer_info->next_to_watch = NULL;
 
-	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
-	       (count < tx_ring->count)) {
-		rmb();	/* read buffer_info after eop_desc status */
 		for (cleaned = false; !cleaned; count++) {
 			tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
-			buffer_info = &tx_ring->buffer_info[i];
-			cleaned = (i == eop);
+			cleaned = (tx_desc == eop_desc);
 			skb = buffer_info->skb;
 
 			if (skb) {
@@ -831,10 +842,12 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
 			i++;
 			if (i == tx_ring->count)
 				i = 0;
+
+			buffer_info = &tx_ring->buffer_info[i];
 		}
-		eop = tx_ring->buffer_info[i].next_to_watch;
-		eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
-	}
+
+		eop_desc = buffer_info->next_to_watch;
+	} while (count < tx_ring->count);
 
 	tx_ring->next_to_clean = i;
 
@@ -1961,7 +1974,6 @@ static int igbvf_tso(struct igbvf_adapter *adapter,
 	context_desc->seqnum_seed = 0;
 
 	buffer_info->time_stamp = jiffies;
-	buffer_info->next_to_watch = i;
 	buffer_info->dma = 0;
 	i++;
 	if (i == tx_ring->count)
@@ -2021,7 +2033,6 @@ static inline bool igbvf_tx_csum(struct igbvf_adapter *adapter,
 	context_desc->mss_l4len_idx = 0;
 
 	buffer_info->time_stamp = jiffies;
-	buffer_info->next_to_watch = i;
 	buffer_info->dma = 0;
 	i++;
 	if (i == tx_ring->count)
@@ -2061,8 +2072,7 @@ static int igbvf_maybe_stop_tx(struct net_device *netdev, int size)
 
 static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 				   struct igbvf_ring *tx_ring,
-				   struct sk_buff *skb,
-				   unsigned int first)
+				   struct sk_buff *skb)
 {
 	struct igbvf_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
@@ -2077,7 +2087,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	buffer_info->length = len;
 	/* set time_stamp *before* dma to help avoid a possible race */
 	buffer_info->time_stamp = jiffies;
-	buffer_info->next_to_watch = i;
 	buffer_info->mapped_as_page = false;
 	buffer_info->dma = dma_map_single(&pdev->dev, skb->data, len,
 					  DMA_TO_DEVICE);
@@ -2100,7 +2109,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 		BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD);
 		buffer_info->length = len;
 		buffer_info->time_stamp = jiffies;
-		buffer_info->next_to_watch = i;
 		buffer_info->mapped_as_page = true;
 		buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag, 0, len,
 						    DMA_TO_DEVICE);
@@ -2109,7 +2117,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	}
 
 	tx_ring->buffer_info[i].skb = skb;
-	tx_ring->buffer_info[first].next_to_watch = i;
 
 	return ++count;
 
@@ -2120,7 +2127,6 @@ dma_error:
 	buffer_info->dma = 0;
 	buffer_info->time_stamp = 0;
 	buffer_info->length = 0;
-	buffer_info->next_to_watch = 0;
 	buffer_info->mapped_as_page = false;
 	if (count)
 		count--;
@@ -2139,7 +2145,8 @@ dma_error:
 
 static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
 				      struct igbvf_ring *tx_ring,
-				      int tx_flags, int count, u32 paylen,
+				      int tx_flags, int count,
+				      unsigned int first, u32 paylen,
 				      u8 hdr_len)
 {
 	union e1000_adv_tx_desc *tx_desc = NULL;
@@ -2189,6 +2196,7 @@ static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
 	 * such as IA-64). */
 	wmb();
 
+	tx_ring->buffer_info[first].next_to_watch = tx_desc;
 	tx_ring->next_to_use = i;
 	writel(i, adapter->hw.hw_addr + tx_ring->tail);
 	/* we need this if more than one processor can write to our tail
@@ -2255,11 +2263,11 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb,
 	 * count reflects descriptors mapped, if 0 then mapping error
 	 * has occurred and we need to rewind the descriptor queue
 	 */
-	count = igbvf_tx_map_adv(adapter, tx_ring, skb, first);
+	count = igbvf_tx_map_adv(adapter, tx_ring, skb);
 
 	if (count) {
 		igbvf_tx_queue_adv(adapter, tx_ring, tx_flags, count,
-				   skb->len, hdr_len);
+				   first, skb->len, hdr_len);
 		/* Make sure there is space in the ring for the next send. */
 		igbvf_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 4);
 	} else {