author		Alexander Duyck <alexander.h.duyck@intel.com>	2013-01-31 02:43:22 -0500
committer	Jeff Kirsher <jeffrey.t.kirsher@intel.com>	2013-03-08 03:03:26 -0500
commit		e757e3e198795bfc56a28b41c494bcb27c0ee2ab
tree		3a1996635f2c530e798f34f495bab308199b42ba
parent		7f0e44ac9f7f12a2519bfed9ea4df3c1471bd8bb
ixgbevf: Make next_to_watch a pointer and adjust memory barriers to avoid races
This change addresses several races that become possible because
next_to_watch could be set to a value indicating that the descriptor is
done when it is not. To correct that, we instead make next_to_watch a
pointer that is set to NULL during cleanup and set to the eop_desc only
after the descriptor rings have been written.
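As an illustration of the cleanup-side contract, a minimal sketch in
kernel-style C follows; the types and names here (tx_desc, tx_buffer,
tx_work_done, dd_bit) are hypothetical simplifications, not the driver's
own code:

/* Minimal sketch of the cleanup-side contract; hypothetical types. */
#include <linux/types.h>	/* __le32 */
#include <asm/barrier.h>	/* read_barrier_depends() */

struct tx_desc { __le32 wb_status; };			/* hardware descriptor */
struct tx_buffer { struct tx_desc *next_to_watch; };	/* software ring state */

static bool tx_work_done(struct tx_buffer *buf, __le32 dd_bit)
{
	struct tx_desc *eop = buf->next_to_watch;

	/* NULL means the producer has not published this packet yet,
	 * so half-written descriptors can never be examined
	 */
	if (!eop)
		return false;

	/* order the eop load before any reads through it (Alpha only) */
	read_barrier_depends();

	/* hardware has not written the descriptor back yet */
	if (!(eop->wb_status & dd_bit))
		return false;

	/* consume the pointer so a recycled descriptor's stale DD bit
	 * cannot be mistaken for newly completed work
	 */
	buf->next_to_watch = NULL;
	return true;
}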
To enforce proper ordering, the next_to_watch pointer is not set until
after a wmb() issued once the values have been written to the last
descriptor of a transmit. To guarantee that the descriptor contents are
not read before the eop_desc pointer itself, we use
read_barrier_depends(), which is only really necessary on the Alpha
architecture.
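The transmit side of the same contract is the mirror image: make every
descriptor write visible, then publish the pointer. A matching sketch,
again with the hypothetical types above rather than the driver's own:

/* Producer-side sketch, same hypothetical types as above. */
static void tx_publish(struct tx_buffer *buf, struct tx_desc *eop)
{
	/* ... fill in every descriptor for the packet, ending at eop ... */

	/* force the descriptor writes to complete first on weakly
	 * ordered memory models; only then ...
	 */
	wmb();

	/* ... publish the EOP pointer that exposes them to cleanup */
	buf->next_to_watch = eop;
}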
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
-rw-r--r--	drivers/net/ethernet/intel/ixgbevf/ixgbevf.h	 2
-rw-r--r--	drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c	71
2 files changed, 40 insertions(+), 33 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index fc0af9a3bb35..fff0d9867529 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -44,8 +44,8 @@ struct ixgbevf_tx_buffer {
 	struct sk_buff *skb;
 	dma_addr_t dma;
 	unsigned long time_stamp;
+	union ixgbe_adv_tx_desc *next_to_watch;
 	u16 length;
-	u16 next_to_watch;
 	u16 mapped_as_page;
 };
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index c3db6cd69b68..20736eb567d8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -190,28 +190,37 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 	struct ixgbevf_adapter *adapter = q_vector->adapter;
 	union ixgbe_adv_tx_desc *tx_desc, *eop_desc;
 	struct ixgbevf_tx_buffer *tx_buffer_info;
-	unsigned int i, eop, count = 0;
+	unsigned int i, count = 0;
 	unsigned int total_bytes = 0, total_packets = 0;
 
 	if (test_bit(__IXGBEVF_DOWN, &adapter->state))
 		return true;
 
 	i = tx_ring->next_to_clean;
-	eop = tx_ring->tx_buffer_info[i].next_to_watch;
-	eop_desc = IXGBEVF_TX_DESC(tx_ring, eop);
+	tx_buffer_info = &tx_ring->tx_buffer_info[i];
+	eop_desc = tx_buffer_info->next_to_watch;
 
-	while ((eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)) &&
-	       (count < tx_ring->count)) {
+	do {
 		bool cleaned = false;
-		rmb(); /* read buffer_info after eop_desc */
-		/* eop could change between read and DD-check */
-		if (unlikely(eop != tx_ring->tx_buffer_info[i].next_to_watch))
-			goto cont_loop;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		read_barrier_depends();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer_info->next_to_watch = NULL;
+
 		for ( ; !cleaned; count++) {
 			struct sk_buff *skb;
 			tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
-			tx_buffer_info = &tx_ring->tx_buffer_info[i];
-			cleaned = (i == eop);
+			cleaned = (tx_desc == eop_desc);
 			skb = tx_buffer_info->skb;
 
 			if (cleaned && skb) {
@@ -234,12 +243,12 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 			i++;
 			if (i == tx_ring->count)
 				i = 0;
+
+			tx_buffer_info = &tx_ring->tx_buffer_info[i];
 		}
 
-cont_loop:
-		eop = tx_ring->tx_buffer_info[i].next_to_watch;
-		eop_desc = IXGBEVF_TX_DESC(tx_ring, eop);
-	}
+		eop_desc = tx_buffer_info->next_to_watch;
+	} while (count < tx_ring->count);
 
 	tx_ring->next_to_clean = i;
 
@@ -2806,8 +2815,7 @@ static bool ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
 }
 
 static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
-			  struct sk_buff *skb, u32 tx_flags,
-			  unsigned int first)
+			  struct sk_buff *skb, u32 tx_flags)
 {
 	struct ixgbevf_tx_buffer *tx_buffer_info;
 	unsigned int len;
@@ -2832,7 +2840,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 						     size, DMA_TO_DEVICE);
 		if (dma_mapping_error(tx_ring->dev, tx_buffer_info->dma))
 			goto dma_error;
-		tx_buffer_info->next_to_watch = i;
 
 		len -= size;
 		total -= size;
@@ -2862,7 +2869,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 					      tx_buffer_info->dma))
 				goto dma_error;
 			tx_buffer_info->mapped_as_page = true;
-			tx_buffer_info->next_to_watch = i;
 
 			len -= size;
 			total -= size;
@@ -2881,8 +2887,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 	else
 		i = i - 1;
 	tx_ring->tx_buffer_info[i].skb = skb;
-	tx_ring->tx_buffer_info[first].next_to_watch = i;
-	tx_ring->tx_buffer_info[first].time_stamp = jiffies;
 
 	return count;
 
@@ -2891,7 +2895,6 @@ dma_error:
 
 	/* clear timestamp and dma mappings for failed tx_buffer_info map */
 	tx_buffer_info->dma = 0;
-	tx_buffer_info->next_to_watch = 0;
 	count--;
 
 	/* clear timestamp and dma mappings for remaining portion of packet */
@@ -2908,7 +2911,8 @@ dma_error:
 }
 
 static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags,
-			     int count, u32 paylen, u8 hdr_len)
+			     int count, unsigned int first, u32 paylen,
+			     u8 hdr_len)
 {
 	union ixgbe_adv_tx_desc *tx_desc = NULL;
 	struct ixgbevf_tx_buffer *tx_buffer_info;
@@ -2959,6 +2963,16 @@ static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags,
 
 	tx_desc->read.cmd_type_len |= cpu_to_le32(txd_cmd);
 
+	tx_ring->tx_buffer_info[first].time_stamp = jiffies;
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+
+	tx_ring->tx_buffer_info[first].next_to_watch = tx_desc;
 	tx_ring->next_to_use = i;
 }
 
@@ -3050,15 +3064,8 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		tx_flags |= IXGBE_TX_FLAGS_CSUM;
 
 	ixgbevf_tx_queue(tx_ring, tx_flags,
-			 ixgbevf_tx_map(tx_ring, skb, tx_flags, first),
-			 skb->len, hdr_len);
-	/*
-	 * Force memory writes to complete before letting h/w
-	 * know there are new descriptors to fetch.  (Only
-	 * applicable for weak-ordered memory model archs,
-	 * such as IA-64).
-	 */
-	wmb();
+			 ixgbevf_tx_map(tx_ring, skb, tx_flags),
+			 first, skb->len, hdr_len);
 
 	writel(tx_ring->next_to_use, adapter->hw.hw_addr + tx_ring->tail);
 