Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c  433
1 file changed, 223 insertions, 210 deletions
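
One pattern worth calling out before the diff itself: the reworked i40e_clean_tx_irq() below biases its ring index below zero (i -= tx_ring->count) so the wrap check becomes a cheap "if (!i)" instead of a compare against the ring size, and the bias is undone before next_to_clean is stored. The following is a minimal stand-alone sketch of that bookkeeping only; the names and sizes are illustrative and it is not driver code.

/* toy model of the negative-index ring walk used in the new cleanup loop */
#include <stdio.h>

#define RING_SIZE 8

int main(void)
{
        unsigned int next_to_clean = 5;   /* pretend we start mid-ring */
        int cleaned = 0;
        int i = next_to_clean;

        i -= RING_SIZE;                   /* bias the index below zero */

        do {
                cleaned++;                /* "clean" one descriptor */

                i++;
                if (!i)                   /* wrapped past the ring end */
                        i -= RING_SIZE;
        } while (cleaned < 10);

        i += RING_SIZE;                   /* undo the bias before storing */
        next_to_clean = i;

        printf("cleaned %d, next_to_clean = %u\n", cleaned, next_to_clean);
        return 0;
}
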
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 49d2cfa9b0cc..f1f03bc5c729 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -37,6 +37,7 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
                            ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
 }
 
+#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 /**
  * i40e_program_fdir_filter - Program a Flow Director filter
  * @fdir_input: Packet data that will be filter parameters
@@ -50,6 +51,7 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
         struct i40e_tx_buffer *tx_buf;
         struct i40e_tx_desc *tx_desc;
         struct i40e_ring *tx_ring;
+        unsigned int fpt, dcc;
         struct i40e_vsi *vsi;
         struct device *dev;
         dma_addr_t dma;
@@ -64,93 +66,78 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
         if (!vsi)
                 return -ENOENT;
 
-        tx_ring = &vsi->tx_rings[0];
+        tx_ring = vsi->tx_rings[0];
         dev = tx_ring->dev;
 
         dma = dma_map_single(dev, fdir_data->raw_packet,
                              I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE);
         if (dma_mapping_error(dev, dma))
                 goto dma_fail;
 
         /* grab the next descriptor */
-        fdir_desc = I40E_TX_FDIRDESC(tx_ring, tx_ring->next_to_use);
-        tx_buf = &tx_ring->tx_bi[tx_ring->next_to_use];
-        tx_ring->next_to_use++;
-        if (tx_ring->next_to_use == tx_ring->count)
-                tx_ring->next_to_use = 0;
+        i = tx_ring->next_to_use;
+        fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
+        tx_buf = &tx_ring->tx_bi[i];
+
+        tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;
 
-        fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32((fdir_data->q_index
-                                            << I40E_TXD_FLTR_QW0_QINDEX_SHIFT)
-                                            & I40E_TXD_FLTR_QW0_QINDEX_MASK);
+        fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
+              I40E_TXD_FLTR_QW0_QINDEX_MASK;
 
-        fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->flex_off
-                                           << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT)
-                                           & I40E_TXD_FLTR_QW0_FLEXOFF_MASK);
+        fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
+               I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
 
-        fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->pctype
-                                            << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT)
-                                            & I40E_TXD_FLTR_QW0_PCTYPE_MASK);
+        fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
+               I40E_TXD_FLTR_QW0_PCTYPE_MASK;
 
         /* Use LAN VSI Id if not programmed by user */
         if (fdir_data->dest_vsi == 0)
-                fdir_desc->qindex_flex_ptype_vsi |=
-                                          cpu_to_le32((pf->vsi[pf->lan_vsi]->id)
-                                          << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
+                fpt |= (pf->vsi[pf->lan_vsi]->id) <<
+                       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
         else
-                fdir_desc->qindex_flex_ptype_vsi |=
-                                          cpu_to_le32((fdir_data->dest_vsi
-                                          << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT)
-                                          & I40E_TXD_FLTR_QW0_DEST_VSI_MASK);
+                fpt |= ((u32)fdir_data->dest_vsi <<
+                        I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
+                       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
 
-        fdir_desc->dtype_cmd_cntindex =
-                                cpu_to_le32(I40E_TX_DESC_DTYPE_FILTER_PROG);
+        fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
+
+        dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
 
         if (add)
-                fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
-                                I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE
-                                << I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+                dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
+                       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
         else
-                fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
-                                I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE
-                                << I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+                dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
+                       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
 
-        fdir_desc->dtype_cmd_cntindex |= cpu_to_le32((fdir_data->dest_ctl
-                                          << I40E_TXD_FLTR_QW1_DEST_SHIFT)
-                                          & I40E_TXD_FLTR_QW1_DEST_MASK);
+        dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
+               I40E_TXD_FLTR_QW1_DEST_MASK;
 
-        fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
-                (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
-                 & I40E_TXD_FLTR_QW1_FD_STATUS_MASK);
+        dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
+               I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
 
         if (fdir_data->cnt_index != 0) {
-                fdir_desc->dtype_cmd_cntindex |=
-                                cpu_to_le32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK);
-                fdir_desc->dtype_cmd_cntindex |=
-                                cpu_to_le32((fdir_data->cnt_index
-                                << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
-                                & I40E_TXD_FLTR_QW1_CNTINDEX_MASK);
+                dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
+                dcc |= ((u32)fdir_data->cnt_index <<
+                        I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
+                       I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
         }
 
+        fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
         fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
 
         /* Now program a dummy descriptor */
-        tx_desc = I40E_TX_DESC(tx_ring, tx_ring->next_to_use);
-        tx_buf = &tx_ring->tx_bi[tx_ring->next_to_use];
-        tx_ring->next_to_use++;
-        if (tx_ring->next_to_use == tx_ring->count)
-                tx_ring->next_to_use = 0;
+        i = tx_ring->next_to_use;
+        tx_desc = I40E_TX_DESC(tx_ring, i);
+
+        tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;
 
         tx_desc->buffer_addr = cpu_to_le64(dma);
-        td_cmd = I40E_TX_DESC_CMD_EOP |
-                 I40E_TX_DESC_CMD_RS |
-                 I40E_TX_DESC_CMD_DUMMY;
+        td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
 
         tx_desc->cmd_type_offset_bsz =
                 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0);
 
-        /* Mark the data descriptor to be watched */
-        tx_buf->next_to_watch = tx_desc;
-
         /* Force memory writes to complete before letting h/w
          * know there are new descriptors to fetch. (Only
          * applicable for weak-ordered memory model archs,
@@ -158,6 +145,9 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
          */
         wmb();
 
+        /* Mark the data descriptor to be watched */
+        tx_buf->next_to_watch = tx_desc;
+
         writel(tx_ring->next_to_use, tx_ring->tail);
         return 0;
 
@@ -188,27 +178,30 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u32 qw, u8 prog_id)
 }
 
 /**
- * i40e_unmap_tx_resource - Release a Tx buffer
+ * i40e_unmap_and_free_tx_resource - Release a Tx buffer
  * @ring: the ring that owns the buffer
  * @tx_buffer: the buffer to free
  **/
-static inline void i40e_unmap_tx_resource(struct i40e_ring *ring,
-                                          struct i40e_tx_buffer *tx_buffer)
+static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
+                                            struct i40e_tx_buffer *tx_buffer)
 {
-        if (tx_buffer->dma) {
-                if (tx_buffer->tx_flags & I40E_TX_FLAGS_MAPPED_AS_PAGE)
-                        dma_unmap_page(ring->dev,
-                                       tx_buffer->dma,
-                                       tx_buffer->length,
-                                       DMA_TO_DEVICE);
-                else
+        if (tx_buffer->skb) {
+                dev_kfree_skb_any(tx_buffer->skb);
+                if (dma_unmap_len(tx_buffer, len))
                         dma_unmap_single(ring->dev,
-                                         tx_buffer->dma,
-                                         tx_buffer->length,
+                                         dma_unmap_addr(tx_buffer, dma),
+                                         dma_unmap_len(tx_buffer, len),
                                          DMA_TO_DEVICE);
+        } else if (dma_unmap_len(tx_buffer, len)) {
+                dma_unmap_page(ring->dev,
+                               dma_unmap_addr(tx_buffer, dma),
+                               dma_unmap_len(tx_buffer, len),
+                               DMA_TO_DEVICE);
         }
-        tx_buffer->dma = 0;
-        tx_buffer->time_stamp = 0;
+        tx_buffer->next_to_watch = NULL;
+        tx_buffer->skb = NULL;
+        dma_unmap_len_set(tx_buffer, len, 0);
+        /* tx_buffer must be completely set up in the transmit path */
 }
 
 /**
@@ -217,7 +210,6 @@ static inline void i40e_unmap_tx_resource(struct i40e_ring *ring,
  **/
 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
 {
-        struct i40e_tx_buffer *tx_buffer;
         unsigned long bi_size;
         u16 i;
 
@@ -226,13 +218,8 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
                 return;
 
         /* Free all the Tx ring sk_buffs */
-        for (i = 0; i < tx_ring->count; i++) {
-                tx_buffer = &tx_ring->tx_bi[i];
-                i40e_unmap_tx_resource(tx_ring, tx_buffer);
-                if (tx_buffer->skb)
-                        dev_kfree_skb_any(tx_buffer->skb);
-                tx_buffer->skb = NULL;
-        }
+        for (i = 0; i < tx_ring->count; i++)
+                i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
 
         bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
         memset(tx_ring->tx_bi, 0, bi_size);
@@ -242,6 +229,13 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
 
         tx_ring->next_to_use = 0;
         tx_ring->next_to_clean = 0;
+
+        if (!tx_ring->netdev)
+                return;
+
+        /* cleanup Tx queue statistics */
+        netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+                                                  tx_ring->queue_index));
 }
 
 /**
@@ -300,14 +294,14 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
          * run the check_tx_hang logic with a transmit completion
          * pending but without time to complete it yet.
          */
-        if ((tx_ring->tx_stats.tx_done_old == tx_ring->tx_stats.packets) &&
+        if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
             tx_pending) {
                 /* make sure it is true for two checks in a row */
                 ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
                                        &tx_ring->state);
         } else {
                 /* update completed stats and disarm the hang check */
-                tx_ring->tx_stats.tx_done_old = tx_ring->tx_stats.packets;
+                tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
                 clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
         }
 
@@ -331,62 +325,88 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 
         tx_buf = &tx_ring->tx_bi[i];
         tx_desc = I40E_TX_DESC(tx_ring, i);
+        i -= tx_ring->count;
 
-        for (; budget; budget--) {
-                struct i40e_tx_desc *eop_desc;
-
-                eop_desc = tx_buf->next_to_watch;
+        do {
+                struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
                 /* if next_to_watch is not set then there is no work pending */
                 if (!eop_desc)
                         break;
 
+                /* prevent any other reads prior to eop_desc */
+                read_barrier_depends();
+
                 /* if the descriptor isn't done, no work yet to do */
                 if (!(eop_desc->cmd_type_offset_bsz &
                       cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
                         break;
 
-                /* count the packet as being completed */
-                tx_ring->tx_stats.completed++;
+                /* clear next_to_watch to prevent false hangs */
                 tx_buf->next_to_watch = NULL;
-                tx_buf->time_stamp = 0;
-
-                /* set memory barrier before eop_desc is verified */
-                rmb();
 
-                do {
-                        i40e_unmap_tx_resource(tx_ring, tx_buf);
+                /* update the statistics for this packet */
+                total_bytes += tx_buf->bytecount;
+                total_packets += tx_buf->gso_segs;
 
-                        /* clear dtype status */
-                        tx_desc->cmd_type_offset_bsz &=
-                                ~cpu_to_le64(I40E_TXD_QW1_DTYPE_MASK);
+                /* free the skb */
+                dev_kfree_skb_any(tx_buf->skb);
 
-                        if (likely(tx_desc == eop_desc)) {
-                                eop_desc = NULL;
+                /* unmap skb header data */
+                dma_unmap_single(tx_ring->dev,
+                                 dma_unmap_addr(tx_buf, dma),
+                                 dma_unmap_len(tx_buf, len),
+                                 DMA_TO_DEVICE);
 
-                                dev_kfree_skb_any(tx_buf->skb);
-                                tx_buf->skb = NULL;
+                /* clear tx_buffer data */
+                tx_buf->skb = NULL;
+                dma_unmap_len_set(tx_buf, len, 0);
 
-                                total_bytes += tx_buf->bytecount;
-                                total_packets += tx_buf->gso_segs;
-                        }
+                /* unmap remaining buffers */
+                while (tx_desc != eop_desc) {
 
                         tx_buf++;
                         tx_desc++;
                         i++;
-                        if (unlikely(i == tx_ring->count)) {
-                                i = 0;
+                        if (unlikely(!i)) {
+                                i -= tx_ring->count;
                                 tx_buf = tx_ring->tx_bi;
                                 tx_desc = I40E_TX_DESC(tx_ring, 0);
                         }
-                } while (eop_desc);
-        }
 
+                        /* unmap any remaining paged data */
+                        if (dma_unmap_len(tx_buf, len)) {
+                                dma_unmap_page(tx_ring->dev,
+                                               dma_unmap_addr(tx_buf, dma),
+                                               dma_unmap_len(tx_buf, len),
+                                               DMA_TO_DEVICE);
+                                dma_unmap_len_set(tx_buf, len, 0);
+                        }
+                }
+
+                /* move us one more past the eop_desc for start of next pkt */
+                tx_buf++;
+                tx_desc++;
+                i++;
+                if (unlikely(!i)) {
+                        i -= tx_ring->count;
+                        tx_buf = tx_ring->tx_bi;
+                        tx_desc = I40E_TX_DESC(tx_ring, 0);
+                }
+
+                /* update budget accounting */
+                budget--;
+        } while (likely(budget));
+
+        i += tx_ring->count;
         tx_ring->next_to_clean = i;
-        tx_ring->tx_stats.bytes += total_bytes;
-        tx_ring->tx_stats.packets += total_packets;
+        u64_stats_update_begin(&tx_ring->syncp);
+        tx_ring->stats.bytes += total_bytes;
+        tx_ring->stats.packets += total_packets;
+        u64_stats_update_end(&tx_ring->syncp);
         tx_ring->q_vector->tx.total_bytes += total_bytes;
         tx_ring->q_vector->tx.total_packets += total_packets;
+
         if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
                 /* schedule immediate reset if we believe we hung */
                 dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
@@ -414,6 +434,10 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
                 return true;
         }
 
+        netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
+                                                      tx_ring->queue_index),
+                                  total_packets, total_bytes);
+
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
         if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
                      (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
@@ -524,8 +548,6 @@ static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
         i40e_set_new_dynamic_itr(&q_vector->tx);
         if (old_itr != q_vector->tx.itr)
                 wr32(hw, reg_addr, q_vector->tx.itr);
-
-        i40e_flush(hw);
 }
 
 /**
@@ -1042,8 +1064,10 @@ next_desc:
         }
 
         rx_ring->next_to_clean = i;
-        rx_ring->rx_stats.packets += total_rx_packets;
-        rx_ring->rx_stats.bytes += total_rx_bytes;
+        u64_stats_update_begin(&rx_ring->syncp);
+        rx_ring->stats.packets += total_rx_packets;
+        rx_ring->stats.bytes += total_rx_bytes;
+        u64_stats_update_end(&rx_ring->syncp);
         rx_ring->q_vector->rx.total_packets += total_rx_packets;
         rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
 
@@ -1067,27 +1091,28 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
         struct i40e_q_vector *q_vector =
                                container_of(napi, struct i40e_q_vector, napi);
         struct i40e_vsi *vsi = q_vector->vsi;
+        struct i40e_ring *ring;
         bool clean_complete = true;
         int budget_per_ring;
-        int i;
 
         if (test_bit(__I40E_DOWN, &vsi->state)) {
                 napi_complete(napi);
                 return 0;
         }
 
+        /* Since the actual Tx work is minimal, we can give the Tx a larger
+         * budget and be more aggressive about cleaning up the Tx descriptors.
+         */
+        i40e_for_each_ring(ring, q_vector->tx)
+                clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+
         /* We attempt to distribute budget to each Rx queue fairly, but don't
          * allow the budget to go below 1 because that would exit polling early.
-         * Since the actual Tx work is minimal, we can give the Tx a larger
-         * budget and be more aggressive about cleaning up the Tx descriptors.
          */
         budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
-        for (i = 0; i < q_vector->num_ringpairs; i++) {
-                clean_complete &= i40e_clean_tx_irq(q_vector->tx.ring[i],
-                                                    vsi->work_limit);
-                clean_complete &= i40e_clean_rx_irq(q_vector->rx.ring[i],
-                                                    budget_per_ring);
-        }
+
+        i40e_for_each_ring(ring, q_vector->rx)
+                clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
 
         /* If work not completed, return budget and polling will return */
         if (!clean_complete)
@@ -1117,7 +1142,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
                 qval = rd32(hw, I40E_QINT_TQCTL(0));
                 qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
                 wr32(hw, I40E_QINT_TQCTL(0), qval);
-                i40e_flush(hw);
+
+                i40e_irq_dynamic_enable_icr0(vsi->back);
         }
 }
 
@@ -1144,6 +1170,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
         struct tcphdr *th;
         unsigned int hlen;
         u32 flex_ptype, dtype_cmd;
+        u16 i;
 
         /* make sure ATR is enabled */
         if (!(pf->flags & I40E_FLAG_FDIR_ATR_ENABLED))
@@ -1183,10 +1210,11 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
         tx_ring->atr_count = 0;
 
         /* grab the next descriptor */
-        fdir_desc = I40E_TX_FDIRDESC(tx_ring, tx_ring->next_to_use);
-        tx_ring->next_to_use++;
-        if (tx_ring->next_to_use == tx_ring->count)
-                tx_ring->next_to_use = 0;
+        i = tx_ring->next_to_use;
+        fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
+
+        i++;
+        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
 
         flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
                       I40E_TXD_FLTR_QW0_QINDEX_MASK;
@@ -1216,7 +1244,6 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
         fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
 }
 
-#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 /**
  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
  * @skb: send buffer
@@ -1276,27 +1303,6 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
 }
 
 /**
- * i40e_tx_csum - is checksum offload requested
- * @tx_ring: ptr to the ring to send
- * @skb: ptr to the skb we're sending
- * @tx_flags: the collected send information
- * @protocol: the send protocol
- *
- * Returns true if checksum offload is requested
- **/
-static bool i40e_tx_csum(struct i40e_ring *tx_ring, struct sk_buff *skb,
-                         u32 tx_flags, __be16 protocol)
-{
-        if ((skb->ip_summed != CHECKSUM_PARTIAL) &&
-            !(tx_flags & I40E_TX_FLAGS_TXSW)) {
-                if (!(tx_flags & I40E_TX_FLAGS_HW_VLAN))
-                        return false;
-        }
-
-        return skb->ip_summed == CHECKSUM_PARTIAL;
-}
-
-/**
  * i40e_tso - set up the tso context descriptor
  * @tx_ring: ptr to the ring to send
  * @skb: ptr to the skb we're sending
@@ -1482,15 +1488,16 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
                                const u32 cd_tunneling, const u32 cd_l2tag2)
 {
         struct i40e_tx_context_desc *context_desc;
+        int i = tx_ring->next_to_use;
 
         if (!cd_type_cmd_tso_mss && !cd_tunneling && !cd_l2tag2)
                 return;
 
         /* grab the next descriptor */
-        context_desc = I40E_TX_CTXTDESC(tx_ring, tx_ring->next_to_use);
-        tx_ring->next_to_use++;
-        if (tx_ring->next_to_use == tx_ring->count)
-                tx_ring->next_to_use = 0;
+        context_desc = I40E_TX_CTXTDESC(tx_ring, i);
+
+        i++;
+        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
 
         /* cpu_to_le32 and assign to struct fields */
         context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
@@ -1512,68 +1519,71 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                         struct i40e_tx_buffer *first, u32 tx_flags,
                         const u8 hdr_len, u32 td_cmd, u32 td_offset)
 {
-        struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
         unsigned int data_len = skb->data_len;
         unsigned int size = skb_headlen(skb);
-        struct device *dev = tx_ring->dev;
-        u32 paylen = skb->len - hdr_len;
-        u16 i = tx_ring->next_to_use;
+        struct skb_frag_struct *frag;
         struct i40e_tx_buffer *tx_bi;
         struct i40e_tx_desc *tx_desc;
-        u32 buf_offset = 0;
+        u16 i = tx_ring->next_to_use;
         u32 td_tag = 0;
         dma_addr_t dma;
         u16 gso_segs;
 
-        dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
-        if (dma_mapping_error(dev, dma))
-                goto dma_error;
-
         if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
                 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
                 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
                          I40E_TX_FLAGS_VLAN_SHIFT;
         }
 
+        if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
+                gso_segs = skb_shinfo(skb)->gso_segs;
+        else
+                gso_segs = 1;
+
+        /* multiply data chunks by size of headers */
+        first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
+        first->gso_segs = gso_segs;
+        first->skb = skb;
+        first->tx_flags = tx_flags;
+
+        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
         tx_desc = I40E_TX_DESC(tx_ring, i);
-        for (;;) {
-                while (size > I40E_MAX_DATA_PER_TXD) {
-                        tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset);
+        tx_bi = first;
+
+        for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+                if (dma_mapping_error(tx_ring->dev, dma))
+                        goto dma_error;
+
+                /* record length, and DMA address */
+                dma_unmap_len_set(tx_bi, len, size);
+                dma_unmap_addr_set(tx_bi, dma, dma);
+
+                tx_desc->buffer_addr = cpu_to_le64(dma);
+
+                while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
                         tx_desc->cmd_type_offset_bsz =
                                 build_ctob(td_cmd, td_offset,
                                            I40E_MAX_DATA_PER_TXD, td_tag);
 
-                        buf_offset += I40E_MAX_DATA_PER_TXD;
-                        size -= I40E_MAX_DATA_PER_TXD;
-
                         tx_desc++;
                         i++;
                         if (i == tx_ring->count) {
                                 tx_desc = I40E_TX_DESC(tx_ring, 0);
                                 i = 0;
                         }
-                }
 
-                tx_bi = &tx_ring->tx_bi[i];
-                tx_bi->length = buf_offset + size;
-                tx_bi->tx_flags = tx_flags;
-                tx_bi->dma = dma;
+                        dma += I40E_MAX_DATA_PER_TXD;
+                        size -= I40E_MAX_DATA_PER_TXD;
 
-                tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset);
-                tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
-                                                          size, td_tag);
+                        tx_desc->buffer_addr = cpu_to_le64(dma);
+                }
 
                 if (likely(!data_len))
                         break;
 
-                size = skb_frag_size(frag);
-                data_len -= size;
-                buf_offset = 0;
-                tx_flags |= I40E_TX_FLAGS_MAPPED_AS_PAGE;
-
-                dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
-                if (dma_mapping_error(dev, dma))
-                        goto dma_error;
+                tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
+                                                          size, td_tag);
 
                 tx_desc++;
                 i++;
@@ -1582,31 +1592,25 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                         i = 0;
                 }
 
-                frag++;
-        }
-
-        tx_desc->cmd_type_offset_bsz |=
-                cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+                size = skb_frag_size(frag);
+                data_len -= size;
 
-        i++;
-        if (i == tx_ring->count)
-                i = 0;
+                dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+                                       DMA_TO_DEVICE);
 
-        tx_ring->next_to_use = i;
+                tx_bi = &tx_ring->tx_bi[i];
+        }
 
-        if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
-                gso_segs = skb_shinfo(skb)->gso_segs;
-        else
-                gso_segs = 1;
+        tx_desc->cmd_type_offset_bsz =
+                build_ctob(td_cmd, td_offset, size, td_tag) |
+                cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
 
-        /* multiply data chunks by size of headers */
-        tx_bi->bytecount = paylen + (gso_segs * hdr_len);
-        tx_bi->gso_segs = gso_segs;
-        tx_bi->skb = skb;
+        netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
+                                                 tx_ring->queue_index),
+                             first->bytecount);
 
-        /* set the timestamp and next to watch values */
+        /* set the timestamp */
         first->time_stamp = jiffies;
-        first->next_to_watch = tx_desc;
 
         /* Force memory writes to complete before letting h/w
          * know there are new descriptors to fetch. (Only
@@ -1615,16 +1619,27 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
          */
         wmb();
 
+        /* set next_to_watch value indicating a packet is present */
+        first->next_to_watch = tx_desc;
+
+        i++;
+        if (i == tx_ring->count)
+                i = 0;
+
+        tx_ring->next_to_use = i;
+
+        /* notify HW of packet */
         writel(i, tx_ring->tail);
+
         return;
 
 dma_error:
-        dev_info(dev, "TX DMA map failed\n");
+        dev_info(tx_ring->dev, "TX DMA map failed\n");
 
         /* clear dma mappings for failed tx_bi map */
         for (;;) {
                 tx_bi = &tx_ring->tx_bi[i];
-                i40e_unmap_tx_resource(tx_ring, tx_bi);
+                i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
                 if (tx_bi == first)
                         break;
                 if (i == 0)
@@ -1632,8 +1647,6 @@ dma_error:
                 i--;
         }
 
-        dev_kfree_skb_any(skb);
-
         tx_ring->next_to_use = i;
 }
 
@@ -1758,16 +1771,16 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 
         skb_tx_timestamp(skb);
 
+        /* always enable CRC insertion offload */
+        td_cmd |= I40E_TX_DESC_CMD_ICRC;
+
         /* Always offload the checksum, since it's in the data descriptor */
-        if (i40e_tx_csum(tx_ring, skb, tx_flags, protocol))
+        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                 tx_flags |= I40E_TX_FLAGS_CSUM;
 
-        /* always enable offload insertion */
-        td_cmd |= I40E_TX_DESC_CMD_ICRC;
-
-        if (tx_flags & I40E_TX_FLAGS_CSUM)
                 i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
                                     tx_ring, &cd_tunneling);
+        }
 
         i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
                            cd_tunneling, cd_l2tag2);
@@ -1801,7 +1814,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
         struct i40e_netdev_priv *np = netdev_priv(netdev);
         struct i40e_vsi *vsi = np->vsi;
-        struct i40e_ring *tx_ring = &vsi->tx_rings[skb->queue_mapping];
+        struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
 
         /* hardware can't handle really short frames, hardware padding works
          * beyond this point