 drivers/net/ethernet/intel/fm10k/fm10k.h        |   5 +
 drivers/net/ethernet/intel/fm10k/fm10k_main.c   | 938 ++++++++++++++++++++++++
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c |  94 ++-
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c    |   3 +
 4 files changed, 1038 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index a0e833135449..257287c07279 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -391,6 +391,11 @@ extern char fm10k_driver_name[];
 extern const char fm10k_driver_version[];
 int fm10k_init_queueing_scheme(struct fm10k_intfc *interface);
 void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface);
+netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
+				  struct fm10k_ring *tx_ring);
+void fm10k_tx_timeout_reset(struct fm10k_intfc *interface);
+bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring);
+void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count);
 
 /* PCI */
 void fm10k_mbx_free_irq(struct fm10k_intfc *);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index bf84c263df0e..f7220d841336 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -24,6 +24,7 @@
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <linux/if_macvlan.h>
+#include <linux/prefetch.h>
 
 #include "fm10k.h"
 
@@ -67,6 +68,921 @@ static void __exit fm10k_exit_module(void)
 }
 module_exit(fm10k_exit_module);
 
+static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring,
+				    struct fm10k_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* Only page will be NULL if buffer was consumed */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_page(page);
+		bi->page = NULL;
+
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = 0;
+
+	return true;
+}
+
+/**
+ * fm10k_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
+{
+	union fm10k_rx_desc *rx_desc;
+	struct fm10k_rx_buffer *bi;
+	u16 i = rx_ring->next_to_use;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = FM10K_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer[i];
+	i -= rx_ring->count;
+
+	do {
+		if (!fm10k_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = FM10K_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer;
+			i -= rx_ring->count;
+		}
+
+		/* clear the hdr_addr for the next_to_use descriptor */
+		rx_desc->q.hdr_addr = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		/* record the next descriptor to use */
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch. (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+
+		/* notify hardware of new descriptors */
+		writel(i, rx_ring->tail);
+	}
+}
+
+/**
+ * fm10k_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the interface
+ **/
+static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring,
+				struct fm10k_rx_buffer *old_buff)
+{
+	struct fm10k_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = &rx_ring->rx_buffer[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	memcpy(new_buff, old_buff, sizeof(struct fm10k_rx_buffer));
+
+	/* sync the buffer for use by the device */
+	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
+					 old_buff->page_offset,
+					 FM10K_RX_BUFSZ,
+					 DMA_FROM_DEVICE);
+}
+
+static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
+				    struct page *page,
+				    unsigned int truesize)
+{
+	/* avoid re-using remote pages */
+	if (unlikely(page_to_nid(page) != numa_mem_id()))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely(page_count(page) != 1))
+		return false;
+
+	/* flip page offset to other buffer */
+	rx_buffer->page_offset ^= FM10K_RX_BUFSZ;
+
+	/* since we are the only owner of the page and we need to
+	 * increment it, just set the value to 2 in order to avoid
+	 * an unnecessary locked operation
+	 */
+	atomic_set(&page->_count, 2);
+#else
+	/* move offset up to the next cache line */
+	rx_buffer->page_offset += truesize;
+
+	if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ))
+		return false;
+
+	/* bump ref count on page before it is given to the stack */
+	get_page(page);
+#endif
+
+	return true;
+}
+
+/**
+ * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the interface.
+ **/
+static bool fm10k_add_rx_frag(struct fm10k_ring *rx_ring,
+			      struct fm10k_rx_buffer *rx_buffer,
+			      union fm10k_rx_desc *rx_desc,
+			      struct sk_buff *skb)
+{
+	struct page *page = rx_buffer->page;
+	unsigned int size = le16_to_cpu(rx_desc->w.length);
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = FM10K_RX_BUFSZ;
+#else
+	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
+
+	if ((size <= FM10K_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
+		unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+		/* we can reuse buffer as-is, just make sure it is local */
+		if (likely(page_to_nid(page) == numa_mem_id()))
+			return true;
+
+		/* this page cannot be reused so discard it */
+		put_page(page);
+		return false;
+	}
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+			rx_buffer->page_offset, size, truesize);
+
+	return fm10k_can_reuse_rx_page(rx_buffer, page, truesize);
+}
+
+static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
+					     union fm10k_rx_desc *rx_desc,
+					     struct sk_buff *skb)
+{
+	struct fm10k_rx_buffer *rx_buffer;
+	struct page *page;
+
+	rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean];
+
+	page = rx_buffer->page;
+	prefetchw(page);
+
+	if (likely(!skb)) {
+		void *page_addr = page_address(page) +
+				  rx_buffer->page_offset;
+
+		/* prefetch first cache line of first page */
+		prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+		prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+		/* allocate a skb to store the frags */
+		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+						FM10K_RX_HDR_LEN);
+		if (unlikely(!skb)) {
+			rx_ring->rx_stats.alloc_failed++;
+			return NULL;
+		}
+
+		/* we will be copying header into skb->data in
+		 * pskb_may_pull so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+	}
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      FM10K_RX_BUFSZ,
+				      DMA_FROM_DEVICE);
+
+	/* pull page into skb */
+	if (fm10k_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+		/* hand second half of page back to the ring */
+		fm10k_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+
+	return skb;
+}
+
+/**
+ * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ * other fields within the skb.
+ **/
+static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
+					     union fm10k_rx_desc *rx_desc,
+					     struct sk_buff *skb)
+{
+	unsigned int len = skb->len;
+
+	FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort;
+
+	if (rx_desc->w.vlan) {
+		u16 vid = le16_to_cpu(rx_desc->w.vlan);
+
+		if (vid != rx_ring->vid)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+	return len;
+}
+
+/**
+ * fm10k_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring,
+			     union fm10k_rx_desc *rx_desc)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(FM10K_RX_DESC(rx_ring, ntc));
+
+	if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP)))
+		return false;
+
+	return true;
+}
+
+/**
+ * fm10k_pull_tail - fm10k specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an fm10k specific version of __pskb_pull_tail. The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void fm10k_pull_tail(struct fm10k_ring *rx_ring,
+			    union fm10k_rx_desc *rx_desc,
+			    struct sk_buff *skb)
+{
+	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	unsigned char *va;
+	unsigned int pull_len;
+
+	/* it is valid to use page_address instead of kmap since we are
+	 * working with pages allocated out of the lomem pool per
+	 * alloc_page(GFP_ATOMIC)
+	 */
+	va = skb_frag_address(frag);
+
+	/* we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	skb_frag_size_sub(frag, pull_len);
+	frag->page_offset += pull_len;
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+}
+
+/**
+ * fm10k_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
+				  union fm10k_rx_desc *rx_desc,
+				  struct sk_buff *skb)
+{
+	if (unlikely((fm10k_test_staterr(rx_desc,
+					 FM10K_RXD_STATUS_RXE)))) {
+		dev_kfree_skb_any(skb);
+		rx_ring->rx_stats.errors++;
+		return true;
+	}
+
+	/* place header in linear portion of buffer */
+	if (skb_is_nonlinear(skb))
+		fm10k_pull_tail(rx_ring, rx_desc, skb);
+
+	/* if skb_pad returns an error the skb was freed */
+	if (unlikely(skb->len < 60)) {
+		int pad_len = 60 - skb->len;
+
+		if (skb_pad(skb, pad_len))
+			return true;
+		__skb_put(skb, pad_len);
+	}
+
+	return false;
+}
+
+/**
+ * fm10k_receive_skb - helper function to handle rx indications
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: packet to send up
+ **/
+static void fm10k_receive_skb(struct fm10k_q_vector *q_vector,
+			      struct sk_buff *skb)
+{
+	napi_gro_receive(&q_vector->napi, skb);
+}
+
+static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
+			       struct fm10k_ring *rx_ring,
+			       int budget)
+{
+	struct sk_buff *skb = rx_ring->skb;
+	unsigned int total_bytes = 0, total_packets = 0;
+	u16 cleaned_count = fm10k_desc_unused(rx_ring);
+
+	do {
+		union fm10k_rx_desc *rx_desc;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= FM10K_RX_BUFFER_WRITE) {
+			fm10k_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * RXD_STATUS_DD bit is set
+		 */
+		rmb();
+
+		/* retrieve a buffer from the ring */
+		skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb)
+			break;
+
+		cleaned_count++;
+
+		/* fetch next buffer in frame if non-eop */
+		if (fm10k_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb);
+
+		fm10k_receive_skb(q_vector, skb);
+
+		/* reset skb pointer */
+		skb = NULL;
+
+		/* update budget accounting */
+		total_packets++;
+	} while (likely(total_packets < budget));
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_packets;
+	rx_ring->stats.bytes += total_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
+
+	return total_packets < budget;
+}
+
+static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
+			       struct fm10k_tx_desc *tx_desc, u16 i,
+			       dma_addr_t dma, unsigned int size, u8 desc_flags)
+{
+	/* set RS and INT for last frame in a cache line */
+	if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0)
+		desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT;
+
+	/* record values to descriptor */
+	tx_desc->buffer_addr = cpu_to_le64(dma);
+	tx_desc->flags = desc_flags;
+	tx_desc->buflen = cpu_to_le16(size);
+
+	/* return true if we just wrapped the ring */
+	return i == tx_ring->count;
+}
+
+static void fm10k_tx_map(struct fm10k_ring *tx_ring,
+			 struct fm10k_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	struct fm10k_tx_buffer *tx_buffer;
+	struct fm10k_tx_desc *tx_desc;
+	struct skb_frag_struct *frag;
+	unsigned char *data;
+	dma_addr_t dma;
+	unsigned int data_len, size;
+	u16 i = tx_ring->next_to_use;
+	u8 flags = 0;
+
+	tx_desc = FM10K_TX_DESC(tx_ring, i);
+
+	/* add HW VLAN tag */
+	if (vlan_tx_tag_present(skb))
+		tx_desc->vlan = cpu_to_le16(vlan_tx_tag_get(skb));
+	else
+		tx_desc->vlan = 0;
+
+	size = skb_headlen(skb);
+	data = skb->data;
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+
+	data_len = skb->data_len;
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) {
+			if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
+					       FM10K_MAX_DATA_PER_TXD, flags)) {
+				tx_desc = FM10K_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += FM10K_MAX_DATA_PER_TXD;
+			size -= FM10K_MAX_DATA_PER_TXD;
+		}
+
+		if (likely(!data_len))
+			break;
+
+		if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
+				       dma, size, flags)) {
+			tx_desc = FM10K_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer[i];
+	}
+
+	/* write last descriptor with LAST bit set */
+	flags |= FM10K_TXD_FLAG_LAST;
+
+	if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
+		i = 0;
+
+	/* record bytecount for BQL */
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* record SW timestamp if HW timestamp is not available */
+	skb_tx_timestamp(first->skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch. (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	tx_ring->next_to_use = i;
+
+	/* notify HW of packet */
+	writel(i, tx_ring->tail);
+
+	/* we need this if more than one processor can write to our tail
+	 * at a time, it synchronizes IO on IA64/Altix systems
+	 */
+	mmiowb();
+
+	return;
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_buffer map */
+	for (;;) {
+		tx_buffer = &tx_ring->tx_buffer[i];
+		fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+		if (tx_buffer == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+}
+
+static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available. */
+	if (likely(fm10k_desc_unused(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	++tx_ring->tx_stats.restart_queue;
+	return 0;
+}
+
+static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
+{
+	if (likely(fm10k_desc_unused(tx_ring) >= size))
+		return 0;
+	return __fm10k_maybe_stop_tx(tx_ring, size);
+}
+
+netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
+				  struct fm10k_ring *tx_ring)
+{
+	struct fm10k_tx_buffer *first;
+	u32 tx_flags = 0;
+#if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
+	unsigned short f;
+#endif
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+
+	/* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
+	 *       + 2 desc gap to keep tail from touching head
+	 * otherwise try next time
+	 */
+#if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+#else
+	count += skb_shinfo(skb)->nr_frags;
+#endif
+	if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+	first->gso_segs = 1;
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+
+	fm10k_tx_map(tx_ring, first);
+
+	fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	return NETDEV_TX_OK;
+}
+
+static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
+{
+	return ring->stats.packets;
+}
+
+static u64 fm10k_get_tx_pending(struct fm10k_ring *ring)
+{
+	/* use SW head and tail until we have real hardware */
+	u32 head = ring->next_to_clean;
+	u32 tail = ring->next_to_use;
+
+	return ((head <= tail) ? tail : tail + ring->count) - head;
+}
+
+bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
+{
+	u32 tx_done = fm10k_get_tx_completed(tx_ring);
+	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
+	u32 tx_pending = fm10k_get_tx_pending(tx_ring);
+
+	clear_check_for_tx_hang(tx_ring);
+
+	/* Check for a hung queue, but be thorough. This verifies
+	 * that a transmit has been completed since the previous
+	 * check AND there is at least one packet pending. By
+	 * requiring this to fail twice we avoid races with
+	 * clearing the ARMED bit and conditions where we
+	 * run the check_tx_hang logic with a transmit completion
+	 * pending but without time to complete it yet.
+	 */
+	if (!tx_pending || (tx_done_old != tx_done)) {
+		/* update completed stats and continue */
+		tx_ring->tx_stats.tx_done_old = tx_done;
+		/* reset the countdown */
+		clear_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);
+
+		return false;
+	}
+
+	/* make sure it is true for two checks in a row */
+	return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);
+}
+
+/**
+ * fm10k_tx_timeout_reset - initiate reset due to Tx timeout
+ * @interface: driver private struct
+ **/
+void fm10k_tx_timeout_reset(struct fm10k_intfc *interface)
+{
+	/* Do the reset outside of interrupt context */
+	if (!test_bit(__FM10K_DOWN, &interface->state)) {
+		netdev_err(interface->netdev, "Reset interface\n");
+		interface->tx_timeout_count++;
+		interface->flags |= FM10K_FLAG_RESET_REQUESTED;
+		fm10k_service_event_schedule(interface);
+	}
+}
+
+/**
+ * fm10k_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: structure containing interrupt and ring information
+ * @tx_ring: tx ring to clean
+ **/
+static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
+			       struct fm10k_ring *tx_ring)
+{
+	struct fm10k_intfc *interface = q_vector->interface;
+	struct fm10k_tx_buffer *tx_buffer;
+	struct fm10k_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	unsigned int i = tx_ring->next_to_clean;
+
+	if (test_bit(__FM10K_DOWN, &interface->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer[i];
+	tx_desc = FM10K_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		read_barrier_depends();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+
+		/* free the skb */
+		dev_consume_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		tx_buffer->skb = NULL;
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer;
+				tx_desc = FM10K_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer;
+			tx_desc = FM10K_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) {
+		/* schedule immediate reset if we believe we hung */
+		struct fm10k_hw *hw = &interface->hw;
+
+		netif_err(interface, drv, tx_ring->netdev,
+			  "Detected Tx Unit Hang\n"
+			  "  Tx Queue             <%d>\n"
+			  "  TDH, TDT             <%x>, <%x>\n"
+			  "  next_to_use          <%x>\n"
+			  "  next_to_clean        <%x>\n",
+			  tx_ring->queue_index,
+			  fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)),
+			  fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)),
+			  tx_ring->next_to_use, i);
+
+		netif_stop_subqueue(tx_ring->netdev,
+				    tx_ring->queue_index);
+
+		netif_info(interface, probe, tx_ring->netdev,
+			   "tx hang %d detected on queue %d, resetting interface\n",
+			   interface->tx_timeout_count + 1,
+			   tx_ring->queue_index);
+
+		fm10k_tx_timeout_reset(interface);
+
+		/* the netdev is about to reset, no point in enabling stuff */
+		return true;
+	}
+
+	/* notify netdev of completed buffers */
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2)
+	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+		     (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !test_bit(__FM10K_DOWN, &interface->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+			++tx_ring->tx_stats.restart_queue;
+		}
+	}
+
+	return !!budget;
+}
+
 /**
  * fm10k_update_itr - update the dynamic ITR value based on packet size
  *
@@ -137,6 +1053,28 @@ static int fm10k_poll(struct napi_struct *napi, int budget)
 {
 	struct fm10k_q_vector *q_vector =
 			       container_of(napi, struct fm10k_q_vector, napi);
+	struct fm10k_ring *ring;
+	int per_ring_budget;
+	bool clean_complete = true;
+
+	fm10k_for_each_ring(ring, q_vector->tx)
+		clean_complete &= fm10k_clean_tx_irq(q_vector, ring);
+
+	/* attempt to distribute budget to each queue fairly, but don't
+	 * allow the budget to go below 1 because we'll exit polling
+	 */
+	if (q_vector->rx.count > 1)
+		per_ring_budget = max(budget/q_vector->rx.count, 1);
+	else
+		per_ring_budget = budget;
+
+	fm10k_for_each_ring(ring, q_vector->rx)
+		clean_complete &= fm10k_clean_rx_irq(q_vector, ring,
+						     per_ring_budget);
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
 
 	/* all work done, exit the polling mode */
 	napi_complete(napi);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 52673d78de8e..46df65856156 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -451,8 +451,66 @@ int fm10k_close(struct net_device *netdev)
 
 static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 {
-	dev_kfree_skb_any(skb);
-	return NETDEV_TX_OK;
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	unsigned int r_idx = 0;
+	int err;
+
+	if ((skb->protocol == htons(ETH_P_8021Q)) &&
+	    !vlan_tx_tag_present(skb)) {
+		/* FM10K only supports hardware tagging, any tags in frame
+		 * are considered 2nd level or "outer" tags
+		 */
+		struct vlan_hdr *vhdr;
+		__be16 proto;
+
+		/* make sure skb is not shared */
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (!skb)
+			return NETDEV_TX_OK;
+
+		/* make sure there is enough room to move the ethernet header */
+		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+			return NETDEV_TX_OK;
+
+		/* verify the skb head is not shared */
+		err = skb_cow_head(skb, 0);
+		if (err)
+			return NETDEV_TX_OK;
+
+		/* locate vlan header */
+		vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
+
+		/* pull the 2 key pieces of data out of it */
+		__vlan_hwaccel_put_tag(skb,
+				       htons(ETH_P_8021Q),
+				       ntohs(vhdr->h_vlan_TCI));
+		proto = vhdr->h_vlan_encapsulated_proto;
+		skb->protocol = (ntohs(proto) >= 1536) ? proto :
+							 htons(ETH_P_802_2);
+
+		/* squash it by moving the ethernet addresses up 4 bytes */
+		memmove(skb->data + VLAN_HLEN, skb->data, 12);
+		__skb_pull(skb, VLAN_HLEN);
+		skb_reset_mac_header(skb);
+	}
+
+	/* The minimum packet size for a single buffer is 17B so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (unlikely(skb->len < 17)) {
+		int pad_len = 17 - skb->len;
+
+		if (skb_pad(skb, pad_len))
+			return NETDEV_TX_OK;
+		__skb_put(skb, pad_len);
+	}
+
+	if (r_idx >= interface->num_tx_queues)
+		r_idx %= interface->num_tx_queues;
+
+	err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]);
+
+	return err;
 }
 
 static int fm10k_change_mtu(struct net_device *dev, int new_mtu)
@@ -465,6 +523,37 @@ static int fm10k_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+/**
+ * fm10k_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ **/
+static void fm10k_tx_timeout(struct net_device *netdev)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	bool real_tx_hang = false;
+	int i;
+
+#define TX_TIMEO_LIMIT 16000
+	for (i = 0; i < interface->num_tx_queues; i++) {
+		struct fm10k_ring *tx_ring = interface->tx_ring[i];
+
+		if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring))
+			real_tx_hang = true;
+	}
+
+	if (real_tx_hang) {
+		fm10k_tx_timeout_reset(interface);
+	} else {
+		netif_info(interface, drv, netdev,
+			   "Fake Tx hang detected with timeout of %d seconds\n",
+			   netdev->watchdog_timeo/HZ);
+
+		/* fake Tx hang - increase the kernel timeout */
+		if (netdev->watchdog_timeo < TX_TIMEO_LIMIT)
+			netdev->watchdog_timeo *= 2;
+	}
+}
+
 static int fm10k_uc_vlan_unsync(struct net_device *netdev,
 				const unsigned char *uc_addr)
 {
@@ -891,6 +980,7 @@ static const struct net_device_ops fm10k_netdev_ops = {
 	.ndo_start_xmit		= fm10k_xmit_frame,
 	.ndo_set_mac_address	= fm10k_set_mac,
 	.ndo_change_mtu		= fm10k_change_mtu,
+	.ndo_tx_timeout		= fm10k_tx_timeout,
 	.ndo_vlan_rx_add_vid	= fm10k_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= fm10k_vlan_rx_kill_vid,
 	.ndo_set_rx_mode	= fm10k_set_rx_mode,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 7529a8498da9..229c7e491251 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -661,6 +661,9 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
 
 	/* enable queue */
 	fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
+
+	/* place buffers on ring for receive data */
+	fm10k_alloc_rx_buffers(ring, fm10k_desc_unused(ring));
 }
 
 /**