author     Alexander Duyck <alexander.h.duyck@intel.com>  2014-09-20 19:50:03 -0400
committer  Jeff Kirsher <jeffrey.t.kirsher@intel.com>     2014-09-23 06:59:18 -0400
commit     b101c9626477b1f9d30f31d11442a9cc80c98d7c (patch)
tree       cb9bf9d2994c65e6011bec82fa596187cb617c67 /drivers/net
parent     3abaae42e1bf686bf5c43063a00b0f4ddbb14373 (diff)
fm10k: Add transmit and receive fastpath and interrupt handlers

This change adds the transmit and receive fastpath and interrupt
handlers. With this code in place the network device is now able to
send and receive frames over the network interface using a single
queue.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Rick Jones <rick.jones2@hp.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
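For orientation, here is a minimal, self-contained sketch (not part of the patch) of one detail in the new fm10k_poll() hunk below: the NAPI budget is split across a q_vector's Rx rings but never allowed to drop below 1. The helper name per_ring_budget_for() and the standalone main() harness are invented for illustration only.

    #include <stdio.h>

    /* Illustration only: mirrors max(budget / rx.count, 1) as used in
     * the patched fm10k_poll(), so each Rx ring gets a fair share of
     * the NAPI budget without ever receiving a zero budget.
     */
    static int per_ring_budget_for(int budget, int rx_ring_count)
    {
        if (rx_ring_count <= 1)
            return budget;
        return (budget / rx_ring_count) > 1 ? (budget / rx_ring_count) : 1;
    }

    int main(void)
    {
        printf("%d\n", per_ring_budget_for(64, 1));  /* 64 */
        printf("%d\n", per_ring_budget_for(64, 4));  /* 16 */
        printf("%d\n", per_ring_budget_for(8, 16));  /* 1  */
        return 0;
    }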
Diffstat (limited to 'drivers/net')
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k.h          5
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_main.c    938
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_netdev.c   94
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_pci.c       3
4 files changed, 1038 insertions, 2 deletions
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index a0e833135449..257287c07279 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -391,6 +391,11 @@ extern char fm10k_driver_name[];
 extern const char fm10k_driver_version[];
 int fm10k_init_queueing_scheme(struct fm10k_intfc *interface);
 void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface);
+netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
+				  struct fm10k_ring *tx_ring);
+void fm10k_tx_timeout_reset(struct fm10k_intfc *interface);
+bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring);
+void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count);
 
 /* PCI */
 void fm10k_mbx_free_irq(struct fm10k_intfc *);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index bf84c263df0e..f7220d841336 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -24,6 +24,7 @@
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <linux/if_macvlan.h>
+#include <linux/prefetch.h>
 
 #include "fm10k.h"
 
@@ -67,6 +68,921 @@ static void __exit fm10k_exit_module(void)
 }
 module_exit(fm10k_exit_module);
 
+static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring,
+				    struct fm10k_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* Only page will be NULL if buffer was consumed */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_page(page);
+		bi->page = NULL;
+
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = 0;
+
+	return true;
+}
+
+/**
+ * fm10k_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
+{
+	union fm10k_rx_desc *rx_desc;
+	struct fm10k_rx_buffer *bi;
+	u16 i = rx_ring->next_to_use;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = FM10K_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer[i];
+	i -= rx_ring->count;
+
+	do {
+		if (!fm10k_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = FM10K_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer;
+			i -= rx_ring->count;
+		}
+
+		/* clear the hdr_addr for the next_to_use descriptor */
+		rx_desc->q.hdr_addr = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		/* record the next descriptor to use */
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+
+		/* notify hardware of new descriptors */
+		writel(i, rx_ring->tail);
+	}
+}
+
+/**
+ * fm10k_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the interface
+ **/
+static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring,
+				struct fm10k_rx_buffer *old_buff)
+{
+	struct fm10k_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = &rx_ring->rx_buffer[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	memcpy(new_buff, old_buff, sizeof(struct fm10k_rx_buffer));
+
+	/* sync the buffer for use by the device */
+	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
+					 old_buff->page_offset,
+					 FM10K_RX_BUFSZ,
+					 DMA_FROM_DEVICE);
+}
+
+static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
+				    struct page *page,
+				    unsigned int truesize)
+{
+	/* avoid re-using remote pages */
+	if (unlikely(page_to_nid(page) != numa_mem_id()))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely(page_count(page) != 1))
+		return false;
+
+	/* flip page offset to other buffer */
+	rx_buffer->page_offset ^= FM10K_RX_BUFSZ;
+
+	/* since we are the only owner of the page and we need to
+	 * increment it, just set the value to 2 in order to avoid
+	 * an unnecessary locked operation
+	 */
+	atomic_set(&page->_count, 2);
+#else
+	/* move offset up to the next cache line */
+	rx_buffer->page_offset += truesize;
+
+	if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ))
+		return false;
+
+	/* bump ref count on page before it is given to the stack */
+	get_page(page);
+#endif
+
+	return true;
+}
+
+/**
+ * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the interface.
+ **/
+static bool fm10k_add_rx_frag(struct fm10k_ring *rx_ring,
+			      struct fm10k_rx_buffer *rx_buffer,
+			      union fm10k_rx_desc *rx_desc,
+			      struct sk_buff *skb)
+{
+	struct page *page = rx_buffer->page;
+	unsigned int size = le16_to_cpu(rx_desc->w.length);
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = FM10K_RX_BUFSZ;
+#else
+	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
+
+	if ((size <= FM10K_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
+		unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+		/* we can reuse buffer as-is, just make sure it is local */
+		if (likely(page_to_nid(page) == numa_mem_id()))
+			return true;
+
+		/* this page cannot be reused so discard it */
+		put_page(page);
+		return false;
+	}
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+			rx_buffer->page_offset, size, truesize);
+
+	return fm10k_can_reuse_rx_page(rx_buffer, page, truesize);
+}
+
+static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
+					     union fm10k_rx_desc *rx_desc,
+					     struct sk_buff *skb)
+{
+	struct fm10k_rx_buffer *rx_buffer;
+	struct page *page;
+
+	rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean];
+
+	page = rx_buffer->page;
+	prefetchw(page);
+
+	if (likely(!skb)) {
+		void *page_addr = page_address(page) +
+				  rx_buffer->page_offset;
+
+		/* prefetch first cache line of first page */
+		prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+		prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+		/* allocate a skb to store the frags */
+		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+						FM10K_RX_HDR_LEN);
+		if (unlikely(!skb)) {
+			rx_ring->rx_stats.alloc_failed++;
+			return NULL;
+		}
+
+		/* we will be copying header into skb->data in
+		 * pskb_may_pull so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+	}
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      FM10K_RX_BUFSZ,
+				      DMA_FROM_DEVICE);
+
+	/* pull page into skb */
+	if (fm10k_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+		/* hand second half of page back to the ring */
+		fm10k_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+
+	return skb;
+}
+
+/**
+ * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ * other fields within the skb.
+ **/
+static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
+					     union fm10k_rx_desc *rx_desc,
+					     struct sk_buff *skb)
+{
+	unsigned int len = skb->len;
+
+	FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort;
+
+	if (rx_desc->w.vlan) {
+		u16 vid = le16_to_cpu(rx_desc->w.vlan);
+
+		if (vid != rx_ring->vid)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+	return len;
+}
+
+/**
+ * fm10k_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring,
+			     union fm10k_rx_desc *rx_desc)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(FM10K_RX_DESC(rx_ring, ntc));
+
+	if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP)))
+		return false;
+
+	return true;
+}
+
+/**
+ * fm10k_pull_tail - fm10k specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an fm10k specific version of __pskb_pull_tail.  The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void fm10k_pull_tail(struct fm10k_ring *rx_ring,
+			    union fm10k_rx_desc *rx_desc,
+			    struct sk_buff *skb)
+{
+	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	unsigned char *va;
+	unsigned int pull_len;
+
+	/* it is valid to use page_address instead of kmap since we are
+	 * working with pages allocated out of the lomem pool per
+	 * alloc_page(GFP_ATOMIC)
+	 */
+	va = skb_frag_address(frag);
+
+	/* we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	skb_frag_size_sub(frag, pull_len);
+	frag->page_offset += pull_len;
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+}
+
+/**
+ * fm10k_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
+				  union fm10k_rx_desc *rx_desc,
+				  struct sk_buff *skb)
+{
+	if (unlikely((fm10k_test_staterr(rx_desc,
+					 FM10K_RXD_STATUS_RXE)))) {
+		dev_kfree_skb_any(skb);
+		rx_ring->rx_stats.errors++;
+		return true;
+	}
+
+	/* place header in linear portion of buffer */
+	if (skb_is_nonlinear(skb))
+		fm10k_pull_tail(rx_ring, rx_desc, skb);
+
+	/* if skb_pad returns an error the skb was freed */
+	if (unlikely(skb->len < 60)) {
+		int pad_len = 60 - skb->len;
+
+		if (skb_pad(skb, pad_len))
+			return true;
+		__skb_put(skb, pad_len);
+	}
+
+	return false;
+}
+
+/**
+ * fm10k_receive_skb - helper function to handle rx indications
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: packet to send up
+ **/
+static void fm10k_receive_skb(struct fm10k_q_vector *q_vector,
+			      struct sk_buff *skb)
+{
+	napi_gro_receive(&q_vector->napi, skb);
+}
+
+static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
+			       struct fm10k_ring *rx_ring,
+			       int budget)
+{
+	struct sk_buff *skb = rx_ring->skb;
+	unsigned int total_bytes = 0, total_packets = 0;
+	u16 cleaned_count = fm10k_desc_unused(rx_ring);
+
+	do {
+		union fm10k_rx_desc *rx_desc;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= FM10K_RX_BUFFER_WRITE) {
+			fm10k_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * RXD_STATUS_DD bit is set
+		 */
+		rmb();
+
+		/* retrieve a buffer from the ring */
+		skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb)
+			break;
+
+		cleaned_count++;
+
+		/* fetch next buffer in frame if non-eop */
+		if (fm10k_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb);
+
+		fm10k_receive_skb(q_vector, skb);
+
+		/* reset skb pointer */
+		skb = NULL;
+
+		/* update budget accounting */
+		total_packets++;
+	} while (likely(total_packets < budget));
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_packets;
+	rx_ring->stats.bytes += total_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
+
+	return total_packets < budget;
+}
+
+static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
+			       struct fm10k_tx_desc *tx_desc, u16 i,
+			       dma_addr_t dma, unsigned int size, u8 desc_flags)
+{
+	/* set RS and INT for last frame in a cache line */
+	if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0)
+		desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT;
+
+	/* record values to descriptor */
+	tx_desc->buffer_addr = cpu_to_le64(dma);
+	tx_desc->flags = desc_flags;
+	tx_desc->buflen = cpu_to_le16(size);
+
+	/* return true if we just wrapped the ring */
+	return i == tx_ring->count;
+}
+
+static void fm10k_tx_map(struct fm10k_ring *tx_ring,
+			 struct fm10k_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	struct fm10k_tx_buffer *tx_buffer;
+	struct fm10k_tx_desc *tx_desc;
+	struct skb_frag_struct *frag;
+	unsigned char *data;
+	dma_addr_t dma;
+	unsigned int data_len, size;
+	u16 i = tx_ring->next_to_use;
+	u8 flags = 0;
+
+	tx_desc = FM10K_TX_DESC(tx_ring, i);
+
+	/* add HW VLAN tag */
+	if (vlan_tx_tag_present(skb))
+		tx_desc->vlan = cpu_to_le16(vlan_tx_tag_get(skb));
+	else
+		tx_desc->vlan = 0;
+
+	size = skb_headlen(skb);
+	data = skb->data;
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+
+	data_len = skb->data_len;
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) {
+			if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
+					       FM10K_MAX_DATA_PER_TXD, flags)) {
+				tx_desc = FM10K_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += FM10K_MAX_DATA_PER_TXD;
+			size -= FM10K_MAX_DATA_PER_TXD;
+		}
+
+		if (likely(!data_len))
+			break;
+
+		if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
+				       dma, size, flags)) {
+			tx_desc = FM10K_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer[i];
+	}
+
+	/* write last descriptor with LAST bit set */
+	flags |= FM10K_TXD_FLAG_LAST;
+
+	if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
+		i = 0;
+
+	/* record bytecount for BQL */
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* record SW timestamp if HW timestamp is not available */
+	skb_tx_timestamp(first->skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	tx_ring->next_to_use = i;
+
+	/* notify HW of packet */
+	writel(i, tx_ring->tail);
+
+	/* we need this if more than one processor can write to our tail
+	 * at a time, it synchronizes IO on IA64/Altix systems
+	 */
+	mmiowb();
+
+	return;
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_buffer map */
+	for (;;) {
+		tx_buffer = &tx_ring->tx_buffer[i];
+		fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+		if (tx_buffer == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+}
+
+static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available. */
+	if (likely(fm10k_desc_unused(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	++tx_ring->tx_stats.restart_queue;
+	return 0;
+}
+
+static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
+{
+	if (likely(fm10k_desc_unused(tx_ring) >= size))
+		return 0;
+	return __fm10k_maybe_stop_tx(tx_ring, size);
+}
+
+netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
+				  struct fm10k_ring *tx_ring)
+{
+	struct fm10k_tx_buffer *first;
+	u32 tx_flags = 0;
+#if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
+	unsigned short f;
+#endif
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+
+	/* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
+	 *       + 2 desc gap to keep tail from touching head
+	 * otherwise try next time
+	 */
+#if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+#else
+	count += skb_shinfo(skb)->nr_frags;
+#endif
+	if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+	first->gso_segs = 1;
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+
+	fm10k_tx_map(tx_ring, first);
+
+	fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	return NETDEV_TX_OK;
+}
+
+static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
+{
+	return ring->stats.packets;
+}
+
+static u64 fm10k_get_tx_pending(struct fm10k_ring *ring)
+{
+	/* use SW head and tail until we have real hardware */
+	u32 head = ring->next_to_clean;
+	u32 tail = ring->next_to_use;
+
+	return ((head <= tail) ? tail : tail + ring->count) - head;
+}
+
+bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
+{
+	u32 tx_done = fm10k_get_tx_completed(tx_ring);
+	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
+	u32 tx_pending = fm10k_get_tx_pending(tx_ring);
+
+	clear_check_for_tx_hang(tx_ring);
+
+	/* Check for a hung queue, but be thorough.  This verifies
+	 * that a transmit has been completed since the previous
+	 * check AND there is at least one packet pending.  By
+	 * requiring this to fail twice we avoid races with
+	 * clearing the ARMED bit and conditions where we
+	 * run the check_tx_hang logic with a transmit completion
+	 * pending but without time to complete it yet.
+	 */
+	if (!tx_pending || (tx_done_old != tx_done)) {
+		/* update completed stats and continue */
+		tx_ring->tx_stats.tx_done_old = tx_done;
+		/* reset the countdown */
+		clear_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);
+
+		return false;
+	}
+
+	/* make sure it is true for two checks in a row */
+	return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);
+}
+
+/**
+ * fm10k_tx_timeout_reset - initiate reset due to Tx timeout
+ * @interface: driver private struct
+ **/
+void fm10k_tx_timeout_reset(struct fm10k_intfc *interface)
+{
+	/* Do the reset outside of interrupt context */
+	if (!test_bit(__FM10K_DOWN, &interface->state)) {
+		netdev_err(interface->netdev, "Reset interface\n");
+		interface->tx_timeout_count++;
+		interface->flags |= FM10K_FLAG_RESET_REQUESTED;
+		fm10k_service_event_schedule(interface);
+	}
+}
+
+/**
+ * fm10k_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: structure containing interrupt and ring information
+ * @tx_ring: tx ring to clean
+ **/
+static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
+			       struct fm10k_ring *tx_ring)
+{
+	struct fm10k_intfc *interface = q_vector->interface;
+	struct fm10k_tx_buffer *tx_buffer;
+	struct fm10k_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	unsigned int i = tx_ring->next_to_clean;
+
+	if (test_bit(__FM10K_DOWN, &interface->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer[i];
+	tx_desc = FM10K_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		read_barrier_depends();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+
+		/* free the skb */
+		dev_consume_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		tx_buffer->skb = NULL;
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer;
+				tx_desc = FM10K_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer;
+			tx_desc = FM10K_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) {
+		/* schedule immediate reset if we believe we hung */
+		struct fm10k_hw *hw = &interface->hw;
+
+		netif_err(interface, drv, tx_ring->netdev,
+			  "Detected Tx Unit Hang\n"
+			  " Tx Queue <%d>\n"
+			  " TDH, TDT <%x>, <%x>\n"
+			  " next_to_use <%x>\n"
+			  " next_to_clean <%x>\n",
+			  tx_ring->queue_index,
+			  fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)),
+			  fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)),
+			  tx_ring->next_to_use, i);
+
+		netif_stop_subqueue(tx_ring->netdev,
+				    tx_ring->queue_index);
+
+		netif_info(interface, probe, tx_ring->netdev,
+			   "tx hang %d detected on queue %d, resetting interface\n",
+			   interface->tx_timeout_count + 1,
+			   tx_ring->queue_index);
+
+		fm10k_tx_timeout_reset(interface);
+
+		/* the netdev is about to reset, no point in enabling stuff */
+		return true;
+	}
+
+	/* notify netdev of completed buffers */
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2)
+	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+		     (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !test_bit(__FM10K_DOWN, &interface->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+			++tx_ring->tx_stats.restart_queue;
+		}
+	}
+
+	return !!budget;
+}
+
 /**
  * fm10k_update_itr - update the dynamic ITR value based on packet size
  *
@@ -137,6 +1053,28 @@ static int fm10k_poll(struct napi_struct *napi, int budget)
 {
 	struct fm10k_q_vector *q_vector =
 				container_of(napi, struct fm10k_q_vector, napi);
+	struct fm10k_ring *ring;
+	int per_ring_budget;
+	bool clean_complete = true;
+
+	fm10k_for_each_ring(ring, q_vector->tx)
+		clean_complete &= fm10k_clean_tx_irq(q_vector, ring);
+
+	/* attempt to distribute budget to each queue fairly, but don't
+	 * allow the budget to go below 1 because we'll exit polling
+	 */
+	if (q_vector->rx.count > 1)
+		per_ring_budget = max(budget/q_vector->rx.count, 1);
+	else
+		per_ring_budget = budget;
+
+	fm10k_for_each_ring(ring, q_vector->rx)
+		clean_complete &= fm10k_clean_rx_irq(q_vector, ring,
+						     per_ring_budget);
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
 
 	/* all work done, exit the polling mode */
 	napi_complete(napi);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 52673d78de8e..46df65856156 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -451,8 +451,66 @@ int fm10k_close(struct net_device *netdev)
 
 static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 {
-	dev_kfree_skb_any(skb);
-	return NETDEV_TX_OK;
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	unsigned int r_idx = 0;
+	int err;
+
+	if ((skb->protocol == htons(ETH_P_8021Q)) &&
+	    !vlan_tx_tag_present(skb)) {
+		/* FM10K only supports hardware tagging, any tags in frame
+		 * are considered 2nd level or "outer" tags
+		 */
+		struct vlan_hdr *vhdr;
+		__be16 proto;
+
+		/* make sure skb is not shared */
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (!skb)
+			return NETDEV_TX_OK;
+
+		/* make sure there is enough room to move the ethernet header */
+		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+			return NETDEV_TX_OK;
+
+		/* verify the skb head is not shared */
+		err = skb_cow_head(skb, 0);
+		if (err)
+			return NETDEV_TX_OK;
+
+		/* locate vlan header */
+		vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
+
+		/* pull the 2 key pieces of data out of it */
+		__vlan_hwaccel_put_tag(skb,
+				       htons(ETH_P_8021Q),
+				       ntohs(vhdr->h_vlan_TCI));
+		proto = vhdr->h_vlan_encapsulated_proto;
+		skb->protocol = (ntohs(proto) >= 1536) ? proto :
+							 htons(ETH_P_802_2);
+
+		/* squash it by moving the ethernet addresses up 4 bytes */
+		memmove(skb->data + VLAN_HLEN, skb->data, 12);
+		__skb_pull(skb, VLAN_HLEN);
+		skb_reset_mac_header(skb);
+	}
+
+	/* The minimum packet size for a single buffer is 17B so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (unlikely(skb->len < 17)) {
+		int pad_len = 17 - skb->len;
+
+		if (skb_pad(skb, pad_len))
+			return NETDEV_TX_OK;
+		__skb_put(skb, pad_len);
+	}
+
+	if (r_idx >= interface->num_tx_queues)
+		r_idx %= interface->num_tx_queues;
+
+	err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]);
+
+	return err;
 }
 
 static int fm10k_change_mtu(struct net_device *dev, int new_mtu)
@@ -465,6 +523,37 @@ static int fm10k_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+/**
+ * fm10k_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ **/
+static void fm10k_tx_timeout(struct net_device *netdev)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	bool real_tx_hang = false;
+	int i;
+
+#define TX_TIMEO_LIMIT 16000
+	for (i = 0; i < interface->num_tx_queues; i++) {
+		struct fm10k_ring *tx_ring = interface->tx_ring[i];
+
+		if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring))
+			real_tx_hang = true;
+	}
+
+	if (real_tx_hang) {
+		fm10k_tx_timeout_reset(interface);
+	} else {
+		netif_info(interface, drv, netdev,
+			   "Fake Tx hang detected with timeout of %d seconds\n",
+			   netdev->watchdog_timeo/HZ);
+
+		/* fake Tx hang - increase the kernel timeout */
+		if (netdev->watchdog_timeo < TX_TIMEO_LIMIT)
+			netdev->watchdog_timeo *= 2;
+	}
+}
+
 static int fm10k_uc_vlan_unsync(struct net_device *netdev,
 				const unsigned char *uc_addr)
 {
@@ -891,6 +980,7 @@ static const struct net_device_ops fm10k_netdev_ops = {
 	.ndo_start_xmit		= fm10k_xmit_frame,
 	.ndo_set_mac_address	= fm10k_set_mac,
 	.ndo_change_mtu		= fm10k_change_mtu,
+	.ndo_tx_timeout		= fm10k_tx_timeout,
 	.ndo_vlan_rx_add_vid	= fm10k_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= fm10k_vlan_rx_kill_vid,
 	.ndo_set_rx_mode	= fm10k_set_rx_mode,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 7529a8498da9..229c7e491251 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -661,6 +661,9 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
 
 	/* enable queue */
 	fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
+
+	/* place buffers on ring for receive data */
+	fm10k_alloc_rx_buffers(ring, fm10k_desc_unused(ring));
 }
 
 /**