author		Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>	2018-03-20 10:58:14 -0400
committer	Jeff Kirsher <jeffrey.t.kirsher@intel.com>	2018-03-26 14:27:05 -0400
commit		2b245cb29421abbad508e93cdfedf81adc12edf1 (patch)
tree		a43188f96548d3bb9c521bbef6cc4ea200b58040 /drivers/net/ethernet/intel/ice/ice_txrx.c
parent		cdedef59deb020e78721d820a5692100128c8c73 (diff)
ice: Implement transmit and NAPI support
This patch implements ice_start_xmit (the handler for ndo_start_xmit) and
related functions. ice_start_xmit ultimately calls ice_tx_map, where the
Tx descriptor is built and posted to the hardware by bumping the ring tail.
This patch also implements ice_napi_poll, which is invoked when there's an
interrupt on the VSI's queues. The interrupt can be due to either a
completed Tx or an Rx event. In case of a completed Tx/Rx event, resources
are reclaimed. Additionally, in case of an Rx event, the skb is fetched
and passed up to the network stack.
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
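The ice_napi_poll handler described above follows the standard NAPI contract: clean Tx completions first with a separate budget, process Rx within the NAPI budget, and only exit polling and re-arm the interrupt once every queue reports clean. A minimal sketch of that contract is shown below; it is an illustration only, not the driver's actual code (which appears in the diff that follows), and everything prefixed "my_" is a placeholder rather than an ice or kernel symbol.

/* Sketch of the NAPI poll pattern implemented by ice_napi_poll() in the
 * diff below. The my_* helpers stand in for the driver's own Tx/Rx cleanup
 * and interrupt re-enable routines.
 */
#include <linux/netdevice.h>

struct my_q_vector {
	struct napi_struct napi;
	/* ring pointers and interrupt bookkeeping live here */
};

bool my_clean_tx_irq(struct my_q_vector *qv, int budget);	/* reclaim Tx descriptors */
int my_clean_rx_irq(struct my_q_vector *qv, int budget);	/* build skbs, hand them to the stack */
void my_reenable_irq(struct my_q_vector *qv);			/* re-arm the queue interrupt */

static int my_napi_poll(struct napi_struct *napi, int budget)
{
	struct my_q_vector *qv = container_of(napi, struct my_q_vector, napi);
	bool clean_complete = true;
	int work_done;

	/* Tx completion is cheap, so clean it first and do not charge the
	 * Rx budget for it.
	 */
	if (!my_clean_tx_irq(qv, budget))
		clean_complete = false;

	/* netpoll can call with budget == 0: skip Rx work, stay scheduled */
	if (budget <= 0)
		return budget;

	work_done = my_clean_rx_irq(qv, budget);
	if (work_done >= budget)
		clean_complete = false;

	/* returning the full budget tells NAPI to poll this vector again */
	if (!clean_complete)
		return budget;

	/* everything is clean: leave polling mode and re-arm the interrupt */
	napi_complete_done(napi, work_done);
	my_reenable_irq(qv);
	return 0;
}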
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c')
-rw-r--r--	drivers/net/ethernet/intel/ice/ice_txrx.c	1026
1 file changed, 1024 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 6190ea30ee01..1ccf8e69b85a 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -7,6 +7,8 @@
7 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
8 | #include "ice.h" | 8 | #include "ice.h" |
9 | 9 | ||
10 | #define ICE_RX_HDR_SIZE 256 | ||
11 | |||
10 | /** | 12 | /** |
11 | * ice_unmap_and_free_tx_buf - Release a Tx buffer | 13 | * ice_unmap_and_free_tx_buf - Release a Tx buffer |
12 | * @ring: the ring that owns the buffer | 14 | * @ring: the ring that owns the buffer |
@@ -93,6 +95,129 @@ void ice_free_tx_ring(struct ice_ring *tx_ring)
93 | } | 95 | } |
94 | 96 | ||
95 | /** | 97 | /** |
98 | * ice_clean_tx_irq - Reclaim resources after transmit completes | ||
99 | * @vsi: the VSI we care about | ||
100 | * @tx_ring: Tx ring to clean | ||
101 | * @napi_budget: Used to determine if we are in netpoll | ||
102 | * | ||
103 | * Returns true if there's any budget left (e.g. the clean is finished) | ||
104 | */ | ||
105 | static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, | ||
106 | int napi_budget) | ||
107 | { | ||
108 | unsigned int total_bytes = 0, total_pkts = 0; | ||
109 | unsigned int budget = vsi->work_lmt; | ||
110 | s16 i = tx_ring->next_to_clean; | ||
111 | struct ice_tx_desc *tx_desc; | ||
112 | struct ice_tx_buf *tx_buf; | ||
113 | |||
114 | tx_buf = &tx_ring->tx_buf[i]; | ||
115 | tx_desc = ICE_TX_DESC(tx_ring, i); | ||
116 | i -= tx_ring->count; | ||
117 | |||
118 | do { | ||
119 | struct ice_tx_desc *eop_desc = tx_buf->next_to_watch; | ||
120 | |||
121 | /* if next_to_watch is not set then there is no work pending */ | ||
122 | if (!eop_desc) | ||
123 | break; | ||
124 | |||
125 | smp_rmb(); /* prevent any other reads prior to eop_desc */ | ||
126 | |||
127 | /* if the descriptor isn't done, no work yet to do */ | ||
128 | if (!(eop_desc->cmd_type_offset_bsz & | ||
129 | cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) | ||
130 | break; | ||
131 | |||
132 | /* clear next_to_watch to prevent false hangs */ | ||
133 | tx_buf->next_to_watch = NULL; | ||
134 | |||
135 | /* update the statistics for this packet */ | ||
136 | total_bytes += tx_buf->bytecount; | ||
137 | total_pkts += tx_buf->gso_segs; | ||
138 | |||
139 | /* free the skb */ | ||
140 | napi_consume_skb(tx_buf->skb, napi_budget); | ||
141 | |||
142 | /* unmap skb header data */ | ||
143 | dma_unmap_single(tx_ring->dev, | ||
144 | dma_unmap_addr(tx_buf, dma), | ||
145 | dma_unmap_len(tx_buf, len), | ||
146 | DMA_TO_DEVICE); | ||
147 | |||
148 | /* clear tx_buf data */ | ||
149 | tx_buf->skb = NULL; | ||
150 | dma_unmap_len_set(tx_buf, len, 0); | ||
151 | |||
152 | /* unmap remaining buffers */ | ||
153 | while (tx_desc != eop_desc) { | ||
154 | tx_buf++; | ||
155 | tx_desc++; | ||
156 | i++; | ||
157 | if (unlikely(!i)) { | ||
158 | i -= tx_ring->count; | ||
159 | tx_buf = tx_ring->tx_buf; | ||
160 | tx_desc = ICE_TX_DESC(tx_ring, 0); | ||
161 | } | ||
162 | |||
163 | /* unmap any remaining paged data */ | ||
164 | if (dma_unmap_len(tx_buf, len)) { | ||
165 | dma_unmap_page(tx_ring->dev, | ||
166 | dma_unmap_addr(tx_buf, dma), | ||
167 | dma_unmap_len(tx_buf, len), | ||
168 | DMA_TO_DEVICE); | ||
169 | dma_unmap_len_set(tx_buf, len, 0); | ||
170 | } | ||
171 | } | ||
172 | |||
173 | /* move us one more past the eop_desc for start of next pkt */ | ||
174 | tx_buf++; | ||
175 | tx_desc++; | ||
176 | i++; | ||
177 | if (unlikely(!i)) { | ||
178 | i -= tx_ring->count; | ||
179 | tx_buf = tx_ring->tx_buf; | ||
180 | tx_desc = ICE_TX_DESC(tx_ring, 0); | ||
181 | } | ||
182 | |||
183 | prefetch(tx_desc); | ||
184 | |||
185 | /* update budget accounting */ | ||
186 | budget--; | ||
187 | } while (likely(budget)); | ||
188 | |||
189 | i += tx_ring->count; | ||
190 | tx_ring->next_to_clean = i; | ||
191 | u64_stats_update_begin(&tx_ring->syncp); | ||
192 | tx_ring->stats.bytes += total_bytes; | ||
193 | tx_ring->stats.pkts += total_pkts; | ||
194 | u64_stats_update_end(&tx_ring->syncp); | ||
195 | tx_ring->q_vector->tx.total_bytes += total_bytes; | ||
196 | tx_ring->q_vector->tx.total_pkts += total_pkts; | ||
197 | |||
198 | netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, | ||
199 | total_bytes); | ||
200 | |||
201 | #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) | ||
202 | if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) && | ||
203 | (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { | ||
204 | /* Make sure that anybody stopping the queue after this | ||
205 | * sees the new next_to_clean. | ||
206 | */ | ||
207 | smp_mb(); | ||
208 | if (__netif_subqueue_stopped(tx_ring->netdev, | ||
209 | tx_ring->q_index) && | ||
210 | !test_bit(__ICE_DOWN, vsi->state)) { | ||
211 | netif_wake_subqueue(tx_ring->netdev, | ||
212 | tx_ring->q_index); | ||
213 | ++tx_ring->tx_stats.restart_q; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | return !!budget; | ||
218 | } | ||
219 | |||
220 | /** | ||
96 | * ice_setup_tx_ring - Allocate the Tx descriptors | 221 | * ice_setup_tx_ring - Allocate the Tx descriptors |
97 | * @tx_ring: the tx ring to set up | 222 | * @tx_ring: the tx ring to set up |
98 | * | 223 | * |
@@ -274,13 +399,17 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
274 | dma_addr_t dma; | 399 | dma_addr_t dma; |
275 | 400 | ||
276 | /* since we are recycling buffers we should seldom need to alloc */ | 401 | /* since we are recycling buffers we should seldom need to alloc */ |
277 | if (likely(page)) | 402 | if (likely(page)) { |
403 | rx_ring->rx_stats.page_reuse_count++; | ||
278 | return true; | 404 | return true; |
405 | } | ||
279 | 406 | ||
280 | /* alloc new page for storage */ | 407 | /* alloc new page for storage */ |
281 | page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); | 408 | page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); |
282 | if (unlikely(!page)) | 409 | if (unlikely(!page)) { |
410 | rx_ring->rx_stats.alloc_page_failed++; | ||
283 | return false; | 411 | return false; |
412 | } | ||
284 | 413 | ||
285 | /* map page for use */ | 414 | /* map page for use */ |
286 | dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); | 415 | dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); |
@@ -290,6 +419,7 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
290 | */ | 419 | */ |
291 | if (dma_mapping_error(rx_ring->dev, dma)) { | 420 | if (dma_mapping_error(rx_ring->dev, dma)) { |
292 | __free_pages(page, 0); | 421 | __free_pages(page, 0); |
422 | rx_ring->rx_stats.alloc_page_failed++; | ||
293 | return false; | 423 | return false; |
294 | } | 424 | } |
295 | 425 | ||
@@ -359,3 +489,895 @@ no_bufs:
359 | */ | 489 | */ |
360 | return true; | 490 | return true; |
361 | } | 491 | } |
492 | |||
493 | /** | ||
494 | * ice_page_is_reserved - check if reuse is possible | ||
495 | * @page: page struct to check | ||
496 | */ | ||
497 | static bool ice_page_is_reserved(struct page *page) | ||
498 | { | ||
499 | return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); | ||
500 | } | ||
501 | |||
502 | /** | ||
503 | * ice_add_rx_frag - Add contents of Rx buffer to sk_buff | ||
504 | * @rx_buf: buffer containing page to add | ||
505 | * @rx_desc: descriptor containing length of buffer written by hardware | ||
506 | * @skb: sk_buf to place the data into | ||
507 | * | ||
508 | * This function will add the data contained in rx_buf->page to the skb. | ||
509 | * This is done either through a direct copy if the data in the buffer is | ||
510 | * less than the skb header size, otherwise it will just attach the page as | ||
511 | * a frag to the skb. | ||
512 | * | ||
513 | * The function will then update the page offset if necessary and return | ||
514 | * true if the buffer can be reused by the adapter. | ||
515 | */ | ||
516 | static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf, | ||
517 | union ice_32b_rx_flex_desc *rx_desc, | ||
518 | struct sk_buff *skb) | ||
519 | { | ||
520 | #if (PAGE_SIZE < 8192) | ||
521 | unsigned int truesize = ICE_RXBUF_2048; | ||
522 | #else | ||
523 | unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048; | ||
524 | unsigned int truesize; | ||
525 | #endif /* PAGE_SIZE < 8192) */ | ||
526 | |||
527 | struct page *page; | ||
528 | unsigned int size; | ||
529 | |||
530 | size = le16_to_cpu(rx_desc->wb.pkt_len) & | ||
531 | ICE_RX_FLX_DESC_PKT_LEN_M; | ||
532 | |||
533 | page = rx_buf->page; | ||
534 | |||
535 | #if (PAGE_SIZE >= 8192) | ||
536 | truesize = ALIGN(size, L1_CACHE_BYTES); | ||
537 | #endif /* PAGE_SIZE >= 8192) */ | ||
538 | |||
539 | /* will the data fit in the skb we allocated? if so, just | ||
540 | * copy it as it is pretty small anyway | ||
541 | */ | ||
542 | if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) { | ||
543 | unsigned char *va = page_address(page) + rx_buf->page_offset; | ||
544 | |||
545 | memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); | ||
546 | |||
547 | /* page is not reserved, we can reuse buffer as-is */ | ||
548 | if (likely(!ice_page_is_reserved(page))) | ||
549 | return true; | ||
550 | |||
551 | /* this page cannot be reused so discard it */ | ||
552 | __free_pages(page, 0); | ||
553 | return false; | ||
554 | } | ||
555 | |||
556 | skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, | ||
557 | rx_buf->page_offset, size, truesize); | ||
558 | |||
559 | /* avoid re-using remote pages */ | ||
560 | if (unlikely(ice_page_is_reserved(page))) | ||
561 | return false; | ||
562 | |||
563 | #if (PAGE_SIZE < 8192) | ||
564 | /* if we are only owner of page we can reuse it */ | ||
565 | if (unlikely(page_count(page) != 1)) | ||
566 | return false; | ||
567 | |||
568 | /* flip page offset to other buffer */ | ||
569 | rx_buf->page_offset ^= truesize; | ||
570 | #else | ||
571 | /* move offset up to the next cache line */ | ||
572 | rx_buf->page_offset += truesize; | ||
573 | |||
574 | if (rx_buf->page_offset > last_offset) | ||
575 | return false; | ||
576 | #endif /* PAGE_SIZE < 8192) */ | ||
577 | |||
578 | /* Even if we own the page, we are not allowed to use atomic_set() | ||
579 | * This would break get_page_unless_zero() users. | ||
580 | */ | ||
581 | get_page(rx_buf->page); | ||
582 | |||
583 | return true; | ||
584 | } | ||
585 | |||
586 | /** | ||
587 | * ice_reuse_rx_page - page flip buffer and store it back on the ring | ||
588 | * @rx_ring: rx descriptor ring to store buffers on | ||
589 | * @old_buf: donor buffer to have page reused | ||
590 | * | ||
591 | * Synchronizes page for reuse by the adapter | ||
592 | */ | ||
593 | static void ice_reuse_rx_page(struct ice_ring *rx_ring, | ||
594 | struct ice_rx_buf *old_buf) | ||
595 | { | ||
596 | u16 nta = rx_ring->next_to_alloc; | ||
597 | struct ice_rx_buf *new_buf; | ||
598 | |||
599 | new_buf = &rx_ring->rx_buf[nta]; | ||
600 | |||
601 | /* update, and store next to alloc */ | ||
602 | nta++; | ||
603 | rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; | ||
604 | |||
605 | /* transfer page from old buffer to new buffer */ | ||
606 | *new_buf = *old_buf; | ||
607 | } | ||
608 | |||
609 | /** | ||
610 | * ice_fetch_rx_buf - Allocate skb and populate it | ||
611 | * @rx_ring: rx descriptor ring to transact packets on | ||
612 | * @rx_desc: descriptor containing info written by hardware | ||
613 | * | ||
614 | * This function allocates an skb on the fly, and populates it with the page | ||
615 | * data from the current receive descriptor, taking care to set up the skb | ||
616 | * correctly, as well as handling calling the page recycle function if | ||
617 | * necessary. | ||
618 | */ | ||
619 | static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring, | ||
620 | union ice_32b_rx_flex_desc *rx_desc) | ||
621 | { | ||
622 | struct ice_rx_buf *rx_buf; | ||
623 | struct sk_buff *skb; | ||
624 | struct page *page; | ||
625 | |||
626 | rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; | ||
627 | page = rx_buf->page; | ||
628 | prefetchw(page); | ||
629 | |||
630 | skb = rx_buf->skb; | ||
631 | |||
632 | if (likely(!skb)) { | ||
633 | u8 *page_addr = page_address(page) + rx_buf->page_offset; | ||
634 | |||
635 | /* prefetch first cache line of first page */ | ||
636 | prefetch(page_addr); | ||
637 | #if L1_CACHE_BYTES < 128 | ||
638 | prefetch((void *)(page_addr + L1_CACHE_BYTES)); | ||
639 | #endif /* L1_CACHE_BYTES */ | ||
640 | |||
641 | /* allocate a skb to store the frags */ | ||
642 | skb = __napi_alloc_skb(&rx_ring->q_vector->napi, | ||
643 | ICE_RX_HDR_SIZE, | ||
644 | GFP_ATOMIC | __GFP_NOWARN); | ||
645 | if (unlikely(!skb)) { | ||
646 | rx_ring->rx_stats.alloc_buf_failed++; | ||
647 | return NULL; | ||
648 | } | ||
649 | |||
650 | /* we will be copying header into skb->data in | ||
651 | * pskb_may_pull so it is in our interest to prefetch | ||
652 | * it now to avoid a possible cache miss | ||
653 | */ | ||
654 | prefetchw(skb->data); | ||
655 | |||
656 | skb_record_rx_queue(skb, rx_ring->q_index); | ||
657 | } else { | ||
658 | /* we are reusing so sync this buffer for CPU use */ | ||
659 | dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, | ||
660 | rx_buf->page_offset, | ||
661 | ICE_RXBUF_2048, | ||
662 | DMA_FROM_DEVICE); | ||
663 | |||
664 | rx_buf->skb = NULL; | ||
665 | } | ||
666 | |||
667 | /* pull page into skb */ | ||
668 | if (ice_add_rx_frag(rx_buf, rx_desc, skb)) { | ||
669 | /* hand second half of page back to the ring */ | ||
670 | ice_reuse_rx_page(rx_ring, rx_buf); | ||
671 | rx_ring->rx_stats.page_reuse_count++; | ||
672 | } else { | ||
673 | /* we are not reusing the buffer so unmap it */ | ||
674 | dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE, | ||
675 | DMA_FROM_DEVICE); | ||
676 | } | ||
677 | |||
678 | /* clear contents of buffer_info */ | ||
679 | rx_buf->page = NULL; | ||
680 | |||
681 | return skb; | ||
682 | } | ||
683 | |||
684 | /** | ||
685 | * ice_pull_tail - ice specific version of skb_pull_tail | ||
686 | * @skb: pointer to current skb being adjusted | ||
687 | * | ||
688 | * This function is an ice specific version of __pskb_pull_tail. The | ||
689 | * main difference between this version and the original function is that | ||
690 | * this function can make several assumptions about the state of things | ||
691 | * that allow for significant optimizations versus the standard function. | ||
692 | * As a result we can do things like drop a frag and maintain an accurate | ||
693 | * truesize for the skb. | ||
694 | */ | ||
695 | static void ice_pull_tail(struct sk_buff *skb) | ||
696 | { | ||
697 | struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; | ||
698 | unsigned int pull_len; | ||
699 | unsigned char *va; | ||
700 | |||
701 | /* it is valid to use page_address instead of kmap since we are | ||
702 | * working with pages allocated out of the lomem pool per | ||
703 | * alloc_page(GFP_ATOMIC) | ||
704 | */ | ||
705 | va = skb_frag_address(frag); | ||
706 | |||
707 | /* we need the header to contain the greater of either ETH_HLEN or | ||
708 | * 60 bytes if the skb->len is less than 60 for skb_pad. | ||
709 | */ | ||
710 | pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE); | ||
711 | |||
712 | /* align pull length to size of long to optimize memcpy performance */ | ||
713 | skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); | ||
714 | |||
715 | /* update all of the pointers */ | ||
716 | skb_frag_size_sub(frag, pull_len); | ||
717 | frag->page_offset += pull_len; | ||
718 | skb->data_len -= pull_len; | ||
719 | skb->tail += pull_len; | ||
720 | } | ||
721 | |||
722 | /** | ||
723 | * ice_cleanup_headers - Correct empty headers | ||
724 | * @skb: pointer to current skb being fixed | ||
725 | * | ||
726 | * Also address the case where we are pulling data in on pages only | ||
727 | * and as such no data is present in the skb header. | ||
728 | * | ||
729 | * In addition if skb is not at least 60 bytes we need to pad it so that | ||
730 | * it is large enough to qualify as a valid Ethernet frame. | ||
731 | * | ||
732 | * Returns true if an error was encountered and skb was freed. | ||
733 | */ | ||
734 | static bool ice_cleanup_headers(struct sk_buff *skb) | ||
735 | { | ||
736 | /* place header in linear portion of buffer */ | ||
737 | if (skb_is_nonlinear(skb)) | ||
738 | ice_pull_tail(skb); | ||
739 | |||
740 | /* if eth_skb_pad returns an error the skb was freed */ | ||
741 | if (eth_skb_pad(skb)) | ||
742 | return true; | ||
743 | |||
744 | return false; | ||
745 | } | ||
746 | |||
747 | /** | ||
748 | * ice_test_staterr - tests bits in Rx descriptor status and error fields | ||
749 | * @rx_desc: pointer to receive descriptor (in le64 format) | ||
750 | * @stat_err_bits: value to mask | ||
751 | * | ||
752 | * This function does some fast chicanery in order to return the | ||
753 | * value of the mask which is really only used for boolean tests. | ||
754 | * The status_error_len doesn't need to be shifted because it begins | ||
755 | * at offset zero. | ||
756 | */ | ||
757 | static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, | ||
758 | const u16 stat_err_bits) | ||
759 | { | ||
760 | return !!(rx_desc->wb.status_error0 & | ||
761 | cpu_to_le16(stat_err_bits)); | ||
762 | } | ||
763 | |||
764 | /** | ||
765 | * ice_is_non_eop - process handling of non-EOP buffers | ||
766 | * @rx_ring: Rx ring being processed | ||
767 | * @rx_desc: Rx descriptor for current buffer | ||
768 | * @skb: Current socket buffer containing buffer in progress | ||
769 | * | ||
770 | * This function updates next to clean. If the buffer is an EOP buffer | ||
771 | * this function exits returning false, otherwise it will place the | ||
772 | * sk_buff in the next buffer to be chained and return true indicating | ||
773 | * that this is in fact a non-EOP buffer. | ||
774 | */ | ||
775 | static bool ice_is_non_eop(struct ice_ring *rx_ring, | ||
776 | union ice_32b_rx_flex_desc *rx_desc, | ||
777 | struct sk_buff *skb) | ||
778 | { | ||
779 | u32 ntc = rx_ring->next_to_clean + 1; | ||
780 | |||
781 | /* fetch, update, and store next to clean */ | ||
782 | ntc = (ntc < rx_ring->count) ? ntc : 0; | ||
783 | rx_ring->next_to_clean = ntc; | ||
784 | |||
785 | prefetch(ICE_RX_DESC(rx_ring, ntc)); | ||
786 | |||
787 | /* if we are the last buffer then there is nothing else to do */ | ||
788 | #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) | ||
789 | if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF))) | ||
790 | return false; | ||
791 | |||
792 | /* place skb in next buffer to be received */ | ||
793 | rx_ring->rx_buf[ntc].skb = skb; | ||
794 | rx_ring->rx_stats.non_eop_descs++; | ||
795 | |||
796 | return true; | ||
797 | } | ||
798 | |||
799 | /** | ||
800 | * ice_receive_skb - Send a completed packet up the stack | ||
801 | * @rx_ring: rx ring in play | ||
802 | * @skb: packet to send up | ||
803 | * @vlan_tag: vlan tag for packet | ||
804 | * | ||
805 | * This function sends the completed packet (via. skb) up the stack using | ||
806 | * gro receive functions (with/without vlan tag) | ||
807 | */ | ||
808 | static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, | ||
809 | u16 vlan_tag) | ||
810 | { | ||
811 | if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && | ||
812 | (vlan_tag & VLAN_VID_MASK)) { | ||
813 | __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); | ||
814 | } | ||
815 | napi_gro_receive(&rx_ring->q_vector->napi, skb); | ||
816 | } | ||
817 | |||
818 | /** | ||
819 | * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf | ||
820 | * @rx_ring: rx descriptor ring to transact packets on | ||
821 | * @budget: Total limit on number of packets to process | ||
822 | * | ||
823 | * This function provides a "bounce buffer" approach to Rx interrupt | ||
824 | * processing. The advantage to this is that on systems that have | ||
825 | * expensive overhead for IOMMU access this provides a means of avoiding | ||
826 | * it by maintaining the mapping of the page to the system. | ||
827 | * | ||
828 | * Returns amount of work completed | ||
829 | */ | ||
830 | static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) | ||
831 | { | ||
832 | unsigned int total_rx_bytes = 0, total_rx_pkts = 0; | ||
833 | u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); | ||
834 | bool failure = false; | ||
835 | |||
836 | /* start the loop to process RX packets bounded by 'budget' */ | ||
837 | while (likely(total_rx_pkts < (unsigned int)budget)) { | ||
838 | union ice_32b_rx_flex_desc *rx_desc; | ||
839 | struct sk_buff *skb; | ||
840 | u16 stat_err_bits; | ||
841 | u16 vlan_tag = 0; | ||
842 | |||
843 | /* return some buffers to hardware, one at a time is too slow */ | ||
844 | if (cleaned_count >= ICE_RX_BUF_WRITE) { | ||
845 | failure = failure || | ||
846 | ice_alloc_rx_bufs(rx_ring, cleaned_count); | ||
847 | cleaned_count = 0; | ||
848 | } | ||
849 | |||
850 | /* get the RX desc from RX ring based on 'next_to_clean' */ | ||
851 | rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); | ||
852 | |||
853 | /* status_error_len will always be zero for unused descriptors | ||
854 | * because it's cleared in cleanup, and overlaps with hdr_addr | ||
855 | * which is always zero because packet split isn't used, if the | ||
856 | * hardware wrote DD then it will be non-zero | ||
857 | */ | ||
858 | stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); | ||
859 | if (!ice_test_staterr(rx_desc, stat_err_bits)) | ||
860 | break; | ||
861 | |||
862 | /* This memory barrier is needed to keep us from reading | ||
863 | * any other fields out of the rx_desc until we know the | ||
864 | * DD bit is set. | ||
865 | */ | ||
866 | dma_rmb(); | ||
867 | |||
868 | /* allocate (if needed) and populate skb */ | ||
869 | skb = ice_fetch_rx_buf(rx_ring, rx_desc); | ||
870 | if (!skb) | ||
871 | break; | ||
872 | |||
873 | cleaned_count++; | ||
874 | |||
875 | /* skip if it is NOP desc */ | ||
876 | if (ice_is_non_eop(rx_ring, rx_desc, skb)) | ||
877 | continue; | ||
878 | |||
879 | stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); | ||
880 | if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) { | ||
881 | dev_kfree_skb_any(skb); | ||
882 | continue; | ||
883 | } | ||
884 | |||
885 | stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); | ||
886 | if (ice_test_staterr(rx_desc, stat_err_bits)) | ||
887 | vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); | ||
888 | |||
889 | /* correct empty headers and pad skb if needed (to make valid | ||
890 | * ethernet frame | ||
891 | */ | ||
892 | if (ice_cleanup_headers(skb)) { | ||
893 | skb = NULL; | ||
894 | continue; | ||
895 | } | ||
896 | |||
897 | /* probably a little skewed due to removing CRC */ | ||
898 | total_rx_bytes += skb->len; | ||
899 | |||
900 | /* send completed skb up the stack */ | ||
901 | ice_receive_skb(rx_ring, skb, vlan_tag); | ||
902 | |||
903 | /* update budget accounting */ | ||
904 | total_rx_pkts++; | ||
905 | } | ||
906 | |||
907 | /* update queue and vector specific stats */ | ||
908 | u64_stats_update_begin(&rx_ring->syncp); | ||
909 | rx_ring->stats.pkts += total_rx_pkts; | ||
910 | rx_ring->stats.bytes += total_rx_bytes; | ||
911 | u64_stats_update_end(&rx_ring->syncp); | ||
912 | rx_ring->q_vector->rx.total_pkts += total_rx_pkts; | ||
913 | rx_ring->q_vector->rx.total_bytes += total_rx_bytes; | ||
914 | |||
915 | /* guarantee a trip back through this routine if there was a failure */ | ||
916 | return failure ? budget : (int)total_rx_pkts; | ||
917 | } | ||
918 | |||
919 | /** | ||
920 | * ice_napi_poll - NAPI polling Rx/Tx cleanup routine | ||
921 | * @napi: napi struct with our devices info in it | ||
922 | * @budget: amount of work driver is allowed to do this pass, in packets | ||
923 | * | ||
924 | * This function will clean all queues associated with a q_vector. | ||
925 | * | ||
926 | * Returns the amount of work done | ||
927 | */ | ||
928 | int ice_napi_poll(struct napi_struct *napi, int budget) | ||
929 | { | ||
930 | struct ice_q_vector *q_vector = | ||
931 | container_of(napi, struct ice_q_vector, napi); | ||
932 | struct ice_vsi *vsi = q_vector->vsi; | ||
933 | struct ice_pf *pf = vsi->back; | ||
934 | bool clean_complete = true; | ||
935 | int budget_per_ring = 0; | ||
936 | struct ice_ring *ring; | ||
937 | int work_done = 0; | ||
938 | |||
939 | /* Since the actual Tx work is minimal, we can give the Tx a larger | ||
940 | * budget and be more aggressive about cleaning up the Tx descriptors. | ||
941 | */ | ||
942 | ice_for_each_ring(ring, q_vector->tx) | ||
943 | if (!ice_clean_tx_irq(vsi, ring, budget)) | ||
944 | clean_complete = false; | ||
945 | |||
946 | /* Handle case where we are called by netpoll with a budget of 0 */ | ||
947 | if (budget <= 0) | ||
948 | return budget; | ||
949 | |||
950 | /* We attempt to distribute budget to each Rx queue fairly, but don't | ||
951 | * allow the budget to go below 1 because that would exit polling early. | ||
952 | */ | ||
953 | if (q_vector->num_ring_rx) | ||
954 | budget_per_ring = max(budget / q_vector->num_ring_rx, 1); | ||
955 | |||
956 | ice_for_each_ring(ring, q_vector->rx) { | ||
957 | int cleaned; | ||
958 | |||
959 | cleaned = ice_clean_rx_irq(ring, budget_per_ring); | ||
960 | work_done += cleaned; | ||
961 | /* if we clean as many as budgeted, we must not be done */ | ||
962 | if (cleaned >= budget_per_ring) | ||
963 | clean_complete = false; | ||
964 | } | ||
965 | |||
966 | /* If work not completed, return budget and polling will return */ | ||
967 | if (!clean_complete) | ||
968 | return budget; | ||
969 | |||
970 | /* Work is done so exit the polling mode and re-enable the interrupt */ | ||
971 | napi_complete_done(napi, work_done); | ||
972 | if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) | ||
973 | ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector); | ||
974 | return 0; | ||
975 | } | ||
976 | |||
977 | /* helper function for building cmd/type/offset */ | ||
978 | static __le64 | ||
979 | build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) | ||
980 | { | ||
981 | return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | | ||
982 | (td_cmd << ICE_TXD_QW1_CMD_S) | | ||
983 | (td_offset << ICE_TXD_QW1_OFFSET_S) | | ||
984 | ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) | | ||
985 | (td_tag << ICE_TXD_QW1_L2TAG1_S)); | ||
986 | } | ||
987 | |||
988 | /** | ||
989 | * __ice_maybe_stop_tx - 2nd level check for tx stop conditions | ||
990 | * @tx_ring: the ring to be checked | ||
991 | * @size: the size buffer we want to assure is available | ||
992 | * | ||
993 | * Returns -EBUSY if a stop is needed, else 0 | ||
994 | */ | ||
995 | static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) | ||
996 | { | ||
997 | netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index); | ||
998 | /* Memory barrier before checking head and tail */ | ||
999 | smp_mb(); | ||
1000 | |||
1001 | /* Check again in a case another CPU has just made room available. */ | ||
1002 | if (likely(ICE_DESC_UNUSED(tx_ring) < size)) | ||
1003 | return -EBUSY; | ||
1004 | |||
1005 | /* A reprieve! - use start_subqueue because it doesn't call schedule */ | ||
1006 | netif_start_subqueue(tx_ring->netdev, tx_ring->q_index); | ||
1007 | ++tx_ring->tx_stats.restart_q; | ||
1008 | return 0; | ||
1009 | } | ||
1010 | |||
1011 | /** | ||
1012 | * ice_maybe_stop_tx - 1st level check for tx stop conditions | ||
1013 | * @tx_ring: the ring to be checked | ||
1014 | * @size: the size buffer we want to assure is available | ||
1015 | * | ||
1016 | * Returns 0 if stop is not needed | ||
1017 | */ | ||
1018 | static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) | ||
1019 | { | ||
1020 | if (likely(ICE_DESC_UNUSED(tx_ring) >= size)) | ||
1021 | return 0; | ||
1022 | return __ice_maybe_stop_tx(tx_ring, size); | ||
1023 | } | ||
1024 | |||
1025 | /** | ||
1026 | * ice_tx_map - Build the Tx descriptor | ||
1027 | * @tx_ring: ring to send buffer on | ||
1028 | * @first: first buffer info buffer to use | ||
1029 | * | ||
1030 | * This function loops over the skb data pointed to by *first | ||
1031 | * and gets a physical address for each memory location and programs | ||
1032 | * it and the length into the transmit descriptor. | ||
1033 | */ | ||
1034 | static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first) | ||
1035 | { | ||
1036 | u64 td_offset = 0, td_tag = 0, td_cmd = 0; | ||
1037 | u16 i = tx_ring->next_to_use; | ||
1038 | struct skb_frag_struct *frag; | ||
1039 | unsigned int data_len, size; | ||
1040 | struct ice_tx_desc *tx_desc; | ||
1041 | struct ice_tx_buf *tx_buf; | ||
1042 | struct sk_buff *skb; | ||
1043 | dma_addr_t dma; | ||
1044 | |||
1045 | skb = first->skb; | ||
1046 | |||
1047 | data_len = skb->data_len; | ||
1048 | size = skb_headlen(skb); | ||
1049 | |||
1050 | tx_desc = ICE_TX_DESC(tx_ring, i); | ||
1051 | |||
1052 | dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); | ||
1053 | |||
1054 | tx_buf = first; | ||
1055 | |||
1056 | for (frag = &skb_shinfo(skb)->frags[0];; frag++) { | ||
1057 | unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED; | ||
1058 | |||
1059 | if (dma_mapping_error(tx_ring->dev, dma)) | ||
1060 | goto dma_error; | ||
1061 | |||
1062 | /* record length, and DMA address */ | ||
1063 | dma_unmap_len_set(tx_buf, len, size); | ||
1064 | dma_unmap_addr_set(tx_buf, dma, dma); | ||
1065 | |||
1066 | /* align size to end of page */ | ||
1067 | max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1); | ||
1068 | tx_desc->buf_addr = cpu_to_le64(dma); | ||
1069 | |||
1070 | /* account for data chunks larger than the hardware | ||
1071 | * can handle | ||
1072 | */ | ||
1073 | while (unlikely(size > ICE_MAX_DATA_PER_TXD)) { | ||
1074 | tx_desc->cmd_type_offset_bsz = | ||
1075 | build_ctob(td_cmd, td_offset, max_data, td_tag); | ||
1076 | |||
1077 | tx_desc++; | ||
1078 | i++; | ||
1079 | |||
1080 | if (i == tx_ring->count) { | ||
1081 | tx_desc = ICE_TX_DESC(tx_ring, 0); | ||
1082 | i = 0; | ||
1083 | } | ||
1084 | |||
1085 | dma += max_data; | ||
1086 | size -= max_data; | ||
1087 | |||
1088 | max_data = ICE_MAX_DATA_PER_TXD_ALIGNED; | ||
1089 | tx_desc->buf_addr = cpu_to_le64(dma); | ||
1090 | } | ||
1091 | |||
1092 | if (likely(!data_len)) | ||
1093 | break; | ||
1094 | |||
1095 | tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, | ||
1096 | size, td_tag); | ||
1097 | |||
1098 | tx_desc++; | ||
1099 | i++; | ||
1100 | |||
1101 | if (i == tx_ring->count) { | ||
1102 | tx_desc = ICE_TX_DESC(tx_ring, 0); | ||
1103 | i = 0; | ||
1104 | } | ||
1105 | |||
1106 | size = skb_frag_size(frag); | ||
1107 | data_len -= size; | ||
1108 | |||
1109 | dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, | ||
1110 | DMA_TO_DEVICE); | ||
1111 | |||
1112 | tx_buf = &tx_ring->tx_buf[i]; | ||
1113 | } | ||
1114 | |||
1115 | /* record bytecount for BQL */ | ||
1116 | netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); | ||
1117 | |||
1118 | /* record SW timestamp if HW timestamp is not available */ | ||
1119 | skb_tx_timestamp(first->skb); | ||
1120 | |||
1121 | i++; | ||
1122 | if (i == tx_ring->count) | ||
1123 | i = 0; | ||
1124 | |||
1125 | /* write last descriptor with RS and EOP bits */ | ||
1126 | td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS); | ||
1127 | tx_desc->cmd_type_offset_bsz = | ||
1128 | build_ctob(td_cmd, td_offset, size, td_tag); | ||
1129 | |||
1130 | /* Force memory writes to complete before letting h/w know there | ||
1131 | * are new descriptors to fetch. | ||
1132 | * | ||
1133 | * We also use this memory barrier to make certain all of the | ||
1134 | * status bits have been updated before next_to_watch is written. | ||
1135 | */ | ||
1136 | wmb(); | ||
1137 | |||
1138 | /* set next_to_watch value indicating a packet is present */ | ||
1139 | first->next_to_watch = tx_desc; | ||
1140 | |||
1141 | tx_ring->next_to_use = i; | ||
1142 | |||
1143 | ice_maybe_stop_tx(tx_ring, DESC_NEEDED); | ||
1144 | |||
1145 | /* notify HW of packet */ | ||
1146 | if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { | ||
1147 | writel(i, tx_ring->tail); | ||
1148 | |||
1149 | /* we need this if more than one processor can write to our tail | ||
1150 | * at a time, it synchronizes IO on IA64/Altix systems | ||
1151 | */ | ||
1152 | mmiowb(); | ||
1153 | } | ||
1154 | |||
1155 | return; | ||
1156 | |||
1157 | dma_error: | ||
1158 | /* clear dma mappings for failed tx_buf map */ | ||
1159 | for (;;) { | ||
1160 | tx_buf = &tx_ring->tx_buf[i]; | ||
1161 | ice_unmap_and_free_tx_buf(tx_ring, tx_buf); | ||
1162 | if (tx_buf == first) | ||
1163 | break; | ||
1164 | if (i == 0) | ||
1165 | i = tx_ring->count; | ||
1166 | i--; | ||
1167 | } | ||
1168 | |||
1169 | tx_ring->next_to_use = i; | ||
1170 | } | ||
1171 | |||
1172 | /** | ||
1173 | * ice_txd_use_count - estimate the number of descriptors needed for Tx | ||
1174 | * @size: transmit request size in bytes | ||
1175 | * | ||
1176 | * Due to hardware alignment restrictions (4K alignment), we need to | ||
1177 | * assume that we can have no more than 12K of data per descriptor, even | ||
1178 | * though each descriptor can take up to 16K - 1 bytes of aligned memory. | ||
1179 | * Thus, we need to divide by 12K. But division is slow! Instead, | ||
1180 | * we decompose the operation into shifts and one relatively cheap | ||
1181 | * multiply operation. | ||
1182 | * | ||
1183 | * To divide by 12K, we first divide by 4K, then divide by 3: | ||
1184 | * To divide by 4K, shift right by 12 bits | ||
1185 | * To divide by 3, multiply by 85, then divide by 256 | ||
1186 | * (Divide by 256 is done by shifting right by 8 bits) | ||
1187 | * Finally, we add one to round up. Because 256 isn't an exact multiple of | ||
1188 | * 3, we'll underestimate near each multiple of 12K. This is actually more | ||
1189 | * accurate as we have 4K - 1 of wiggle room that we can fit into the last | ||
1190 | * segment. For our purposes this is accurate out to 1M which is orders of | ||
1191 | * magnitude greater than our largest possible GSO size. | ||
1192 | * | ||
1193 | * This would then be implemented as: | ||
1194 | * return (((size >> 12) * 85) >> 8) + 1; | ||
1195 | * | ||
1196 | * Since multiplication and division are commutative, we can reorder | ||
1197 | * operations into: | ||
1198 | * return ((size * 85) >> 20) + 1; | ||
1199 | */ | ||
1200 | static unsigned int ice_txd_use_count(unsigned int size) | ||
1201 | { | ||
1202 | return ((size * 85) >> 20) + 1; | ||
1203 | } | ||
1204 | |||
1205 | /** | ||
1206 | * ice_xmit_desc_count - calculate number of tx descriptors needed | ||
1207 | * @skb: send buffer | ||
1208 | * | ||
1209 | * Returns number of data descriptors needed for this skb. | ||
1210 | */ | ||
1211 | static unsigned int ice_xmit_desc_count(struct sk_buff *skb) | ||
1212 | { | ||
1213 | const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; | ||
1214 | unsigned int nr_frags = skb_shinfo(skb)->nr_frags; | ||
1215 | unsigned int count = 0, size = skb_headlen(skb); | ||
1216 | |||
1217 | for (;;) { | ||
1218 | count += ice_txd_use_count(size); | ||
1219 | |||
1220 | if (!nr_frags--) | ||
1221 | break; | ||
1222 | |||
1223 | size = skb_frag_size(frag++); | ||
1224 | } | ||
1225 | |||
1226 | return count; | ||
1227 | } | ||
1228 | |||
1229 | /** | ||
1230 | * __ice_chk_linearize - Check if there are more than 8 buffers per packet | ||
1231 | * @skb: send buffer | ||
1232 | * | ||
1233 | * Note: This HW can't DMA more than 8 buffers to build a packet on the wire | ||
1234 | * and so we need to figure out the cases where we need to linearize the skb. | ||
1235 | * | ||
1236 | * For TSO we need to count the TSO header and segment payload separately. | ||
1237 | * As such we need to check cases where we have 7 fragments or more as we | ||
1238 | * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for | ||
1239 | * the segment payload in the first descriptor, and another 7 for the | ||
1240 | * fragments. | ||
1241 | */ | ||
1242 | static bool __ice_chk_linearize(struct sk_buff *skb) | ||
1243 | { | ||
1244 | const struct skb_frag_struct *frag, *stale; | ||
1245 | int nr_frags, sum; | ||
1246 | |||
1247 | /* no need to check if number of frags is less than 7 */ | ||
1248 | nr_frags = skb_shinfo(skb)->nr_frags; | ||
1249 | if (nr_frags < (ICE_MAX_BUF_TXD - 1)) | ||
1250 | return false; | ||
1251 | |||
1252 | /* We need to walk through the list and validate that each group | ||
1253 | * of 6 fragments totals at least gso_size. | ||
1254 | */ | ||
1255 | nr_frags -= ICE_MAX_BUF_TXD - 2; | ||
1256 | frag = &skb_shinfo(skb)->frags[0]; | ||
1257 | |||
1258 | /* Initialize size to the negative value of gso_size minus 1. We | ||
1259 | * use this as the worst case scenario in which the frag ahead | ||
1260 | * of us only provides one byte which is why we are limited to 6 | ||
1261 | * descriptors for a single transmit as the header and previous | ||
1262 | * fragment are already consuming 2 descriptors. | ||
1263 | */ | ||
1264 | sum = 1 - skb_shinfo(skb)->gso_size; | ||
1265 | |||
1266 | /* Add size of frags 0 through 4 to create our initial sum */ | ||
1267 | sum += skb_frag_size(frag++); | ||
1268 | sum += skb_frag_size(frag++); | ||
1269 | sum += skb_frag_size(frag++); | ||
1270 | sum += skb_frag_size(frag++); | ||
1271 | sum += skb_frag_size(frag++); | ||
1272 | |||
1273 | /* Walk through fragments adding latest fragment, testing it, and | ||
1274 | * then removing stale fragments from the sum. | ||
1275 | */ | ||
1276 | stale = &skb_shinfo(skb)->frags[0]; | ||
1277 | for (;;) { | ||
1278 | sum += skb_frag_size(frag++); | ||
1279 | |||
1280 | /* if sum is negative we failed to make sufficient progress */ | ||
1281 | if (sum < 0) | ||
1282 | return true; | ||
1283 | |||
1284 | if (!nr_frags--) | ||
1285 | break; | ||
1286 | |||
1287 | sum -= skb_frag_size(stale++); | ||
1288 | } | ||
1289 | |||
1290 | return false; | ||
1291 | } | ||
1292 | |||
1293 | /** | ||
1294 | * ice_chk_linearize - Check if there are more than 8 fragments per packet | ||
1295 | * @skb: send buffer | ||
1296 | * @count: number of buffers used | ||
1297 | * | ||
1298 | * Note: Our HW can't scatter-gather more than 8 fragments to build | ||
1299 | * a packet on the wire and so we need to figure out the cases where we | ||
1300 | * need to linearize the skb. | ||
1301 | */ | ||
1302 | static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count) | ||
1303 | { | ||
1304 | /* Both TSO and single send will work if count is less than 8 */ | ||
1305 | if (likely(count < ICE_MAX_BUF_TXD)) | ||
1306 | return false; | ||
1307 | |||
1308 | if (skb_is_gso(skb)) | ||
1309 | return __ice_chk_linearize(skb); | ||
1310 | |||
1311 | /* we can support up to 8 data buffers for a single send */ | ||
1312 | return count != ICE_MAX_BUF_TXD; | ||
1313 | } | ||
1314 | |||
1315 | /** | ||
1316 | * ice_xmit_frame_ring - Sends buffer on Tx ring | ||
1317 | * @skb: send buffer | ||
1318 | * @tx_ring: ring to send buffer on | ||
1319 | * | ||
1320 | * Returns NETDEV_TX_OK if sent, else an error code | ||
1321 | */ | ||
1322 | static netdev_tx_t | ||
1323 | ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) | ||
1324 | { | ||
1325 | struct ice_tx_buf *first; | ||
1326 | unsigned int count; | ||
1327 | |||
1328 | count = ice_xmit_desc_count(skb); | ||
1329 | if (ice_chk_linearize(skb, count)) { | ||
1330 | if (__skb_linearize(skb)) | ||
1331 | goto out_drop; | ||
1332 | count = ice_txd_use_count(skb->len); | ||
1333 | tx_ring->tx_stats.tx_linearize++; | ||
1334 | } | ||
1335 | |||
1336 | /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD, | ||
1337 | * + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD, | ||
1338 | * + 4 desc gap to avoid the cache line where head is, | ||
1339 | * + 1 desc for context descriptor, | ||
1340 | * otherwise try next time | ||
1341 | */ | ||
1342 | if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) { | ||
1343 | tx_ring->tx_stats.tx_busy++; | ||
1344 | return NETDEV_TX_BUSY; | ||
1345 | } | ||
1346 | |||
1347 | /* record the location of the first descriptor for this packet */ | ||
1348 | first = &tx_ring->tx_buf[tx_ring->next_to_use]; | ||
1349 | first->skb = skb; | ||
1350 | first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); | ||
1351 | first->gso_segs = 1; | ||
1352 | |||
1353 | ice_tx_map(tx_ring, first); | ||
1354 | return NETDEV_TX_OK; | ||
1355 | |||
1356 | out_drop: | ||
1357 | dev_kfree_skb_any(skb); | ||
1358 | return NETDEV_TX_OK; | ||
1359 | } | ||
1360 | |||
1361 | /** | ||
1362 | * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer | ||
1363 | * @skb: send buffer | ||
1364 | * @netdev: network interface device structure | ||
1365 | * | ||
1366 | * Returns NETDEV_TX_OK if sent, else an error code | ||
1367 | */ | ||
1368 | netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev) | ||
1369 | { | ||
1370 | struct ice_netdev_priv *np = netdev_priv(netdev); | ||
1371 | struct ice_vsi *vsi = np->vsi; | ||
1372 | struct ice_ring *tx_ring; | ||
1373 | |||
1374 | tx_ring = vsi->tx_rings[skb->queue_mapping]; | ||
1375 | |||
1376 | /* hardware can't handle really short frames, hardware padding works | ||
1377 | * beyond this point | ||
1378 | */ | ||
1379 | if (skb_put_padto(skb, ICE_MIN_TX_LEN)) | ||
1380 | return NETDEV_TX_OK; | ||
1381 | |||
1382 | return ice_xmit_frame_ring(skb, tx_ring); | ||
1383 | } | ||
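The descriptor-count estimate in ice_txd_use_count() above (divide by 12K via a multiply by 85 and a right shift by 20) can be sanity-checked outside the kernel. The short userspace program below is only an illustration of that arithmetic: the macro names are local stand-ins rather than the driver's ICE_MAX_DATA_PER_TXD constants, and the 12K and 16K - 1 limits are taken from the function's own comment.

/* Standalone check of the ((size * 85) >> 20) + 1 estimate used by
 * ice_txd_use_count(): it never exceeds the naive 12K ceiling, and it is
 * always sufficient given the 16K - 1 bytes a descriptor can really carry.
 * Build with: cc -O2 -o txd_check txd_check.c && ./txd_check
 */
#include <assert.h>
#include <stdio.h>

#define MAX_DATA_PER_TXD	(16 * 1024 - 1)	/* per-descriptor hardware limit */
#define MAX_DATA_ALIGNED	(12 * 1024)	/* limit assumed by the estimate */

static unsigned int txd_use_count(unsigned int size)
{
	return ((size * 85) >> 20) + 1;		/* same expression as the driver */
}

int main(void)
{
	unsigned int size;

	for (size = 1; size <= 1024 * 1024; size++) {
		unsigned int est = txd_use_count(size);
		unsigned int ceil_12k = (size + MAX_DATA_ALIGNED - 1) / MAX_DATA_ALIGNED;

		/* never worse than the naive ceiling against 12K... */
		assert(est <= ceil_12k);
		/* ...and always enough at the true 16K - 1 capacity */
		assert((unsigned long long)est * MAX_DATA_PER_TXD >= size);
	}

	/* the deliberate underestimate near multiples of 12K is visible here */
	printf("12K+1 bytes -> %u desc, 64K -> %u desc, 1M -> %u desc\n",
	       txd_use_count(12 * 1024 + 1), txd_use_count(64 * 1024),
	       txd_use_count(1024 * 1024));
	return 0;
}

For a 12K + 1 byte send the estimate is a single descriptor rather than two, which is exactly the slack the comment describes: a naive divide-by-12K would round up, but the 16K - 1 hardware limit absorbs the remainder.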