diff options
author | Ben Hutchings <bhutchings@solarflare.com> | 2008-05-07 07:51:12 -0400 |
---|---|---|
committer | Jeff Garzik <jgarzik@redhat.com> | 2008-05-13 01:31:40 -0400 |
commit | b9b39b625cf57cd0ea998717598b68963cbec3cb (patch) | |
tree | 19f358d15b6c75d660cf1ea369559f58ad9f0c1e /drivers/net | |
parent | 48cfb14f8b89d4d5b3df6c16f08b258686fb12ad (diff) |
[netdrvr] sfc: Add TSO support
The SFC4000 controller does not have hardware support for TSO, and the
core GSO code incurs a high cost in allocating and freeing skbs. This
TSO implementation uses lightweight packet header structures and is
substantially faster.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/sfc/efx.c | 4 | ||||
-rw-r--r-- | drivers/net/sfc/ethtool.c | 27 | ||||
-rw-r--r-- | drivers/net/sfc/net_driver.h | 14 | ||||
-rw-r--r-- | drivers/net/sfc/tx.c | 664 |
4 files changed, 708 insertions, 1 deletions
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index 59edcf793c1..418f2e53a95 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c | |||
@@ -1873,6 +1873,7 @@ static int efx_init_struct(struct efx_nic *efx, struct efx_nic_type *type, | |||
1873 | tx_queue->queue = i; | 1873 | tx_queue->queue = i; |
1874 | tx_queue->buffer = NULL; | 1874 | tx_queue->buffer = NULL; |
1875 | tx_queue->channel = &efx->channel[0]; /* for safety */ | 1875 | tx_queue->channel = &efx->channel[0]; /* for safety */ |
1876 | tx_queue->tso_headers_free = NULL; | ||
1876 | } | 1877 | } |
1877 | for (i = 0; i < EFX_MAX_RX_QUEUES; i++) { | 1878 | for (i = 0; i < EFX_MAX_RX_QUEUES; i++) { |
1878 | rx_queue = &efx->rx_queue[i]; | 1879 | rx_queue = &efx->rx_queue[i]; |
@@ -2071,7 +2072,8 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, | |||
2071 | net_dev = alloc_etherdev(sizeof(*efx)); | 2072 | net_dev = alloc_etherdev(sizeof(*efx)); |
2072 | if (!net_dev) | 2073 | if (!net_dev) |
2073 | return -ENOMEM; | 2074 | return -ENOMEM; |
2074 | net_dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA; | 2075 | net_dev->features |= (NETIF_F_IP_CSUM | NETIF_F_SG | |
2076 | NETIF_F_HIGHDMA | NETIF_F_TSO); | ||
2075 | if (lro) | 2077 | if (lro) |
2076 | net_dev->features |= NETIF_F_LRO; | 2078 | net_dev->features |= NETIF_F_LRO; |
2077 | efx = net_dev->priv; | 2079 | efx = net_dev->priv; |
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index ad541badbd9..b756840e2a1 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c | |||
@@ -272,6 +272,22 @@ static void efx_ethtool_get_stats(struct net_device *net_dev, | |||
272 | } | 272 | } |
273 | } | 273 | } |
274 | 274 | ||
275 | static int efx_ethtool_set_tso(struct net_device *net_dev, u32 enable) | ||
276 | { | ||
277 | int rc; | ||
278 | |||
279 | /* Our TSO requires TX checksumming, so force TX checksumming | ||
280 | * on when TSO is enabled. | ||
281 | */ | ||
282 | if (enable) { | ||
283 | rc = efx_ethtool_set_tx_csum(net_dev, 1); | ||
284 | if (rc) | ||
285 | return rc; | ||
286 | } | ||
287 | |||
288 | return ethtool_op_set_tso(net_dev, enable); | ||
289 | } | ||
290 | |||
275 | static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable) | 291 | static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable) |
276 | { | 292 | { |
277 | struct efx_nic *efx = net_dev->priv; | 293 | struct efx_nic *efx = net_dev->priv; |
@@ -283,6 +299,15 @@ static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable) | |||
283 | 299 | ||
284 | efx_flush_queues(efx); | 300 | efx_flush_queues(efx); |
285 | 301 | ||
302 | /* Our TSO requires TX checksumming, so disable TSO when | ||
303 | * checksumming is disabled | ||
304 | */ | ||
305 | if (!enable) { | ||
306 | rc = efx_ethtool_set_tso(net_dev, 0); | ||
307 | if (rc) | ||
308 | return rc; | ||
309 | } | ||
310 | |||
286 | return 0; | 311 | return 0; |
287 | } | 312 | } |
288 | 313 | ||
@@ -451,6 +476,8 @@ struct ethtool_ops efx_ethtool_ops = { | |||
451 | .set_tx_csum = efx_ethtool_set_tx_csum, | 476 | .set_tx_csum = efx_ethtool_set_tx_csum, |
452 | .get_sg = ethtool_op_get_sg, | 477 | .get_sg = ethtool_op_get_sg, |
453 | .set_sg = ethtool_op_set_sg, | 478 | .set_sg = ethtool_op_set_sg, |
479 | .get_tso = ethtool_op_get_tso, | ||
480 | .set_tso = efx_ethtool_set_tso, | ||
454 | .get_flags = ethtool_op_get_flags, | 481 | .get_flags = ethtool_op_get_flags, |
455 | .set_flags = ethtool_op_set_flags, | 482 | .set_flags = ethtool_op_set_flags, |
456 | .get_strings = efx_ethtool_get_strings, | 483 | .get_strings = efx_ethtool_get_strings, |
diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index c505482c252..6ffa7116336 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h | |||
@@ -134,6 +134,8 @@ struct efx_special_buffer { | |||
134 | * Set only on the final fragment of a packet; %NULL for all other | 134 | * Set only on the final fragment of a packet; %NULL for all other |
135 | * fragments. When this fragment completes, then we can free this | 135 | * fragments. When this fragment completes, then we can free this |
136 | * skb. | 136 | * skb. |
137 | * @tsoh: The associated TSO header structure, or %NULL if this | ||
138 | * buffer is not a TSO header. | ||
137 | * @dma_addr: DMA address of the fragment. | 139 | * @dma_addr: DMA address of the fragment. |
138 | * @len: Length of this fragment. | 140 | * @len: Length of this fragment. |
139 | * This field is zero when the queue slot is empty. | 141 | * This field is zero when the queue slot is empty. |
@@ -144,6 +146,7 @@ struct efx_special_buffer { | |||
144 | */ | 146 | */ |
145 | struct efx_tx_buffer { | 147 | struct efx_tx_buffer { |
146 | const struct sk_buff *skb; | 148 | const struct sk_buff *skb; |
149 | struct efx_tso_header *tsoh; | ||
147 | dma_addr_t dma_addr; | 150 | dma_addr_t dma_addr; |
148 | unsigned short len; | 151 | unsigned short len; |
149 | unsigned char continuation; | 152 | unsigned char continuation; |
@@ -187,6 +190,13 @@ struct efx_tx_buffer { | |||
187 | * variable indicates that the queue is full. This is to | 190 | * variable indicates that the queue is full. This is to |
188 | * avoid cache-line ping-pong between the xmit path and the | 191 | * avoid cache-line ping-pong between the xmit path and the |
189 | * completion path. | 192 | * completion path. |
193 | * @tso_headers_free: A list of TSO headers allocated for this TX queue | ||
194 | * that are not in use, and so available for new TSO sends. The list | ||
195 | * is protected by the TX queue lock. | ||
196 | * @tso_bursts: Number of times TSO xmit invoked by kernel | ||
197 | * @tso_long_headers: Number of packets with headers too long for standard | ||
198 | * blocks | ||
199 | * @tso_packets: Number of packets via the TSO xmit path | ||
190 | */ | 200 | */ |
191 | struct efx_tx_queue { | 201 | struct efx_tx_queue { |
192 | /* Members which don't change on the fast path */ | 202 | /* Members which don't change on the fast path */ |
@@ -206,6 +216,10 @@ struct efx_tx_queue { | |||
206 | unsigned int insert_count ____cacheline_aligned_in_smp; | 216 | unsigned int insert_count ____cacheline_aligned_in_smp; |
207 | unsigned int write_count; | 217 | unsigned int write_count; |
208 | unsigned int old_read_count; | 218 | unsigned int old_read_count; |
219 | struct efx_tso_header *tso_headers_free; | ||
220 | unsigned int tso_bursts; | ||
221 | unsigned int tso_long_headers; | ||
222 | unsigned int tso_packets; | ||
209 | }; | 223 | }; |
210 | 224 | ||
211 | /** | 225 | /** |
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index fbb866b2185..9b436f5b488 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c | |||
@@ -82,6 +82,46 @@ static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, | |||
82 | } | 82 | } |
83 | } | 83 | } |
84 | 84 | ||
85 | /** | ||
86 | * struct efx_tso_header - a DMA mapped buffer for packet headers | ||
87 | * @next: Linked list of free ones. | ||
88 | * The list is protected by the TX queue lock. | ||
89 | * @unmap_len: Length to unmap for an oversize buffer, or 0. | ||
90 | * @dma_addr: The DMA address of the header below. | ||
91 | * | ||
92 | * This controls the memory used for a TSO header. Use TSOH_BUFFER() | ||
93 | * to find the packet header data. Use TSOH_SIZE() to calculate the | ||
94 | * total size required for a given packet header length. TSO headers | ||
95 | * in the free list are exactly %TSOH_STD_SIZE bytes in size. | ||
96 | */ | ||
97 | struct efx_tso_header { | ||
98 | union { | ||
99 | struct efx_tso_header *next; | ||
100 | size_t unmap_len; | ||
101 | }; | ||
102 | dma_addr_t dma_addr; | ||
103 | }; | ||
104 | |||
105 | static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, | ||
106 | const struct sk_buff *skb); | ||
107 | static void efx_fini_tso(struct efx_tx_queue *tx_queue); | ||
108 | static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, | ||
109 | struct efx_tso_header *tsoh); | ||
110 | |||
111 | static inline void efx_tsoh_free(struct efx_tx_queue *tx_queue, | ||
112 | struct efx_tx_buffer *buffer) | ||
113 | { | ||
114 | if (buffer->tsoh) { | ||
115 | if (likely(!buffer->tsoh->unmap_len)) { | ||
116 | buffer->tsoh->next = tx_queue->tso_headers_free; | ||
117 | tx_queue->tso_headers_free = buffer->tsoh; | ||
118 | } else { | ||
119 | efx_tsoh_heap_free(tx_queue, buffer->tsoh); | ||
120 | } | ||
121 | buffer->tsoh = NULL; | ||
122 | } | ||
123 | } | ||
124 | |||
85 | 125 | ||
86 | /* | 126 | /* |
87 | * Add a socket buffer to a TX queue | 127 | * Add a socket buffer to a TX queue |
@@ -114,6 +154,9 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue, | |||
114 | 154 | ||
115 | EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); | 155 | EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); |
116 | 156 | ||
157 | if (skb_shinfo((struct sk_buff *)skb)->gso_size) | ||
158 | return efx_enqueue_skb_tso(tx_queue, skb); | ||
159 | |||
117 | /* Get size of the initial fragment */ | 160 | /* Get size of the initial fragment */ |
118 | len = skb_headlen(skb); | 161 | len = skb_headlen(skb); |
119 | 162 | ||
@@ -166,6 +209,8 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue, | |||
166 | insert_ptr = (tx_queue->insert_count & | 209 | insert_ptr = (tx_queue->insert_count & |
167 | efx->type->txd_ring_mask); | 210 | efx->type->txd_ring_mask); |
168 | buffer = &tx_queue->buffer[insert_ptr]; | 211 | buffer = &tx_queue->buffer[insert_ptr]; |
212 | efx_tsoh_free(tx_queue, buffer); | ||
213 | EFX_BUG_ON_PARANOID(buffer->tsoh); | ||
169 | EFX_BUG_ON_PARANOID(buffer->skb); | 214 | EFX_BUG_ON_PARANOID(buffer->skb); |
170 | EFX_BUG_ON_PARANOID(buffer->len); | 215 | EFX_BUG_ON_PARANOID(buffer->len); |
171 | EFX_BUG_ON_PARANOID(buffer->continuation != 1); | 216 | EFX_BUG_ON_PARANOID(buffer->continuation != 1); |
@@ -432,6 +477,9 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) | |||
432 | 477 | ||
433 | efx_release_tx_buffers(tx_queue); | 478 | efx_release_tx_buffers(tx_queue); |
434 | 479 | ||
480 | /* Free up TSO header cache */ | ||
481 | efx_fini_tso(tx_queue); | ||
482 | |||
435 | /* Release queue's stop on port, if any */ | 483 | /* Release queue's stop on port, if any */ |
436 | if (tx_queue->stopped) { | 484 | if (tx_queue->stopped) { |
437 | tx_queue->stopped = 0; | 485 | tx_queue->stopped = 0; |
@@ -450,3 +498,619 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) | |||
450 | } | 498 | } |
451 | 499 | ||
452 | 500 | ||
501 | /* Efx TCP segmentation acceleration. | ||
502 | * | ||
503 | * Why? Because by doing it here in the driver we can go significantly | ||
504 | * faster than the core GSO code. | ||
505 | * | ||
506 | * Requires TX checksum offload support. | ||
507 | */ | ||
508 | |||
509 | /* Number of bytes inserted at the start of a TSO header buffer, | ||
510 | * similar to NET_IP_ALIGN. | ||
511 | */ | ||
512 | #if defined(__i386__) || defined(__x86_64__) | ||
513 | #define TSOH_OFFSET 0 | ||
514 | #else | ||
515 | #define TSOH_OFFSET NET_IP_ALIGN | ||
516 | #endif | ||
517 | |||
518 | #define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET) | ||
519 | |||
520 | /* Total size of struct efx_tso_header, buffer and padding */ | ||
521 | #define TSOH_SIZE(hdr_len) \ | ||
522 | (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len) | ||
523 | |||
524 | /* Size of blocks on free list. Larger blocks must be allocated from | ||
525 | * the heap. | ||
526 | */ | ||
527 | #define TSOH_STD_SIZE 128 | ||
528 | |||
529 | #define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2)) | ||
530 | #define ETH_HDR_LEN(skb) (skb_network_header(skb) - (skb)->data) | ||
531 | #define SKB_TCP_OFF(skb) PTR_DIFF(tcp_hdr(skb), (skb)->data) | ||
532 | #define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data) | ||
533 | |||
534 | /** | ||
535 | * struct tso_state - TSO state for an SKB | ||
536 | * @remaining_len: Bytes of data we've yet to segment | ||
537 | * @seqnum: Current sequence number | ||
538 | * @packet_space: Remaining space in current packet | ||
539 | * @ifc: Input fragment cursor. | ||
540 | * Where we are in the current fragment of the incoming SKB. These | ||
541 | * values get updated in place when we split a fragment over | ||
542 | * multiple packets. | ||
543 | * @p: Parameters. | ||
544 | * These values are set once at the start of the TSO send and do | ||
545 | * not get changed as the routine progresses. | ||
546 | * | ||
547 | * The state used during segmentation. It is put into this data structure | ||
548 | * just to make it easy to pass into inline functions. | ||
549 | */ | ||
550 | struct tso_state { | ||
551 | unsigned remaining_len; | ||
552 | unsigned seqnum; | ||
553 | unsigned packet_space; | ||
554 | |||
555 | struct { | ||
556 | /* DMA address of current position */ | ||
557 | dma_addr_t dma_addr; | ||
558 | /* Remaining length */ | ||
559 | unsigned int len; | ||
560 | /* DMA address and length of the whole fragment */ | ||
561 | unsigned int unmap_len; | ||
562 | dma_addr_t unmap_addr; | ||
563 | struct page *page; | ||
564 | unsigned page_off; | ||
565 | } ifc; | ||
566 | |||
567 | struct { | ||
568 | /* The number of bytes of header */ | ||
569 | unsigned int header_length; | ||
570 | |||
571 | /* The number of bytes to put in each outgoing segment. */ | ||
572 | int full_packet_size; | ||
573 | |||
574 | /* Current IPv4 ID, host endian. */ | ||
575 | unsigned ipv4_id; | ||
576 | } p; | ||
577 | }; | ||
578 | |||
579 | |||
580 | /* | ||
581 | * Verify that our various assumptions about sk_buffs and the conditions | ||
582 | * under which TSO will be attempted hold true. | ||
583 | */ | ||
584 | static inline void efx_tso_check_safe(const struct sk_buff *skb) | ||
585 | { | ||
586 | EFX_BUG_ON_PARANOID(skb->protocol != htons(ETH_P_IP)); | ||
587 | EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto != | ||
588 | skb->protocol); | ||
589 | EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP); | ||
590 | EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data) | ||
591 | + (tcp_hdr(skb)->doff << 2u)) > | ||
592 | skb_headlen(skb)); | ||
593 | } | ||
594 | |||
595 | |||
596 | /* | ||
597 | * Allocate a page worth of efx_tso_header structures, and string them | ||
598 | * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM. | ||
599 | */ | ||
600 | static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue) | ||
601 | { | ||
602 | |||
603 | struct pci_dev *pci_dev = tx_queue->efx->pci_dev; | ||
604 | struct efx_tso_header *tsoh; | ||
605 | dma_addr_t dma_addr; | ||
606 | u8 *base_kva, *kva; | ||
607 | |||
608 | base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr); | ||
609 | if (base_kva == NULL) { | ||
610 | EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO" | ||
611 | " headers\n"); | ||
612 | return -ENOMEM; | ||
613 | } | ||
614 | |||
615 | /* pci_alloc_consistent() allocates pages. */ | ||
616 | EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u)); | ||
617 | |||
618 | for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) { | ||
619 | tsoh = (struct efx_tso_header *)kva; | ||
620 | tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva); | ||
621 | tsoh->next = tx_queue->tso_headers_free; | ||
622 | tx_queue->tso_headers_free = tsoh; | ||
623 | } | ||
624 | |||
625 | return 0; | ||
626 | } | ||
627 | |||
628 | |||
629 | /* Free up a TSO header, and all others in the same page. */ | ||
630 | static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue, | ||
631 | struct efx_tso_header *tsoh, | ||
632 | struct pci_dev *pci_dev) | ||
633 | { | ||
634 | struct efx_tso_header **p; | ||
635 | unsigned long base_kva; | ||
636 | dma_addr_t base_dma; | ||
637 | |||
638 | base_kva = (unsigned long)tsoh & PAGE_MASK; | ||
639 | base_dma = tsoh->dma_addr & PAGE_MASK; | ||
640 | |||
641 | p = &tx_queue->tso_headers_free; | ||
642 | while (*p != NULL) | ||
643 | if (((unsigned long)*p & PAGE_MASK) == base_kva) | ||
644 | *p = (*p)->next; | ||
645 | else | ||
646 | p = &(*p)->next; | ||
647 | |||
648 | pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma); | ||
649 | } | ||
650 | |||
651 | static struct efx_tso_header * | ||
652 | efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len) | ||
653 | { | ||
654 | struct efx_tso_header *tsoh; | ||
655 | |||
656 | tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA); | ||
657 | if (unlikely(!tsoh)) | ||
658 | return NULL; | ||
659 | |||
660 | tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev, | ||
661 | TSOH_BUFFER(tsoh), header_len, | ||
662 | PCI_DMA_TODEVICE); | ||
663 | if (unlikely(pci_dma_mapping_error(tsoh->dma_addr))) { | ||
664 | kfree(tsoh); | ||
665 | return NULL; | ||
666 | } | ||
667 | |||
668 | tsoh->unmap_len = header_len; | ||
669 | return tsoh; | ||
670 | } | ||
671 | |||
672 | static void | ||
673 | efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh) | ||
674 | { | ||
675 | pci_unmap_single(tx_queue->efx->pci_dev, | ||
676 | tsoh->dma_addr, tsoh->unmap_len, | ||
677 | PCI_DMA_TODEVICE); | ||
678 | kfree(tsoh); | ||
679 | } | ||
680 | |||
681 | /** | ||
682 | * efx_tx_queue_insert - push descriptors onto the TX queue | ||
683 | * @tx_queue: Efx TX queue | ||
684 | * @dma_addr: DMA address of fragment | ||
685 | * @len: Length of fragment | ||
686 | * @skb: Only non-null for end of last segment | ||
687 | * @end_of_packet: True if last fragment in a packet | ||
688 | * @unmap_addr: DMA address of fragment for unmapping | ||
689 | * @unmap_len: Only set this in last segment of a fragment | ||
690 | * | ||
691 | * Push descriptors onto the TX queue. Return 0 on success or 1 if | ||
692 | * @tx_queue full. | ||
693 | */ | ||
694 | static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, | ||
695 | dma_addr_t dma_addr, unsigned len, | ||
696 | const struct sk_buff *skb, int end_of_packet, | ||
697 | dma_addr_t unmap_addr, unsigned unmap_len) | ||
698 | { | ||
699 | struct efx_tx_buffer *buffer; | ||
700 | struct efx_nic *efx = tx_queue->efx; | ||
701 | unsigned dma_len, fill_level, insert_ptr, misalign; | ||
702 | int q_space; | ||
703 | |||
704 | EFX_BUG_ON_PARANOID(len <= 0); | ||
705 | |||
706 | fill_level = tx_queue->insert_count - tx_queue->old_read_count; | ||
707 | /* -1 as there is no way to represent all descriptors used */ | ||
708 | q_space = efx->type->txd_ring_mask - 1 - fill_level; | ||
709 | |||
710 | while (1) { | ||
711 | if (unlikely(q_space-- <= 0)) { | ||
712 | /* It might be that completions have happened | ||
713 | * since the xmit path last checked. Update | ||
714 | * the xmit path's copy of read_count. | ||
715 | */ | ||
716 | ++tx_queue->stopped; | ||
717 | /* This memory barrier protects the change of | ||
718 | * stopped from the access of read_count. */ | ||
719 | smp_mb(); | ||
720 | tx_queue->old_read_count = | ||
721 | *(volatile unsigned *)&tx_queue->read_count; | ||
722 | fill_level = (tx_queue->insert_count | ||
723 | - tx_queue->old_read_count); | ||
724 | q_space = efx->type->txd_ring_mask - 1 - fill_level; | ||
725 | if (unlikely(q_space-- <= 0)) | ||
726 | return 1; | ||
727 | smp_mb(); | ||
728 | --tx_queue->stopped; | ||
729 | } | ||
730 | |||
731 | insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask; | ||
732 | buffer = &tx_queue->buffer[insert_ptr]; | ||
733 | ++tx_queue->insert_count; | ||
734 | |||
735 | EFX_BUG_ON_PARANOID(tx_queue->insert_count - | ||
736 | tx_queue->read_count > | ||
737 | efx->type->txd_ring_mask); | ||
738 | |||
739 | efx_tsoh_free(tx_queue, buffer); | ||
740 | EFX_BUG_ON_PARANOID(buffer->len); | ||
741 | EFX_BUG_ON_PARANOID(buffer->unmap_len); | ||
742 | EFX_BUG_ON_PARANOID(buffer->skb); | ||
743 | EFX_BUG_ON_PARANOID(buffer->continuation != 1); | ||
744 | EFX_BUG_ON_PARANOID(buffer->tsoh); | ||
745 | |||
746 | buffer->dma_addr = dma_addr; | ||
747 | |||
748 | /* Ensure we do not cross a boundary unsupported by H/W */ | ||
749 | dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1; | ||
750 | |||
751 | misalign = (unsigned)dma_addr & efx->type->bug5391_mask; | ||
752 | if (misalign && dma_len + misalign > 512) | ||
753 | dma_len = 512 - misalign; | ||
754 | |||
755 | /* If there is enough space to send then do so */ | ||
756 | if (dma_len >= len) | ||
757 | break; | ||
758 | |||
759 | buffer->len = dma_len; /* Don't set the other members */ | ||
760 | dma_addr += dma_len; | ||
761 | len -= dma_len; | ||
762 | } | ||
763 | |||
764 | EFX_BUG_ON_PARANOID(!len); | ||
765 | buffer->len = len; | ||
766 | buffer->skb = skb; | ||
767 | buffer->continuation = !end_of_packet; | ||
768 | buffer->unmap_addr = unmap_addr; | ||
769 | buffer->unmap_len = unmap_len; | ||
770 | return 0; | ||
771 | } | ||
772 | |||
773 | |||
774 | /* | ||
775 | * Put a TSO header into the TX queue. | ||
776 | * | ||
777 | * This is special-cased because we know that it is small enough to fit in | ||
778 | * a single fragment, and we know it doesn't cross a page boundary. It | ||
779 | * also allows us to not worry about end-of-packet etc. | ||
780 | */ | ||
781 | static inline void efx_tso_put_header(struct efx_tx_queue *tx_queue, | ||
782 | struct efx_tso_header *tsoh, unsigned len) | ||
783 | { | ||
784 | struct efx_tx_buffer *buffer; | ||
785 | |||
786 | buffer = &tx_queue->buffer[tx_queue->insert_count & | ||
787 | tx_queue->efx->type->txd_ring_mask]; | ||
788 | efx_tsoh_free(tx_queue, buffer); | ||
789 | EFX_BUG_ON_PARANOID(buffer->len); | ||
790 | EFX_BUG_ON_PARANOID(buffer->unmap_len); | ||
791 | EFX_BUG_ON_PARANOID(buffer->skb); | ||
792 | EFX_BUG_ON_PARANOID(buffer->continuation != 1); | ||
793 | EFX_BUG_ON_PARANOID(buffer->tsoh); | ||
794 | buffer->len = len; | ||
795 | buffer->dma_addr = tsoh->dma_addr; | ||
796 | buffer->tsoh = tsoh; | ||
797 | |||
798 | ++tx_queue->insert_count; | ||
799 | } | ||
800 | |||
801 | |||
802 | /* Remove descriptors put into a tx_queue. */ | ||
803 | static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) | ||
804 | { | ||
805 | struct efx_tx_buffer *buffer; | ||
806 | |||
807 | /* Work backwards until we hit the original insert pointer value */ | ||
808 | while (tx_queue->insert_count != tx_queue->write_count) { | ||
809 | --tx_queue->insert_count; | ||
810 | buffer = &tx_queue->buffer[tx_queue->insert_count & | ||
811 | tx_queue->efx->type->txd_ring_mask]; | ||
812 | efx_tsoh_free(tx_queue, buffer); | ||
813 | EFX_BUG_ON_PARANOID(buffer->skb); | ||
814 | buffer->len = 0; | ||
815 | buffer->continuation = 1; | ||
816 | if (buffer->unmap_len) { | ||
817 | pci_unmap_page(tx_queue->efx->pci_dev, | ||
818 | buffer->unmap_addr, | ||
819 | buffer->unmap_len, PCI_DMA_TODEVICE); | ||
820 | buffer->unmap_len = 0; | ||
821 | } | ||
822 | } | ||
823 | } | ||
824 | |||
825 | |||
826 | /* Parse the SKB header and initialise state. */ | ||
827 | static inline void tso_start(struct tso_state *st, const struct sk_buff *skb) | ||
828 | { | ||
829 | /* All ethernet/IP/TCP headers combined size is TCP header size | ||
830 | * plus offset of TCP header relative to start of packet. | ||
831 | */ | ||
832 | st->p.header_length = ((tcp_hdr(skb)->doff << 2u) | ||
833 | + PTR_DIFF(tcp_hdr(skb), skb->data)); | ||
834 | st->p.full_packet_size = (st->p.header_length | ||
835 | + skb_shinfo(skb)->gso_size); | ||
836 | |||
837 | st->p.ipv4_id = ntohs(ip_hdr(skb)->id); | ||
838 | st->seqnum = ntohl(tcp_hdr(skb)->seq); | ||
839 | |||
840 | EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg); | ||
841 | EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn); | ||
842 | EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst); | ||
843 | |||
844 | st->packet_space = st->p.full_packet_size; | ||
845 | st->remaining_len = skb->len - st->p.header_length; | ||
846 | } | ||
847 | |||
848 | |||
849 | /** | ||
850 | * tso_get_fragment - record fragment details and map for DMA | ||
851 | * @st: TSO state | ||
852 | * @efx: Efx NIC | ||
853 | * @page: Page holding the fragment data, which starts at offset @page_off | ||
854 | * @len: Length of fragment | ||
855 | * | ||
856 | * Record fragment details and map for DMA. Return 0 on success, or | ||
857 | * -%ENOMEM if DMA mapping fails. | ||
858 | */ | ||
859 | static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, | ||
860 | int len, struct page *page, int page_off) | ||
861 | { | ||
862 | |||
863 | st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off, | ||
864 | len, PCI_DMA_TODEVICE); | ||
865 | if (likely(!pci_dma_mapping_error(st->ifc.unmap_addr))) { | ||
866 | st->ifc.unmap_len = len; | ||
867 | st->ifc.len = len; | ||
868 | st->ifc.dma_addr = st->ifc.unmap_addr; | ||
869 | st->ifc.page = page; | ||
870 | st->ifc.page_off = page_off; | ||
871 | return 0; | ||
872 | } | ||
873 | return -ENOMEM; | ||
874 | } | ||
875 | |||
876 | |||
877 | /** | ||
878 | * tso_fill_packet_with_fragment - form descriptors for the current fragment | ||
879 | * @tx_queue: Efx TX queue | ||
880 | * @skb: Socket buffer | ||
881 | * @st: TSO state | ||
882 | * | ||
883 | * Form descriptors for the current fragment, until we reach the end | ||
884 | * of fragment or end-of-packet. Return 0 on success, 1 if not enough | ||
885 | * space in @tx_queue. | ||
886 | */ | ||
887 | static inline int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, | ||
888 | const struct sk_buff *skb, | ||
889 | struct tso_state *st) | ||
890 | { | ||
891 | |||
892 | int n, end_of_packet, rc; | ||
893 | |||
894 | if (st->ifc.len == 0) | ||
895 | return 0; | ||
896 | if (st->packet_space == 0) | ||
897 | return 0; | ||
898 | |||
899 | EFX_BUG_ON_PARANOID(st->ifc.len <= 0); | ||
900 | EFX_BUG_ON_PARANOID(st->packet_space <= 0); | ||
901 | |||
902 | n = min(st->ifc.len, st->packet_space); | ||
903 | |||
904 | st->packet_space -= n; | ||
905 | st->remaining_len -= n; | ||
906 | st->ifc.len -= n; | ||
907 | st->ifc.page_off += n; | ||
908 | end_of_packet = st->remaining_len == 0 || st->packet_space == 0; | ||
909 | |||
910 | rc = efx_tx_queue_insert(tx_queue, st->ifc.dma_addr, n, | ||
911 | st->remaining_len ? NULL : skb, | ||
912 | end_of_packet, st->ifc.unmap_addr, | ||
913 | st->ifc.len ? 0 : st->ifc.unmap_len); | ||
914 | |||
915 | st->ifc.dma_addr += n; | ||
916 | |||
917 | return rc; | ||
918 | } | ||
919 | |||
920 | |||
921 | /** | ||
922 | * tso_start_new_packet - generate a new header and prepare for the new packet | ||
923 | * @tx_queue: Efx TX queue | ||
924 | * @skb: Socket buffer | ||
925 | * @st: TSO state | ||
926 | * | ||
927 | * Generate a new header and prepare for the new packet. Return 0 on | ||
928 | * success, or -1 if failed to alloc header. | ||
929 | */ | ||
930 | static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue, | ||
931 | const struct sk_buff *skb, | ||
932 | struct tso_state *st) | ||
933 | { | ||
934 | struct efx_tso_header *tsoh; | ||
935 | struct iphdr *tsoh_iph; | ||
936 | struct tcphdr *tsoh_th; | ||
937 | unsigned ip_length; | ||
938 | u8 *header; | ||
939 | |||
940 | /* Allocate a DMA-mapped header buffer. */ | ||
941 | if (likely(TSOH_SIZE(st->p.header_length) <= TSOH_STD_SIZE)) { | ||
942 | if (tx_queue->tso_headers_free == NULL) | ||
943 | if (efx_tsoh_block_alloc(tx_queue)) | ||
944 | return -1; | ||
945 | EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free); | ||
946 | tsoh = tx_queue->tso_headers_free; | ||
947 | tx_queue->tso_headers_free = tsoh->next; | ||
948 | tsoh->unmap_len = 0; | ||
949 | } else { | ||
950 | tx_queue->tso_long_headers++; | ||
951 | tsoh = efx_tsoh_heap_alloc(tx_queue, st->p.header_length); | ||
952 | if (unlikely(!tsoh)) | ||
953 | return -1; | ||
954 | } | ||
955 | |||
956 | header = TSOH_BUFFER(tsoh); | ||
957 | tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb)); | ||
958 | tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb)); | ||
959 | |||
960 | /* Copy and update the headers. */ | ||
961 | memcpy(header, skb->data, st->p.header_length); | ||
962 | |||
963 | tsoh_th->seq = htonl(st->seqnum); | ||
964 | st->seqnum += skb_shinfo(skb)->gso_size; | ||
965 | if (st->remaining_len > skb_shinfo(skb)->gso_size) { | ||
966 | /* This packet will not finish the TSO burst. */ | ||
967 | ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb); | ||
968 | tsoh_th->fin = 0; | ||
969 | tsoh_th->psh = 0; | ||
970 | } else { | ||
971 | /* This packet will be the last in the TSO burst. */ | ||
972 | ip_length = (st->p.header_length - ETH_HDR_LEN(skb) | ||
973 | + st->remaining_len); | ||
974 | tsoh_th->fin = tcp_hdr(skb)->fin; | ||
975 | tsoh_th->psh = tcp_hdr(skb)->psh; | ||
976 | } | ||
977 | tsoh_iph->tot_len = htons(ip_length); | ||
978 | |||
979 | /* Linux leaves suitable gaps in the IP ID space for us to fill. */ | ||
980 | tsoh_iph->id = htons(st->p.ipv4_id); | ||
981 | st->p.ipv4_id++; | ||
982 | |||
983 | st->packet_space = skb_shinfo(skb)->gso_size; | ||
984 | ++tx_queue->tso_packets; | ||
985 | |||
986 | /* Form a descriptor for this header. */ | ||
987 | efx_tso_put_header(tx_queue, tsoh, st->p.header_length); | ||
988 | |||
989 | return 0; | ||
990 | } | ||
991 | |||
992 | |||
993 | /** | ||
994 | * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer | ||
995 | * @tx_queue: Efx TX queue | ||
996 | * @skb: Socket buffer | ||
997 | * | ||
998 | * Context: You must hold netif_tx_lock() to call this function. | ||
999 | * | ||
1000 | * Add socket buffer @skb to @tx_queue, doing TSO. Return %NETDEV_TX_OK if | ||
1001 | * @skb was enqueued, or was freed after a memory or DMA mapping error. Return | ||
1002 | * %NETDEV_TX_BUSY, leaving @skb unfreed, if @tx_queue ran out of space. | ||
1003 | */ | ||
1004 | static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, | ||
1005 | const struct sk_buff *skb) | ||
1006 | { | ||
1007 | int frag_i, rc, rc2 = NETDEV_TX_OK; | ||
1008 | struct tso_state state; | ||
1009 | skb_frag_t *f; | ||
1010 | |||
1011 | /* Verify TSO is safe - these checks should never fail. */ | ||
1012 | efx_tso_check_safe(skb); | ||
1013 | |||
1014 | EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); | ||
1015 | |||
1016 | tso_start(&state, skb); | ||
1017 | |||
1018 | /* If the skb header area contains exactly the headers, then | ||
1019 | * all payload is in the frag list; otherwise some payload follows the headers. | ||
1020 | */ | ||
1021 | if (skb_headlen(skb) == state.p.header_length) { | ||
1022 | /* Grab the first payload fragment. */ | ||
1023 | EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1); | ||
1024 | frag_i = 0; | ||
1025 | f = &skb_shinfo(skb)->frags[frag_i]; | ||
1026 | rc = tso_get_fragment(&state, tx_queue->efx, | ||
1027 | f->size, f->page, f->page_offset); | ||
1028 | if (rc) | ||
1029 | goto mem_err; | ||
1030 | } else { | ||
1031 | /* It may look like this code fragment assumes that the | ||
1032 | * skb->data portion does not cross a page boundary, but | ||
1033 | * that is not the case. It is guaranteed to be direct | ||
1034 | * mapped memory, and therefore is physically contiguous, | ||
1035 | * and so DMA will work fine. kmap_atomic() on this region | ||
1036 | * will just return the direct mapping, so that will work | ||
1037 | * too. | ||
1038 | */ | ||
1039 | int page_off = (unsigned long)skb->data & (PAGE_SIZE - 1); | ||
1040 | int hl = state.p.header_length; | ||
1041 | rc = tso_get_fragment(&state, tx_queue->efx, | ||
1042 | skb_headlen(skb) - hl, | ||
1043 | virt_to_page(skb->data), page_off + hl); | ||
1044 | if (rc) | ||
1045 | goto mem_err; | ||
1046 | frag_i = -1; | ||
1047 | } | ||
1048 | |||
1049 | if (tso_start_new_packet(tx_queue, skb, &state) < 0) | ||
1050 | goto mem_err; | ||
1051 | |||
1052 | while (1) { | ||
1053 | rc = tso_fill_packet_with_fragment(tx_queue, skb, &state); | ||
1054 | if (unlikely(rc)) | ||
1055 | goto stop; | ||
1056 | |||
1057 | /* Move onto the next fragment? */ | ||
1058 | if (state.ifc.len == 0) { | ||
1059 | if (++frag_i >= skb_shinfo(skb)->nr_frags) | ||
1060 | /* End of payload reached. */ | ||
1061 | break; | ||
1062 | f = &skb_shinfo(skb)->frags[frag_i]; | ||
1063 | rc = tso_get_fragment(&state, tx_queue->efx, | ||
1064 | f->size, f->page, f->page_offset); | ||
1065 | if (rc) | ||
1066 | goto mem_err; | ||
1067 | } | ||
1068 | |||
1069 | /* Start a new packet? */ | ||
1070 | if (state.packet_space == 0 && | ||
1071 | tso_start_new_packet(tx_queue, skb, &state) < 0) | ||
1072 | goto mem_err; | ||
1073 | } | ||
1074 | |||
1075 | /* Pass off to hardware */ | ||
1076 | falcon_push_buffers(tx_queue); | ||
1077 | |||
1078 | tx_queue->tso_bursts++; | ||
1079 | return NETDEV_TX_OK; | ||
1080 | |||
1081 | mem_err: | ||
1082 | EFX_ERR(tx_queue->efx, "Out of memory for TSO headers, or PCI mapping" | ||
1083 | " error\n"); | ||
1084 | dev_kfree_skb_any((struct sk_buff *)skb); | ||
1085 | goto unwind; | ||
1086 | |||
1087 | stop: | ||
1088 | rc2 = NETDEV_TX_BUSY; | ||
1089 | |||
1090 | /* Stop the queue if it wasn't stopped before. */ | ||
1091 | if (tx_queue->stopped == 1) | ||
1092 | efx_stop_queue(tx_queue->efx); | ||
1093 | |||
1094 | unwind: | ||
1095 | efx_enqueue_unwind(tx_queue); | ||
1096 | return rc2; | ||
1097 | } | ||
1098 | |||
1099 | |||
1100 | /* | ||
1101 | * Free up all TSO data structures associated with tx_queue. This | ||
1102 | * routine should be called only once the tx_queue is both empty and | ||
1103 | * will no longer be used. | ||
1104 | */ | ||
1105 | static void efx_fini_tso(struct efx_tx_queue *tx_queue) | ||
1106 | { | ||
1107 | unsigned i; | ||
1108 | |||
1109 | if (tx_queue->buffer) | ||
1110 | for (i = 0; i <= tx_queue->efx->type->txd_ring_mask; ++i) | ||
1111 | efx_tsoh_free(tx_queue, &tx_queue->buffer[i]); | ||
1112 | |||
1113 | while (tx_queue->tso_headers_free != NULL) | ||
1114 | efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free, | ||
1115 | tx_queue->efx->pci_dev); | ||
1116 | } | ||