author		Ben Hutchings <bhutchings@solarflare.com>	2012-05-17 13:40:54 -0400
committer	Ben Hutchings <bhutchings@solarflare.com>	2012-08-24 15:10:11 -0400
commit		f7251a9ce936f1006fbfdef63dbe42ae5e0fee7c (patch)
tree		e0187036af8587b61f4f492a31f3429b33dc0ffb /drivers
parent		14bf718fb97efe9ff649c317e7d87a3617b13e7c (diff)
sfc: Simplify TSO header buffer allocation
TSO header buffers contain a control structure immediately followed by
the packet headers, and are kept on a free list when not in use.  This
complicates buffer management and tends to result in cache read misses
when we recycle such buffers (particularly if DMA-coherent memory
requires caches to be disabled).

Replace the free list with a simple mapping by descriptor index.  We
know that there is always a payload descriptor between any two
descriptors with TSO header buffers, so we can allocate only one such
buffer for each two descriptors.

While we're at it, use a standard error code for allocation failure,
not -1.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
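For illustration only (not part of the patch): a minimal user-space sketch of the descriptor-index-to-header-slot mapping that the new efx_tsoh_get_buffer() performs in the diff below. The standalone program, its constants (a 4096-byte page, a 2-byte alignment offset standing in for NET_IP_ALIGN) and the example ring size are assumptions for the demonstration; only the index arithmetic mirrors the patch.

/* Illustrative sketch only -- not driver code.  Shows how a TX ring
 * position maps to a fixed TSO header slot: every other descriptor can
 * hold a header, so the ring index is halved, then split into a page
 * number and an offset within that page.
 */
#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096u              /* assumed page size */
#define TSOH_STD_SIZE     128u               /* standard header slot size */
#define TSOH_PER_PAGE     (EXAMPLE_PAGE_SIZE / TSOH_STD_SIZE)
#define TSOH_OFFSET       2u                 /* stands in for NET_IP_ALIGN */

int main(void)
{
	unsigned int ptr_mask = 1024 - 1;    /* example ring of 1024 entries */
	unsigned int insert_count;

	/* Header descriptors occupy alternating ring positions, hence / 2. */
	for (insert_count = 0; insert_count < 8; insert_count += 2) {
		unsigned int index  = (insert_count & ptr_mask) / 2;
		unsigned int page   = index / TSOH_PER_PAGE;
		unsigned int offset =
			TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET;

		printf("descriptor %u -> header page %u, offset %u\n",
		       insert_count, page, offset);
	}
	return 0;
}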
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/net/ethernet/sfc/net_driver.h	13
-rw-r--r--	drivers/net/ethernet/sfc/nic.c	2
-rw-r--r--	drivers/net/ethernet/sfc/tx.c	315
3 files changed, 112 insertions, 218 deletions
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 28a6d6258692..a4fe9a786ef8 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -94,7 +94,8 @@ struct efx_special_buffer {
  * struct efx_tx_buffer - buffer state for a TX descriptor
  * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
  *	freed when descriptor completes
- * @tsoh: When @flags & %EFX_TX_BUF_TSOH, the associated TSO header structure.
+ * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be
+ *	freed when descriptor completes.
  * @dma_addr: DMA address of the fragment.
  * @flags: Flags for allocation and DMA mapping type
  * @len: Length of this fragment.
@@ -104,7 +105,7 @@ struct efx_special_buffer {
 struct efx_tx_buffer {
 	union {
 		const struct sk_buff *skb;
-		struct efx_tso_header *tsoh;
+		void *heap_buf;
 	};
 	dma_addr_t dma_addr;
 	unsigned short flags;
@@ -113,7 +114,7 @@ struct efx_tx_buffer {
 };
 #define EFX_TX_BUF_CONT		1	/* not last descriptor of packet */
 #define EFX_TX_BUF_SKB		2	/* buffer is last part of skb */
-#define EFX_TX_BUF_TSOH		4	/* buffer is TSO header */
+#define EFX_TX_BUF_HEAP		4	/* buffer was allocated with kmalloc() */
 #define EFX_TX_BUF_MAP_SINGLE	8	/* buffer was mapped with dma_map_single() */
 
 /**
@@ -134,6 +135,7 @@ struct efx_tx_buffer {
  * @channel: The associated channel
  * @core_txq: The networking core TX queue structure
  * @buffer: The software buffer ring
+ * @tsoh_page: Array of pages of TSO header buffers
  * @txd: The hardware descriptor ring
  * @ptr_mask: The size of the ring minus 1.
  * @initialised: Has hardware queue been initialised?
@@ -157,9 +159,6 @@ struct efx_tx_buffer {
  *	variable indicates that the queue is full.  This is to
  *	avoid cache-line ping-pong between the xmit path and the
  *	completion path.
- * @tso_headers_free: A list of TSO headers allocated for this TX queue
- *	that are not in use, and so available for new TSO sends. The list
- *	is protected by the TX queue lock.
  * @tso_bursts: Number of times TSO xmit invoked by kernel
  * @tso_long_headers: Number of packets with headers too long for standard
  *	blocks
@@ -176,6 +175,7 @@ struct efx_tx_queue {
 	struct efx_channel *channel;
 	struct netdev_queue *core_txq;
 	struct efx_tx_buffer *buffer;
+	struct efx_buffer *tsoh_page;
 	struct efx_special_buffer txd;
 	unsigned int ptr_mask;
 	bool initialised;
@@ -188,7 +188,6 @@ struct efx_tx_queue {
 	unsigned int insert_count ____cacheline_aligned_in_smp;
 	unsigned int write_count;
 	unsigned int old_read_count;
-	struct efx_tso_header *tso_headers_free;
 	unsigned int tso_bursts;
 	unsigned int tso_long_headers;
 	unsigned int tso_packets;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index aa113709831d..cdff40b65729 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -298,7 +298,7 @@ efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
 /**************************************************************************
  *
  * Generic buffer handling
- * These buffers are used for interrupt status and MAC stats
+ * These buffers are used for interrupt status, MAC stats, etc.
  *
  **************************************************************************/
 
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 330d9111a339..61bc0ed718e3 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -47,51 +47,16 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
 			   "TX queue %d transmission id %x complete\n",
 			   tx_queue->queue, tx_queue->read_count);
+	} else if (buffer->flags & EFX_TX_BUF_HEAP) {
+		kfree(buffer->heap_buf);
 	}
 
-	buffer->flags &= EFX_TX_BUF_TSOH;
+	buffer->len = 0;
+	buffer->flags = 0;
 }
 
-/**
- * struct efx_tso_header - a DMA mapped buffer for packet headers
- * @next: Linked list of free ones.
- *	The list is protected by the TX queue lock.
- * @dma_unmap_len: Length to unmap for an oversize buffer, or 0.
- * @dma_addr: The DMA address of the header below.
- *
- * This controls the memory used for a TSO header.  Use TSOH_DATA()
- * to find the packet header data.  Use TSOH_SIZE() to calculate the
- * total size required for a given packet header length.  TSO headers
- * in the free list are exactly %TSOH_STD_SIZE bytes in size.
- */
-struct efx_tso_header {
-	union {
-		struct efx_tso_header *next;
-		size_t unmap_len;
-	};
-	dma_addr_t dma_addr;
-};
-
 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 			       struct sk_buff *skb);
-static void efx_fini_tso(struct efx_tx_queue *tx_queue);
-static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
-			       struct efx_tso_header *tsoh);
-
-static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
-			  struct efx_tx_buffer *buffer)
-{
-	if (buffer->flags & EFX_TX_BUF_TSOH) {
-		if (likely(!buffer->tsoh->unmap_len)) {
-			buffer->tsoh->next = tx_queue->tso_headers_free;
-			tx_queue->tso_headers_free = buffer->tsoh;
-		} else {
-			efx_tsoh_heap_free(tx_queue, buffer->tsoh);
-		}
-		buffer->flags &= ~EFX_TX_BUF_TSOH;
-	}
-}
-
 
 static inline unsigned
 efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
@@ -245,7 +210,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	do {
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
-		efx_tsoh_free(tx_queue, buffer);
 		EFX_BUG_ON_PARANOID(buffer->flags);
 		EFX_BUG_ON_PARANOID(buffer->len);
 		EFX_BUG_ON_PARANOID(buffer->unmap_len);
@@ -309,7 +273,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
 		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-		buffer->len = 0;
 	}
 
 	/* Free the fragment we were mid-way through pushing */
@@ -352,7 +315,6 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
 		}
 
 		efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
-		buffer->len = 0;
 
 		++tx_queue->read_count;
 		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -495,6 +457,21 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 	}
 }
 
+/* Size of page-based TSO header buffers.  Larger blocks must be
+ * allocated from the heap.
+ */
+#define TSOH_STD_SIZE	128
+#define TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
+
+/* At most half the descriptors in the queue at any time will refer to
+ * a TSO header buffer, since they must always be followed by a
+ * payload descriptor referring to an skb.
+ */
+static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue)
+{
+	return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE);
+}
+
 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 {
 	struct efx_nic *efx = tx_queue->efx;
@@ -516,14 +493,27 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 	if (!tx_queue->buffer)
 		return -ENOMEM;
 
+	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) {
+		tx_queue->tsoh_page =
+			kcalloc(efx_tsoh_page_count(tx_queue),
+				sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL);
+		if (!tx_queue->tsoh_page) {
+			rc = -ENOMEM;
+			goto fail1;
+		}
+	}
+
 	/* Allocate hardware ring */
 	rc = efx_nic_probe_tx(tx_queue);
 	if (rc)
-		goto fail;
+		goto fail2;
 
 	return 0;
 
- fail:
+fail2:
+	kfree(tx_queue->tsoh_page);
+	tx_queue->tsoh_page = NULL;
+fail1:
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
 	return rc;
@@ -559,7 +549,6 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
 		unsigned int pkts_compl = 0, bytes_compl = 0;
 		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
 		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-		buffer->len = 0;
 
 		++tx_queue->read_count;
 	}
@@ -580,13 +569,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
 	efx_nic_fini_tx(tx_queue);
 
 	efx_release_tx_buffers(tx_queue);
-
-	/* Free up TSO header cache */
-	efx_fini_tso(tx_queue);
 }
 
 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 {
+	int i;
+
 	if (!tx_queue->buffer)
 		return;
 
@@ -594,6 +582,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
594 "destroying TX queue %d\n", tx_queue->queue); 582 "destroying TX queue %d\n", tx_queue->queue);
595 efx_nic_remove_tx(tx_queue); 583 efx_nic_remove_tx(tx_queue);
596 584
585 if (tx_queue->tsoh_page) {
586 for (i = 0; i < efx_tsoh_page_count(tx_queue); i++)
587 efx_nic_free_buffer(tx_queue->efx,
588 &tx_queue->tsoh_page[i]);
589 kfree(tx_queue->tsoh_page);
590 tx_queue->tsoh_page = NULL;
591 }
592
597 kfree(tx_queue->buffer); 593 kfree(tx_queue->buffer);
598 tx_queue->buffer = NULL; 594 tx_queue->buffer = NULL;
599} 595}
@@ -616,17 +612,6 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 #define TSOH_OFFSET	NET_IP_ALIGN
 #endif
 
-#define TSOH_BUFFER(tsoh)	((u8 *)(tsoh + 1) + TSOH_OFFSET)
-
-/* Total size of struct efx_tso_header, buffer and padding */
-#define TSOH_SIZE(hdr_len) \
-	(sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
-
-/* Size of blocks on free list.  Larger blocks must be allocated from
- * the heap.
- */
-#define TSOH_STD_SIZE		128
-
 #define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
 #define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
 #define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
@@ -699,91 +684,43 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb)
 	return protocol;
 }
 
-
-/*
- * Allocate a page worth of efx_tso_header structures, and string them
- * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
- */
-static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
-{
-	struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
-	struct efx_tso_header *tsoh;
-	dma_addr_t dma_addr;
-	u8 *base_kva, *kva;
-
-	base_kva = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr, GFP_ATOMIC);
-	if (base_kva == NULL) {
-		netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev,
-			  "Unable to allocate page for TSO headers\n");
-		return -ENOMEM;
-	}
-
-	/* dma_alloc_coherent() allocates pages. */
-	EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
-
-	for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
-		tsoh = (struct efx_tso_header *)kva;
-		tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
-		tsoh->next = tx_queue->tso_headers_free;
-		tx_queue->tso_headers_free = tsoh;
-	}
-
-	return 0;
-}
-
-
-/* Free up a TSO header, and all others in the same page. */
-static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
-				struct efx_tso_header *tsoh,
-				struct device *dma_dev)
-{
-	struct efx_tso_header **p;
-	unsigned long base_kva;
-	dma_addr_t base_dma;
-
-	base_kva = (unsigned long)tsoh & PAGE_MASK;
-	base_dma = tsoh->dma_addr & PAGE_MASK;
-
-	p = &tx_queue->tso_headers_free;
-	while (*p != NULL) {
-		if (((unsigned long)*p & PAGE_MASK) == base_kva)
-			*p = (*p)->next;
-		else
-			p = &(*p)->next;
-	}
-
-	dma_free_coherent(dma_dev, PAGE_SIZE, (void *)base_kva, base_dma);
-}
-
-static struct efx_tso_header *
-efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
-{
-	struct efx_tso_header *tsoh;
-
-	tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
-	if (unlikely(!tsoh))
-		return NULL;
-
-	tsoh->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
-					TSOH_BUFFER(tsoh), header_len,
-					DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
-				       tsoh->dma_addr))) {
-		kfree(tsoh);
-		return NULL;
-	}
-
-	tsoh->unmap_len = header_len;
-	return tsoh;
-}
-
-static void
-efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
-{
-	dma_unmap_single(&tx_queue->efx->pci_dev->dev,
-			 tsoh->dma_addr, tsoh->unmap_len,
-			 DMA_TO_DEVICE);
-	kfree(tsoh);
-}
+static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue,
+			       struct efx_tx_buffer *buffer, unsigned int len)
+{
+	u8 *result;
+
+	EFX_BUG_ON_PARANOID(buffer->len);
+	EFX_BUG_ON_PARANOID(buffer->flags);
+	EFX_BUG_ON_PARANOID(buffer->unmap_len);
+
+	if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) {
+		unsigned index =
+			(tx_queue->insert_count & tx_queue->ptr_mask) / 2;
+		struct efx_buffer *page_buf =
+			&tx_queue->tsoh_page[index / TSOH_PER_PAGE];
+		unsigned offset =
+			TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET;
+
+		if (unlikely(!page_buf->addr) &&
+		    efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE))
+			return NULL;
+
+		result = (u8 *)page_buf->addr + offset;
+		buffer->dma_addr = page_buf->dma_addr + offset;
+		buffer->flags = EFX_TX_BUF_CONT;
+	} else {
+		tx_queue->tso_long_headers++;
+
+		buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC);
+		if (unlikely(!buffer->heap_buf))
+			return NULL;
+		result = (u8 *)buffer->heap_buf + TSOH_OFFSET;
+		buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP;
+	}
+
+	buffer->len = len;
+
+	return result;
+}
 
 /**
@@ -814,7 +751,6 @@ static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 			       tx_queue->read_count >=
 			       efx->txq_entries);
 
-	efx_tsoh_free(tx_queue, buffer);
 	EFX_BUG_ON_PARANOID(buffer->len);
 	EFX_BUG_ON_PARANOID(buffer->unmap_len);
 	EFX_BUG_ON_PARANOID(buffer->flags);
@@ -846,53 +782,42 @@ static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
  * a single fragment, and we know it doesn't cross a page boundary.  It
  * also allows us to not worry about end-of-packet etc.
  */
-static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
-			       struct efx_tso_header *tsoh, unsigned len)
+static int efx_tso_put_header(struct efx_tx_queue *tx_queue,
+			      struct efx_tx_buffer *buffer, u8 *header)
 {
-	struct efx_tx_buffer *buffer;
-
-	buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
-	efx_tsoh_free(tx_queue, buffer);
-	EFX_BUG_ON_PARANOID(buffer->len);
-	EFX_BUG_ON_PARANOID(buffer->unmap_len);
-	EFX_BUG_ON_PARANOID(buffer->flags);
-	buffer->len = len;
-	buffer->dma_addr = tsoh->dma_addr;
-	buffer->tsoh = tsoh;
-	buffer->flags = EFX_TX_BUF_TSOH | EFX_TX_BUF_CONT;
+	if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) {
+		buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
+						  header, buffer->len,
+						  DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
+					       buffer->dma_addr))) {
+			kfree(buffer->heap_buf);
+			buffer->len = 0;
+			buffer->flags = 0;
+			return -ENOMEM;
+		}
+		buffer->unmap_len = buffer->len;
+		buffer->flags |= EFX_TX_BUF_MAP_SINGLE;
+	}
 
 	++tx_queue->insert_count;
+	return 0;
 }
 
 
-/* Remove descriptors put into a tx_queue. */
+/* Remove buffers put into a tx_queue.  None of the buffers must have
+ * an skb attached.
+ */
 static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 {
 	struct efx_tx_buffer *buffer;
-	dma_addr_t unmap_addr;
 
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		--tx_queue->insert_count;
 		buffer = &tx_queue->buffer[tx_queue->insert_count &
 					   tx_queue->ptr_mask];
-		efx_tsoh_free(tx_queue, buffer);
-		EFX_BUG_ON_PARANOID(buffer->flags & EFX_TX_BUF_SKB);
-		if (buffer->unmap_len) {
-			unmap_addr = (buffer->dma_addr + buffer->len -
-				      buffer->unmap_len);
-			if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
-				dma_unmap_single(&tx_queue->efx->pci_dev->dev,
-						 unmap_addr, buffer->unmap_len,
-						 DMA_TO_DEVICE);
-			else
-				dma_unmap_page(&tx_queue->efx->pci_dev->dev,
-					       unmap_addr, buffer->unmap_len,
-					       DMA_TO_DEVICE);
-			buffer->unmap_len = 0;
-		}
-		buffer->len = 0;
-		buffer->flags = 0;
+		efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
 	}
 }
 
@@ -1014,35 +939,24 @@ static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
  * @st: TSO state
  *
  * Generate a new header and prepare for the new packet.  Return 0 on
- * success, or -1 if failed to alloc header.
+ * success, or -%ENOMEM if failed to alloc header.
  */
 static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 				const struct sk_buff *skb,
 				struct tso_state *st)
 {
-	struct efx_tso_header *tsoh;
+	struct efx_tx_buffer *buffer =
+		&tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
 	struct tcphdr *tsoh_th;
 	unsigned ip_length;
 	u8 *header;
+	int rc;
 
-	/* Allocate a DMA-mapped header buffer. */
-	if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
-		if (tx_queue->tso_headers_free == NULL) {
-			if (efx_tsoh_block_alloc(tx_queue))
-				return -1;
-		}
-		EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
-		tsoh = tx_queue->tso_headers_free;
-		tx_queue->tso_headers_free = tsoh->next;
-		tsoh->unmap_len = 0;
-	} else {
-		tx_queue->tso_long_headers++;
-		tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
-		if (unlikely(!tsoh))
-			return -1;
-	}
+	/* Allocate and insert a DMA-mapped header buffer. */
+	header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len);
+	if (!header)
+		return -ENOMEM;
 
-	header = TSOH_BUFFER(tsoh);
 	tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
 
 	/* Copy and update the headers. */
@@ -1078,12 +992,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 		tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph));
 	}
 
+	rc = efx_tso_put_header(tx_queue, buffer, header);
+	if (unlikely(rc))
+		return rc;
+
 	st->packet_space = skb_shinfo(skb)->gso_size;
 	++tx_queue->tso_packets;
 
-	/* Form a descriptor for this header. */
-	efx_tso_put_header(tx_queue, tsoh, st->header_len);
-
 	return 0;
 }
 
@@ -1182,23 +1097,3 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	efx_enqueue_unwind(tx_queue);
 	return NETDEV_TX_OK;
 }
-
-
-/*
- * Free up all TSO datastructures associated with tx_queue.  This
- * routine should be called only once the tx_queue is both empty and
- * will no longer be used.
- */
-static void efx_fini_tso(struct efx_tx_queue *tx_queue)
-{
-	unsigned i;
-
-	if (tx_queue->buffer) {
-		for (i = 0; i <= tx_queue->ptr_mask; ++i)
-			efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
-	}
-
-	while (tx_queue->tso_headers_free != NULL)
-		efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
-				    &tx_queue->efx->pci_dev->dev);
-}