Diffstat (limited to 'drivers/net/sfc/tx.c')
-rw-r--r--	drivers/net/sfc/tx.c	385
 1 file changed, 204 insertions(+), 181 deletions(-)
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
index 5e8374ab28ee..da3e9ff339f5 100644
--- a/drivers/net/sfc/tx.c
+++ b/drivers/net/sfc/tx.c
@@ -47,7 +47,7 @@ void efx_stop_queue(struct efx_nic *efx)
  * We want to be able to nest calls to netif_stop_queue(), since each
  * channel can have an individual stop on the queue.
  */
-inline void efx_wake_queue(struct efx_nic *efx)
+void efx_wake_queue(struct efx_nic *efx)
 {
 	local_bh_disable();
 	if (atomic_dec_and_lock(&efx->netif_stop_count,
@@ -59,19 +59,21 @@ inline void efx_wake_queue(struct efx_nic *efx)
 	local_bh_enable();
 }
 
-static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 			       struct efx_tx_buffer *buffer)
 {
 	if (buffer->unmap_len) {
 		struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
+		dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
+					 buffer->unmap_len);
 		if (buffer->unmap_single)
-			pci_unmap_single(pci_dev, buffer->unmap_addr,
-					 buffer->unmap_len, PCI_DMA_TODEVICE);
+			pci_unmap_single(pci_dev, unmap_addr, buffer->unmap_len,
+					 PCI_DMA_TODEVICE);
 		else
-			pci_unmap_page(pci_dev, buffer->unmap_addr,
-				       buffer->unmap_len, PCI_DMA_TODEVICE);
+			pci_unmap_page(pci_dev, unmap_addr, buffer->unmap_len,
+				       PCI_DMA_TODEVICE);
 		buffer->unmap_len = 0;
-		buffer->unmap_single = 0;
+		buffer->unmap_single = false;
 	}
 
 	if (buffer->skb) {
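The hunk above drops the stored unmap_addr field and instead recomputes the address to unmap from the fields the TX buffer still carries. A minimal standalone sketch of that arithmetic (illustrative values only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical mapping: 4096 bytes mapped starting at 0x10003000,
	 * with the final descriptor covering the last 1000 bytes. */
	uint64_t dma_addr      = 0x10003000ull + 3096; /* start of final chunk */
	unsigned int len       = 1000;                 /* bytes in final chunk */
	unsigned int unmap_len = 4096;                 /* whole mapped length  */

	/* Same arithmetic as efx_dequeue_buffer() above: the end of the
	 * final chunk minus the full mapped length is the address that was
	 * originally passed to pci_map_single()/pci_map_page(). */
	uint64_t unmap_addr = dma_addr + len - unmap_len;

	printf("unmap 0x%llx, %u bytes\n",
	       (unsigned long long)unmap_addr, unmap_len);
	return 0;
}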
@@ -103,13 +105,13 @@ struct efx_tso_header {
 };
 
 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
-			       const struct sk_buff *skb);
+			       struct sk_buff *skb);
 static void efx_fini_tso(struct efx_tx_queue *tx_queue);
 static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
 			       struct efx_tso_header *tsoh);
 
-static inline void efx_tsoh_free(struct efx_tx_queue *tx_queue,
+static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
 			  struct efx_tx_buffer *buffer)
 {
 	if (buffer->tsoh) {
 		if (likely(!buffer->tsoh->unmap_len)) {
@@ -136,8 +138,8 @@ static inline void efx_tsoh_free(struct efx_tx_queue *tx_queue,
  * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
  * You must hold netif_tx_lock() to call this function.
  */
-static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
-				  const struct sk_buff *skb)
+static int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
+			   struct sk_buff *skb)
 {
 	struct efx_nic *efx = tx_queue->efx;
 	struct pci_dev *pci_dev = efx->pci_dev;
@@ -148,7 +150,7 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 	unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
 	dma_addr_t dma_addr, unmap_addr = 0;
 	unsigned int dma_len;
-	unsigned unmap_single;
+	bool unmap_single;
 	int q_space, i = 0;
 	int rc = NETDEV_TX_OK;
 
@@ -167,7 +169,7 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 	 * since this is more efficient on machines with sparse
 	 * memory.
 	 */
-	unmap_single = 1;
+	unmap_single = true;
 	dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
 
 	/* Process all fragments */
@@ -213,7 +215,7 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 		EFX_BUG_ON_PARANOID(buffer->tsoh);
 		EFX_BUG_ON_PARANOID(buffer->skb);
 		EFX_BUG_ON_PARANOID(buffer->len);
-		EFX_BUG_ON_PARANOID(buffer->continuation != 1);
+		EFX_BUG_ON_PARANOID(!buffer->continuation);
 		EFX_BUG_ON_PARANOID(buffer->unmap_len);
 
 		dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
@@ -233,7 +235,6 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 	} while (len);
 
 	/* Transfer ownership of the unmapping to the final buffer */
-	buffer->unmap_addr = unmap_addr;
 	buffer->unmap_single = unmap_single;
 	buffer->unmap_len = unmap_len;
 	unmap_len = 0;
@@ -247,14 +248,14 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 		page_offset = fragment->page_offset;
 		i++;
 		/* Map for DMA */
-		unmap_single = 0;
+		unmap_single = false;
 		dma_addr = pci_map_page(pci_dev, page, page_offset, len,
 					PCI_DMA_TODEVICE);
 	}
 
 	/* Transfer ownership of the skb to the final buffer */
 	buffer->skb = skb;
-	buffer->continuation = 0;
+	buffer->continuation = false;
 
 	/* Pass off to hardware */
 	falcon_push_buffers(tx_queue);
@@ -287,9 +288,14 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 	}
 
 	/* Free the fragment we were mid-way through pushing */
-	if (unmap_len)
-		pci_unmap_page(pci_dev, unmap_addr, unmap_len,
-			       PCI_DMA_TODEVICE);
+	if (unmap_len) {
+		if (unmap_single)
+			pci_unmap_single(pci_dev, unmap_addr, unmap_len,
+					 PCI_DMA_TODEVICE);
+		else
+			pci_unmap_page(pci_dev, unmap_addr, unmap_len,
+				       PCI_DMA_TODEVICE);
+	}
 
 	return rc;
 }
@@ -299,8 +305,8 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
  * This removes packets from the TX queue, up to and including the
  * specified index.
  */
-static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
+static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
 				unsigned int index)
 {
 	struct efx_nic *efx = tx_queue->efx;
 	unsigned int stop_index, read_ptr;
@@ -320,7 +326,7 @@ static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
 		}
 
 		efx_dequeue_buffer(tx_queue, buffer);
-		buffer->continuation = 1;
+		buffer->continuation = true;
 		buffer->len = 0;
 
 		++tx_queue->read_count;
@@ -367,8 +373,15 @@ inline int efx_xmit(struct efx_nic *efx,
  */
 int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 {
-	struct efx_nic *efx = net_dev->priv;
-	return efx_xmit(efx, &efx->tx_queue[0], skb);
+	struct efx_nic *efx = netdev_priv(net_dev);
+	struct efx_tx_queue *tx_queue;
+
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
+		tx_queue = &efx->tx_queue[EFX_TX_QUEUE_OFFLOAD_CSUM];
+	else
+		tx_queue = &efx->tx_queue[EFX_TX_QUEUE_NO_CSUM];
+
+	return efx_xmit(efx, tx_queue, skb);
 }
 
 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
@@ -412,30 +425,25 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 	/* Allocate software ring */
 	txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
 	tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
-	if (!tx_queue->buffer) {
-		rc = -ENOMEM;
-		goto fail1;
-	}
+	if (!tx_queue->buffer)
+		return -ENOMEM;
 	for (i = 0; i <= efx->type->txd_ring_mask; ++i)
-		tx_queue->buffer[i].continuation = 1;
+		tx_queue->buffer[i].continuation = true;
 
 	/* Allocate hardware ring */
 	rc = falcon_probe_tx(tx_queue);
 	if (rc)
-		goto fail2;
+		goto fail;
 
 	return 0;
 
- fail2:
+ fail:
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
- fail1:
-	tx_queue->used = 0;
-
 	return rc;
 }
 
-int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 {
 	EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
 
@@ -446,7 +454,7 @@ int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 	BUG_ON(tx_queue->stopped);
 
 	/* Set up TX descriptor ring */
-	return falcon_init_tx(tx_queue);
+	falcon_init_tx(tx_queue);
 }
 
 void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
@@ -461,7 +469,7 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
 		buffer = &tx_queue->buffer[tx_queue->read_count &
 					   tx_queue->efx->type->txd_ring_mask];
 		efx_dequeue_buffer(tx_queue, buffer);
-		buffer->continuation = 1;
+		buffer->continuation = true;
 		buffer->len = 0;
 
 		++tx_queue->read_count;
@@ -494,7 +502,6 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
-	tx_queue->used = 0;
 }
 
 
@@ -509,7 +516,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 /* Number of bytes inserted at the start of a TSO header buffer,
  * similar to NET_IP_ALIGN.
  */
-#if defined(__i386__) || defined(__x86_64__)
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 #define TSOH_OFFSET 0
 #else
 #define TSOH_OFFSET NET_IP_ALIGN
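NET_IP_ALIGN is typically 2, so starting the copied header two bytes into the buffer puts the IP header (which follows the 14-byte Ethernet header) on a 4-byte boundary; architectures with efficient unaligned access skip the padding. A small standalone check of that offset arithmetic (values assumed, not part of the patch):

#include <stdio.h>

#define ETH_HLEN     14	/* Ethernet header length */
#define NET_IP_ALIGN 2	/* typical value where unaligned loads are slow */

int main(void)
{
	unsigned int ip_off_plain  = ETH_HLEN;                /* TSOH_OFFSET 0 */
	unsigned int ip_off_padded = NET_IP_ALIGN + ETH_HLEN; /* TSOH_OFFSET 2 */

	printf("IP header offset without padding: %u (mod 4 = %u)\n",
	       ip_off_plain, ip_off_plain % 4);
	printf("IP header offset with padding:    %u (mod 4 = %u)\n",
	       ip_off_padded, ip_off_padded % 4);
	return 0;
}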
@@ -533,47 +540,37 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 
 /**
  * struct tso_state - TSO state for an SKB
- * @remaining_len: Bytes of data we've yet to segment
+ * @out_len: Remaining length in current segment
  * @seqnum: Current sequence number
+ * @ipv4_id: Current IPv4 ID, host endian
  * @packet_space: Remaining space in current packet
- * @ifc: Input fragment cursor.
- *	Where we are in the current fragment of the incoming SKB. These
- *	values get updated in place when we split a fragment over
- *	multiple packets.
- * @p: Parameters.
- *	These values are set once at the start of the TSO send and do
- *	not get changed as the routine progresses.
+ * @dma_addr: DMA address of current position
+ * @in_len: Remaining length in current SKB fragment
+ * @unmap_len: Length of SKB fragment
+ * @unmap_addr: DMA address of SKB fragment
+ * @unmap_single: DMA single vs page mapping flag
+ * @header_len: Number of bytes of header
+ * @full_packet_size: Number of bytes to put in each outgoing segment
  *
  * The state used during segmentation. It is put into this data structure
  * just to make it easy to pass into inline functions.
  */
 struct tso_state {
-	unsigned remaining_len;
+	/* Output position */
+	unsigned out_len;
 	unsigned seqnum;
+	unsigned ipv4_id;
 	unsigned packet_space;
 
-	struct {
-		/* DMA address of current position */
-		dma_addr_t dma_addr;
-		/* Remaining length */
-		unsigned int len;
-		/* DMA address and length of the whole fragment */
-		unsigned int unmap_len;
-		dma_addr_t unmap_addr;
-		struct page *page;
-		unsigned page_off;
-	} ifc;
-
-	struct {
-		/* The number of bytes of header */
-		unsigned int header_length;
-
-		/* The number of bytes to put in each outgoing segment. */
-		int full_packet_size;
-
-		/* Current IPv4 ID, host endian. */
-		unsigned ipv4_id;
-	} p;
+	/* Input position */
+	dma_addr_t dma_addr;
+	unsigned in_len;
+	unsigned unmap_len;
+	dma_addr_t unmap_addr;
+	bool unmap_single;
+
+	unsigned header_len;
+	int full_packet_size;
 };
 
 
@@ -581,11 +578,24 @@ struct tso_state {
  * Verify that our various assumptions about sk_buffs and the conditions
  * under which TSO will be attempted hold true.
  */
-static inline void efx_tso_check_safe(const struct sk_buff *skb)
+static void efx_tso_check_safe(struct sk_buff *skb)
 {
-	EFX_BUG_ON_PARANOID(skb->protocol != htons(ETH_P_IP));
+	__be16 protocol = skb->protocol;
+
 	EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
-			    skb->protocol);
+			    protocol);
+	if (protocol == htons(ETH_P_8021Q)) {
+		/* Find the encapsulated protocol; reset network header
+		 * and transport header based on that. */
+		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+		protocol = veh->h_vlan_encapsulated_proto;
+		skb_set_network_header(skb, sizeof(*veh));
+		if (protocol == htons(ETH_P_IP))
+			skb_set_transport_header(skb, sizeof(*veh) +
+						 4 * ip_hdr(skb)->ihl);
+	}
+
+	EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IP));
 	EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
 	EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
 			     + (tcp_hdr(skb)->doff << 2u)) >
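The new VLAN branch relies on the encapsulated EtherType sitting immediately after the 4-byte 802.1Q tag, so the network header moves from offset 14 to offset 18. A minimal userspace sketch of that parsing (hypothetical frame bytes, not driver code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>	/* ntohs() */

#define ETH_P_IP    0x0800
#define ETH_P_8021Q 0x8100

static uint16_t read_be16(const uint8_t *p)
{
	uint16_t v;
	memcpy(&v, p, sizeof(v));
	return ntohs(v);
}

int main(void)
{
	/* Hypothetical VLAN-tagged frame header: dst MAC, src MAC,
	 * 802.1Q TPID + TCI, then the encapsulated EtherType (IPv4). */
	const uint8_t frame[] = {
		0x00, 0x11, 0x22, 0x33, 0x44, 0x55,	/* dst MAC */
		0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb,	/* src MAC */
		0x81, 0x00, 0x00, 0x64,			/* TPID 0x8100, VLAN 100 */
		0x08, 0x00,				/* inner EtherType: IPv4 */
	};
	uint16_t proto = read_be16(&frame[12]);
	unsigned int net_off = 14;	/* analogous to skb_set_network_header() */

	if (proto == ETH_P_8021Q) {
		/* Skip the 4-byte tag, as the patch does via vlan_ethhdr. */
		proto = read_be16(&frame[16]);
		net_off = 18;
	}
	printf("protocol 0x%04x, network header at offset %u\n", proto, net_off);
	return 0;
}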
@@ -685,18 +695,14 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
  * @tx_queue: Efx TX queue
  * @dma_addr: DMA address of fragment
  * @len: Length of fragment
- * @skb: Only non-null for end of last segment
- * @end_of_packet: True if last fragment in a packet
- * @unmap_addr: DMA address of fragment for unmapping
- * @unmap_len: Only set this in last segment of a fragment
+ * @final_buffer: The final buffer inserted into the queue
  *
  * Push descriptors onto the TX queue. Return 0 on success or 1 if
  * @tx_queue full.
  */
 static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 			       dma_addr_t dma_addr, unsigned len,
-			       const struct sk_buff *skb, int end_of_packet,
-			       dma_addr_t unmap_addr, unsigned unmap_len)
+			       struct efx_tx_buffer **final_buffer)
 {
 	struct efx_tx_buffer *buffer;
 	struct efx_nic *efx = tx_queue->efx;
@@ -724,8 +730,10 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 		fill_level = (tx_queue->insert_count
 			      - tx_queue->old_read_count);
 		q_space = efx->type->txd_ring_mask - 1 - fill_level;
-		if (unlikely(q_space-- <= 0))
+		if (unlikely(q_space-- <= 0)) {
+			*final_buffer = NULL;
 			return 1;
+		}
 		smp_mb();
 		--tx_queue->stopped;
 	}
@@ -742,7 +750,7 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 		EFX_BUG_ON_PARANOID(buffer->len);
 		EFX_BUG_ON_PARANOID(buffer->unmap_len);
 		EFX_BUG_ON_PARANOID(buffer->skb);
-		EFX_BUG_ON_PARANOID(buffer->continuation != 1);
+		EFX_BUG_ON_PARANOID(!buffer->continuation);
 		EFX_BUG_ON_PARANOID(buffer->tsoh);
 
 		buffer->dma_addr = dma_addr;
@@ -765,10 +773,7 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 
 	EFX_BUG_ON_PARANOID(!len);
 	buffer->len = len;
-	buffer->skb = skb;
-	buffer->continuation = !end_of_packet;
-	buffer->unmap_addr = unmap_addr;
-	buffer->unmap_len = unmap_len;
+	*final_buffer = buffer;
 	return 0;
 }
 
@@ -780,8 +785,8 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
  * a single fragment, and we know it doesn't cross a page boundary. It
  * also allows us to not worry about end-of-packet etc.
  */
-static inline void efx_tso_put_header(struct efx_tx_queue *tx_queue,
+static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
 			       struct efx_tso_header *tsoh, unsigned len)
 {
 	struct efx_tx_buffer *buffer;
 
@@ -791,7 +796,7 @@ static inline void efx_tso_put_header(struct efx_tx_queue *tx_queue,
 	EFX_BUG_ON_PARANOID(buffer->len);
 	EFX_BUG_ON_PARANOID(buffer->unmap_len);
 	EFX_BUG_ON_PARANOID(buffer->skb);
-	EFX_BUG_ON_PARANOID(buffer->continuation != 1);
+	EFX_BUG_ON_PARANOID(!buffer->continuation);
 	EFX_BUG_ON_PARANOID(buffer->tsoh);
 	buffer->len = len;
 	buffer->dma_addr = tsoh->dma_addr;
@@ -805,6 +810,7 @@ static inline void efx_tso_put_header(struct efx_tx_queue *tx_queue,
 static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 {
 	struct efx_tx_buffer *buffer;
+	dma_addr_t unmap_addr;
 
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
@@ -814,11 +820,18 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 		efx_tsoh_free(tx_queue, buffer);
 		EFX_BUG_ON_PARANOID(buffer->skb);
 		buffer->len = 0;
-		buffer->continuation = 1;
+		buffer->continuation = true;
 		if (buffer->unmap_len) {
-			pci_unmap_page(tx_queue->efx->pci_dev,
-				       buffer->unmap_addr,
-				       buffer->unmap_len, PCI_DMA_TODEVICE);
+			unmap_addr = (buffer->dma_addr + buffer->len -
+				      buffer->unmap_len);
+			if (buffer->unmap_single)
+				pci_unmap_single(tx_queue->efx->pci_dev,
+						 unmap_addr, buffer->unmap_len,
+						 PCI_DMA_TODEVICE);
+			else
+				pci_unmap_page(tx_queue->efx->pci_dev,
+					       unmap_addr, buffer->unmap_len,
+					       PCI_DMA_TODEVICE);
 			buffer->unmap_len = 0;
 		}
 	}
@@ -826,50 +839,57 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 
 
 /* Parse the SKB header and initialise state. */
-static inline void tso_start(struct tso_state *st, const struct sk_buff *skb)
+static void tso_start(struct tso_state *st, const struct sk_buff *skb)
 {
 	/* All ethernet/IP/TCP headers combined size is TCP header size
 	 * plus offset of TCP header relative to start of packet.
 	 */
-	st->p.header_length = ((tcp_hdr(skb)->doff << 2u)
+	st->header_len = ((tcp_hdr(skb)->doff << 2u)
 			  + PTR_DIFF(tcp_hdr(skb), skb->data));
-	st->p.full_packet_size = (st->p.header_length
-				  + skb_shinfo(skb)->gso_size);
+	st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
 
-	st->p.ipv4_id = ntohs(ip_hdr(skb)->id);
+	st->ipv4_id = ntohs(ip_hdr(skb)->id);
 	st->seqnum = ntohl(tcp_hdr(skb)->seq);
 
 	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
 	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
 	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
 
-	st->packet_space = st->p.full_packet_size;
-	st->remaining_len = skb->len - st->p.header_length;
+	st->packet_space = st->full_packet_size;
+	st->out_len = skb->len - st->header_len;
+	st->unmap_len = 0;
+	st->unmap_single = false;
 }
 
-
-/**
- * tso_get_fragment - record fragment details and map for DMA
- * @st: TSO state
- * @efx: Efx NIC
- * @data: Pointer to fragment data
- * @len: Length of fragment
- *
- * Record fragment details and map for DMA. Return 0 on success, or
- * -%ENOMEM if DMA mapping fails.
- */
-static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
-				   int len, struct page *page, int page_off)
+static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
+			    skb_frag_t *frag)
 {
+	st->unmap_addr = pci_map_page(efx->pci_dev, frag->page,
+				      frag->page_offset, frag->size,
+				      PCI_DMA_TODEVICE);
+	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
+		st->unmap_single = false;
+		st->unmap_len = frag->size;
+		st->in_len = frag->size;
+		st->dma_addr = st->unmap_addr;
+		return 0;
+	}
+	return -ENOMEM;
+}
 
-	st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off,
-					  len, PCI_DMA_TODEVICE);
-	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->ifc.unmap_addr))) {
-		st->ifc.unmap_len = len;
-		st->ifc.len = len;
-		st->ifc.dma_addr = st->ifc.unmap_addr;
-		st->ifc.page = page;
-		st->ifc.page_off = page_off;
+static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
+				 const struct sk_buff *skb)
+{
+	int hl = st->header_len;
+	int len = skb_headlen(skb) - hl;
+
+	st->unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
+					len, PCI_DMA_TODEVICE);
+	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
+		st->unmap_single = true;
+		st->unmap_len = len;
+		st->in_len = len;
+		st->dma_addr = st->unmap_addr;
 		return 0;
 	}
 	return -ENOMEM;
@@ -886,36 +906,45 @@ static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
  * of fragment or end-of-packet. Return 0 on success, 1 if not enough
  * space in @tx_queue.
  */
-static inline int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
+static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
 					 const struct sk_buff *skb,
 					 struct tso_state *st)
 {
-
+	struct efx_tx_buffer *buffer;
 	int n, end_of_packet, rc;
 
-	if (st->ifc.len == 0)
+	if (st->in_len == 0)
 		return 0;
 	if (st->packet_space == 0)
 		return 0;
 
-	EFX_BUG_ON_PARANOID(st->ifc.len <= 0);
+	EFX_BUG_ON_PARANOID(st->in_len <= 0);
 	EFX_BUG_ON_PARANOID(st->packet_space <= 0);
 
-	n = min(st->ifc.len, st->packet_space);
+	n = min(st->in_len, st->packet_space);
 
 	st->packet_space -= n;
-	st->remaining_len -= n;
-	st->ifc.len -= n;
-	st->ifc.page_off += n;
-	end_of_packet = st->remaining_len == 0 || st->packet_space == 0;
-
-	rc = efx_tx_queue_insert(tx_queue, st->ifc.dma_addr, n,
-				 st->remaining_len ? NULL : skb,
-				 end_of_packet, st->ifc.unmap_addr,
-				 st->ifc.len ? 0 : st->ifc.unmap_len);
-
-	st->ifc.dma_addr += n;
+	st->out_len -= n;
+	st->in_len -= n;
+
+	rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
+	if (likely(rc == 0)) {
+		if (st->out_len == 0)
+			/* Transfer ownership of the skb */
+			buffer->skb = skb;
+
+		end_of_packet = st->out_len == 0 || st->packet_space == 0;
+		buffer->continuation = !end_of_packet;
+
+		if (st->in_len == 0) {
+			/* Transfer ownership of the pci mapping */
+			buffer->unmap_len = st->unmap_len;
+			buffer->unmap_single = st->unmap_single;
+			st->unmap_len = 0;
+		}
+	}
 
+	st->dma_addr += n;
 	return rc;
 }
 
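The bookkeeping above consumes n = min(in_len, packet_space) bytes per call, so one SKB fragment can span several output segments and one segment can draw from several fragments. A standalone simulation of that split (sizes are made up, not driver code):

#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* Hypothetical TSO job: 3000 payload bytes in two fragments,
	 * segmented with gso_size = 1460. */
	unsigned int frags[] = { 1800, 1200 };
	const unsigned int gso_size = 1460;
	unsigned int out_len = 3000;		/* total payload remaining  */
	unsigned int packet_space = gso_size;	/* room left in this packet */
	unsigned int frag_i = 0, in_len = frags[0], seg = 0;

	while (out_len) {
		unsigned int n = min_u(in_len, packet_space);

		printf("segment %u: %u bytes from fragment %u\n",
		       seg, n, frag_i);
		packet_space -= n;
		out_len -= n;
		in_len -= n;

		if (in_len == 0 && out_len)	/* move to the next fragment */
			in_len = frags[++frag_i];
		if (packet_space == 0) {	/* start a new output segment */
			packet_space = gso_size;
			seg++;
		}
	}
	printf("total segments: %u\n", seg + (packet_space != gso_size));
	return 0;
}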
@@ -929,9 +958,9 @@ static inline int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
  * Generate a new header and prepare for the new packet. Return 0 on
  * success, or -1 if failed to alloc header.
  */
-static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
+static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 				const struct sk_buff *skb,
 				struct tso_state *st)
 {
 	struct efx_tso_header *tsoh;
 	struct iphdr *tsoh_iph;
@@ -940,7 +969,7 @@ static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 	u8 *header;
 
 	/* Allocate a DMA-mapped header buffer. */
-	if (likely(TSOH_SIZE(st->p.header_length) <= TSOH_STD_SIZE)) {
+	if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
 		if (tx_queue->tso_headers_free == NULL) {
 			if (efx_tsoh_block_alloc(tx_queue))
 				return -1;
@@ -951,7 +980,7 @@ static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 		tsoh->unmap_len = 0;
 	} else {
 		tx_queue->tso_long_headers++;
-		tsoh = efx_tsoh_heap_alloc(tx_queue, st->p.header_length);
+		tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
 		if (unlikely(!tsoh))
 			return -1;
 	}
@@ -961,33 +990,32 @@ static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 	tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));
 
 	/* Copy and update the headers. */
-	memcpy(header, skb->data, st->p.header_length);
+	memcpy(header, skb->data, st->header_len);
 
 	tsoh_th->seq = htonl(st->seqnum);
 	st->seqnum += skb_shinfo(skb)->gso_size;
-	if (st->remaining_len > skb_shinfo(skb)->gso_size) {
+	if (st->out_len > skb_shinfo(skb)->gso_size) {
 		/* This packet will not finish the TSO burst. */
-		ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
+		ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
 		tsoh_th->fin = 0;
 		tsoh_th->psh = 0;
 	} else {
 		/* This packet will be the last in the TSO burst. */
-		ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
-			     + st->remaining_len);
+		ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
 		tsoh_th->fin = tcp_hdr(skb)->fin;
 		tsoh_th->psh = tcp_hdr(skb)->psh;
 	}
 	tsoh_iph->tot_len = htons(ip_length);
 
 	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
-	tsoh_iph->id = htons(st->p.ipv4_id);
-	st->p.ipv4_id++;
+	tsoh_iph->id = htons(st->ipv4_id);
+	st->ipv4_id++;
 
 	st->packet_space = skb_shinfo(skb)->gso_size;
 	++tx_queue->tso_packets;
 
 	/* Form a descriptor for this header. */
-	efx_tso_put_header(tx_queue, tsoh, st->p.header_length);
+	efx_tso_put_header(tx_queue, tsoh, st->header_len);
 
 	return 0;
 }
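The tot_len arithmetic above can be sanity-checked in isolation: a mid-burst segment carries full_packet_size - ETH_HDR_LEN bytes of IP datagram, while the final one carries the headers plus whatever payload is left. A small standalone calculation (header sizes assumed, not driver code):

#include <stdio.h>

int main(void)
{
	/* Hypothetical sizes: 14-byte Ethernet header, 20-byte IPv4 header,
	 * 20-byte TCP header, gso_size 1460, 3000 bytes of TCP payload. */
	const unsigned int eth_hdr_len = 14;
	const unsigned int header_len = 14 + 20 + 20;
	const unsigned int gso_size = 1460;
	const unsigned int full_packet_size = header_len + gso_size;
	unsigned int out_len = 3000;

	while (out_len) {
		unsigned int ip_length;

		if (out_len > gso_size)		/* mid-burst segment */
			ip_length = full_packet_size - eth_hdr_len;
		else				/* final segment */
			ip_length = header_len - eth_hdr_len + out_len;

		printf("iph->tot_len = %u\n", ip_length);
		out_len -= (out_len > gso_size) ? gso_size : out_len;
	}
	return 0;
}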
@@ -1005,11 +1033,11 @@ static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
  * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
  */
 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
-			       const struct sk_buff *skb)
+			       struct sk_buff *skb)
 {
+	struct efx_nic *efx = tx_queue->efx;
 	int frag_i, rc, rc2 = NETDEV_TX_OK;
 	struct tso_state state;
-	skb_frag_t *f;
 
 	/* Verify TSO is safe - these checks should never fail. */
 	efx_tso_check_safe(skb);
@@ -1021,29 +1049,16 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	/* Assume that skb header area contains exactly the headers, and
 	 * all payload is in the frag list.
 	 */
-	if (skb_headlen(skb) == state.p.header_length) {
+	if (skb_headlen(skb) == state.header_len) {
 		/* Grab the first payload fragment. */
 		EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
 		frag_i = 0;
-		f = &skb_shinfo(skb)->frags[frag_i];
-		rc = tso_get_fragment(&state, tx_queue->efx,
-				      f->size, f->page, f->page_offset);
+		rc = tso_get_fragment(&state, efx,
+				      skb_shinfo(skb)->frags + frag_i);
 		if (rc)
 			goto mem_err;
 	} else {
-		/* It may look like this code fragment assumes that the
-		 * skb->data portion does not cross a page boundary, but
-		 * that is not the case.  It is guaranteed to be direct
-		 * mapped memory, and therefore is physically contiguous,
-		 * and so DMA will work fine.  kmap_atomic() on this region
-		 * will just return the direct mapping, so that will work
-		 * too.
-		 */
-		int page_off = (unsigned long)skb->data & (PAGE_SIZE - 1);
-		int hl = state.p.header_length;
-		rc = tso_get_fragment(&state, tx_queue->efx,
-				      skb_headlen(skb) - hl,
-				      virt_to_page(skb->data), page_off + hl);
+		rc = tso_get_head_fragment(&state, efx, skb);
 		if (rc)
 			goto mem_err;
 		frag_i = -1;
@@ -1058,13 +1073,12 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 			goto stop;
 
 		/* Move onto the next fragment? */
-		if (state.ifc.len == 0) {
+		if (state.in_len == 0) {
 			if (++frag_i >= skb_shinfo(skb)->nr_frags)
 				/* End of payload reached. */
 				break;
-			f = &skb_shinfo(skb)->frags[frag_i];
-			rc = tso_get_fragment(&state, tx_queue->efx,
-					      f->size, f->page, f->page_offset);
+			rc = tso_get_fragment(&state, efx,
+					      skb_shinfo(skb)->frags + frag_i);
 			if (rc)
 				goto mem_err;
 		}
@@ -1082,8 +1096,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	return NETDEV_TX_OK;
 
  mem_err:
-	EFX_ERR(tx_queue->efx, "Out of memory for TSO headers, or PCI mapping"
-		" error\n");
+	EFX_ERR(efx, "Out of memory for TSO headers, or PCI mapping error\n");
 	dev_kfree_skb_any((struct sk_buff *)skb);
 	goto unwind;
 
@@ -1092,9 +1105,19 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 
 	/* Stop the queue if it wasn't stopped before. */
 	if (tx_queue->stopped == 1)
-		efx_stop_queue(tx_queue->efx);
+		efx_stop_queue(efx);
 
  unwind:
+	/* Free the DMA mapping we were in the process of writing out */
+	if (state.unmap_len) {
+		if (state.unmap_single)
+			pci_unmap_single(efx->pci_dev, state.unmap_addr,
+					 state.unmap_len, PCI_DMA_TODEVICE);
+		else
+			pci_unmap_page(efx->pci_dev, state.unmap_addr,
+				       state.unmap_len, PCI_DMA_TODEVICE);
+	}
+
 	efx_enqueue_unwind(tx_queue);
 	return rc2;
 }