author		Nimrod Andy <B38611@freescale.com>	2014-06-11 20:16:23 -0400
committer	David S. Miller <davem@davemloft.net>	2014-06-12 14:01:57 -0400
commit		79f339125ea316e910220e5f5b4ad30370f4de85
tree		51de696840abc67a2f5d5803f63212b2fd593481
parent		6e909283cb344e32aa8adb4a4c169512d8e5fd27
net: fec: Add software TSO support
Add software TSO support for FEC.
This feature improves outbound throughput performance.
Tested on an imx6dl sabresd board; running iperf TCP tests shows:
- 16.2% improvement compared with the FEC SG patch
- 82% improvement compared with no SG & TSO patches
$ ethtool -K eth0 tso on
$ iperf -c 10.192.242.167 -t 3 &
[ 3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 3.0 sec 181 MBytes 506 Mbits/sec
During the test, CPU load is 30%.
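For reference, the peer at 10.192.242.167 is assumed to be running a stock
iperf server for the duration of the test, e.g.:
$ iperf -s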
Since the imx6dl FEC bandwidth is limited by the SoC system bus bandwidth,
the performance with SW TSO is a milestone.
CC: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Laight <David.Laight@ACULAB.COM>
CC: Li Frank <B20596@freescale.com>
Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	drivers/net/ethernet/freescale/fec.h		6
-rw-r--r--	drivers/net/ethernet/freescale/fec_main.c	255
2 files changed, 238 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index e7ce14d8d3c3..671d080105a7 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -299,6 +299,12 @@ struct fec_enet_private {
 	unsigned short bufdesc_size;
 	unsigned short tx_ring_size;
 	unsigned short rx_ring_size;
+	unsigned short tx_stop_threshold;
+	unsigned short tx_wake_threshold;
+
+	/* Software TSO */
+	char *tso_hdrs;
+	dma_addr_t tso_hdrs_dma;
 
 	struct platform_device *pdev;
 
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index bea00a8d6c99..38d9d276ab8b 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -36,6 +36,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -228,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #define FEC_PAUSE_FLAG_AUTONEG	0x1
 #define FEC_PAUSE_FLAG_ENABLE	0x2
 
+#define TSO_HEADER_SIZE		128
+/* Max number of allowed TCP segments for software TSO */
+#define FEC_MAX_TSO_SEGS	100
+#define FEC_MAX_SKB_DESCS	(FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
+#define IS_TSO_HEADER(txq, addr) \
+	((addr >= txq->tso_hdrs_dma) && \
+	(addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
+
 static int mii_cnt;
 
 static inline
@@ -438,8 +448,17 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
 	unsigned short buflen;
 	unsigned int estatus = 0;
 	unsigned int index;
+	int entries_free;
 	int ret;
 
+	entries_free = fec_enet_get_free_txdesc_num(fep);
+	if (entries_free < MAX_SKB_FRAGS + 1) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "NOT enough BD for SG!\n");
+		return NETDEV_TX_OK;
+	}
+
 	/* Protocol checksum off-load for TCP and UDP. */
 	if (fec_enet_clear_csum(skb, ndev)) {
 		dev_kfree_skb_any(skb);
@@ -534,35 +553,210 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
 	return 0;
 }
 
-static netdev_tx_t
-fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static int
+fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
+			struct bufdesc *bdp, int index, char *data,
+			int size, bool last_tcp, bool is_last)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	struct bufdesc *bdp;
-	unsigned short status;
-	int entries_free;
-	int ret;
-
-	/* Fill in a Tx ring entry */
-	bdp = fep->cur_tx;
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+	unsigned short status;
+	unsigned int estatus = 0;
 
 	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
 
-	if (status & BD_ENET_TX_READY) {
-		/* Ooops.  All transmit buffers are full.  Bail out.
-		 * This should not happen, since ndev->tbusy should be set.
-		 */
+	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+	bdp->cbd_datlen = size;
+
+	if (((unsigned long) data) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], data, size);
+		data = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(data, size);
+	}
+
+	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
+					size, DMA_TO_DEVICE);
+	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+		dev_kfree_skb_any(skb);
 		if (net_ratelimit())
-			netdev_err(ndev, "tx queue full!\n");
+			netdev_err(ndev, "Tx DMA memory map failed\n");
 		return NETDEV_TX_BUSY;
 	}
 
-	ret = fec_enet_txq_submit_skb(skb, ndev);
+	if (fep->bufdesc_ex) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	/* Handle the last BD specially */
+	if (last_tcp)
+		status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
+	if (is_last) {
+		status |= BD_ENET_TX_INTR;
+		if (fep->bufdesc_ex)
+			ebdp->cbd_esc |= BD_ENET_TX_INT;
+	}
+
+	bdp->cbd_sc = status;
+
+	return 0;
+}
+
+static int
+fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
+			struct bufdesc *bdp, int index)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+	void *bufaddr;
+	unsigned long dmabuf;
+	unsigned short status;
+	unsigned int estatus = 0;
+
+	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
+	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+
+	bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+	dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
+	if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], skb->data, hdr_len);
+		bufaddr = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(bufaddr, hdr_len);
+
+		dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
+					hdr_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
+			dev_kfree_skb_any(skb);
+			if (net_ratelimit())
+				netdev_err(ndev, "Tx DMA memory map failed\n");
+			return NETDEV_TX_BUSY;
+		}
+	}
+
+	bdp->cbd_bufaddr = dmabuf;
+	bdp->cbd_datlen = hdr_len;
+
+	if (fep->bufdesc_ex) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	bdp->cbd_sc = status;
+
+	return 0;
+}
+
+static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	int total_len, data_left;
+	struct bufdesc *bdp = fep->cur_tx;
+	struct tso_t tso;
+	unsigned int index = 0;
+	int ret;
+
+	if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "NOT enough BD for TSO!\n");
+		return NETDEV_TX_OK;
+	}
+
+	/* Protocol checksum off-load for TCP and UDP. */
+	if (fec_enet_clear_csum(skb, ndev)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	tso_start(skb, &tso);
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		char *hdr;
+
+		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+		ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
+		if (ret)
+			goto err_release;
+
+		while (data_left > 0) {
+			int size;
+
+			size = min_t(int, tso.size, data_left);
+			bdp = fec_enet_get_nextdesc(bdp, fep);
+			index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+			ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
+							size, size == data_left,
+							total_len == 0);
+			if (ret)
+				goto err_release;
+
+			data_left -= size;
+			tso_build_data(skb, &tso, size);
+		}
+
+		bdp = fec_enet_get_nextdesc(bdp, fep);
+	}
+
+	/* Save skb pointer */
+	fep->tx_skbuff[index] = skb;
+
+	fec_enet_submit_work(bdp, fep);
+
+	skb_tx_timestamp(skb);
+	fep->cur_tx = bdp;
+
+	/* Trigger transmission start */
+	writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
+	return 0;
+
+err_release:
+	/* TODO: Release all used data descriptors for TSO */
+	return ret;
+}
+
+static netdev_tx_t
+fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int entries_free;
+	int ret;
+
+	if (skb_is_gso(skb))
+		ret = fec_enet_txq_submit_tso(skb, ndev);
+	else
+		ret = fec_enet_txq_submit_skb(skb, ndev);
 	if (ret)
 		return ret;
 
 	entries_free = fec_enet_get_free_txdesc_num(fep);
-	if (entries_free < MAX_SKB_FRAGS + 1)
+	if (entries_free <= fep->tx_stop_threshold)
 		netif_stop_queue(ndev);
 
 	return NETDEV_TX_OK;
@@ -883,7 +1077,7 @@ fec_enet_tx(struct net_device *ndev)
 	unsigned short status;
 	struct sk_buff *skb;
 	int index = 0;
-	int entries;
+	int entries_free;
 
 	fep = netdev_priv(ndev);
 	bdp = fep->dirty_tx;
@@ -900,8 +1094,9 @@ fec_enet_tx(struct net_device *ndev)
 		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
 
 		skb = fep->tx_skbuff[index];
-		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
-				DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
+			dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+					bdp->cbd_datlen, DMA_TO_DEVICE);
 		bdp->cbd_bufaddr = 0;
 		if (!skb) {
 			bdp = fec_enet_get_nextdesc(bdp, fep);
@@ -962,9 +1157,11 @@ fec_enet_tx(struct net_device *ndev)
 
 		/* Since we have freed up a buffer, the ring is no longer full
 		 */
-		entries = fec_enet_get_free_txdesc_num(fep);
-		if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
-			netif_wake_queue(ndev);
+		if (netif_queue_stopped(ndev)) {
+			entries_free = fec_enet_get_free_txdesc_num(fep);
+			if (entries_free >= fep->tx_wake_threshold)
+				netif_wake_queue(ndev);
+		}
 	}
 	return;
 }
@@ -2166,6 +2363,9 @@ static int fec_enet_init(struct net_device *ndev)
 	fep->tx_ring_size = TX_RING_SIZE;
 	fep->rx_ring_size = RX_RING_SIZE;
 
+	fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+	fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;
+
 	if (fep->bufdesc_ex)
 		fep->bufdesc_size = sizeof(struct bufdesc_ex);
 	else
@@ -2179,6 +2379,13 @@ static int fec_enet_init(struct net_device *ndev)
 	if (!cbd_base)
 		return -ENOMEM;
 
+	fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
+						&fep->tso_hdrs_dma, GFP_KERNEL);
+	if (!fep->tso_hdrs) {
+		dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
+		return -ENOMEM;
+	}
+
 	memset(cbd_base, 0, PAGE_SIZE);
 
 	fep->netdev = ndev;
@@ -2209,9 +2416,11 @@ static int fec_enet_init(struct net_device *ndev)
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
 
 	if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
+		ndev->gso_max_segs = FEC_MAX_TSO_SEGS;
+
 		/* enable hw accelerator */
 		ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-				| NETIF_F_RXCSUM | NETIF_F_SG);
+				| NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
 		fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
 	}
 