author		Ezequiel Garcia <ezequiel.garcia@free-electrons.com>	2014-05-19 12:59:55 -0400
committer	David S. Miller <davem@davemloft.net>	2014-05-22 14:57:16 -0400
commit		2adb719d74f6e174071e5c913290b9bbd8c2c0e8 (patch)
tree		2a2e08ef568a16634a85b035a6908fb396e8d06e /drivers/net/ethernet/marvell/mvneta.c
parent		e19d2dda90c5305a201282317aa492ce0337aa62 (diff)
net: mvneta: Implement software TSO
Now that the TSO helper API has been introduced, this commit makes use of it
to implement TSO in this driver.

Using iperf to test and vmstat to check CPU usage shows a substantial CPU
usage drop when TSO is on (~15% vs. ~25%). HTTP-based tests performed by
Willy Tarreau have also shown performance improvements.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
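For orientation before the diff: the net/tso.h helpers used below drive a two-level loop. tso_start() primes a cursor over the GSO skb, tso_build_hdr() rebuilds the MAC/IP/TCP headers for each MSS-sized segment, and tso_build_data() advances the payload cursor after each chunk handed to the hardware. The following is a hedged sketch of that generic pattern, not mvneta code; hdr_buffer(), build_hdr_desc() and build_data_desc() are hypothetical placeholders for the driver-specific descriptor setup (mvneta_tso_put_hdr()/mvneta_tso_put_data() in this patch).

    #include <net/tso.h>

    /* Sketch only: the generic net/tso.h usage pattern this commit follows.
     * hdr_buffer(), build_hdr_desc() and build_data_desc() are invented names.
     */
    static void tso_xmit_sketch(struct sk_buff *skb)
    {
            struct tso_t tso;
            int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
            int total_len = skb->len - hdr_len;

            tso_start(skb, &tso);
            while (total_len > 0) {
                    /* one on-wire packet per MSS-sized segment */
                    int data_left = min_t(int, skb_shinfo(skb)->gso_size,
                                          total_len);

                    total_len -= data_left;
                    tso_build_hdr(skb, hdr_buffer(), &tso, data_left,
                                  total_len == 0);
                    build_hdr_desc();               /* queue header descriptor */
                    while (data_left > 0) {
                            int size = min_t(int, tso.size, data_left);

                            build_data_desc(tso.data, size); /* queue payload */
                            data_left -= size;
                            tso_build_data(skb, &tso, size); /* advance cursor */
                    }
            }
    }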
Diffstat (limited to 'drivers/net/ethernet/marvell/mvneta.c')
-rw-r--r--	drivers/net/ethernet/marvell/mvneta.c	153
1 file changed, 152 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 85090291e210..18c698d9ef9b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -23,6 +23,7 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <linux/io.h>
+#include <net/tso.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
@@ -244,6 +245,9 @@
244 245
245#define MVNETA_TX_MTU_MAX 0x3ffff 246#define MVNETA_TX_MTU_MAX 0x3ffff
246 247
248/* TSO header size */
249#define TSO_HEADER_SIZE 128
250
247/* Max number of Rx descriptors */ 251/* Max number of Rx descriptors */
248#define MVNETA_MAX_RXD 128 252#define MVNETA_MAX_RXD 128
249 253
@@ -413,6 +417,12 @@ struct mvneta_tx_queue {
 
 	/* Index of the next TX DMA descriptor to process */
 	int next_desc_to_proc;
+
+	/* DMA buffers for TSO headers */
+	char *tso_hdrs;
+
+	/* DMA address of TSO headers */
+	dma_addr_t tso_hdrs_phys;
 };
 
 struct mvneta_rx_queue {
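The two fields added above describe a single coherent allocation seen from both sides: tso_hdrs is the CPU pointer, tso_hdrs_phys the matching DMA address. The region is carved into fixed TSO_HEADER_SIZE slices, one per descriptor slot, which is why the put-index arithmetic later in the patch needs no per-packet header mapping. A hedged illustration of that layout (these helpers are invented names, not driver code):

    /* Illustration only: slot i owns bytes [i * TSO_HEADER_SIZE,
     * (i + 1) * TSO_HEADER_SIZE) of the region, in both address spaces.
     */
    static inline char *tso_hdr_slot_cpu(struct mvneta_tx_queue *txq, int i)
    {
            return txq->tso_hdrs + i * TSO_HEADER_SIZE;
    }

    static inline dma_addr_t tso_hdr_slot_dma(struct mvneta_tx_queue *txq, int i)
    {
            return txq->tso_hdrs_phys + i * TSO_HEADER_SIZE;
    }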
@@ -1519,6 +1529,126 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 	return rx_done;
 }
 
+static inline void
+mvneta_tso_put_hdr(struct sk_buff *skb,
+		   struct mvneta_port *pp, struct mvneta_tx_queue *txq)
+{
+	struct mvneta_tx_desc *tx_desc;
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+	txq->tx_skb[txq->txq_put_index] = NULL;
+	tx_desc = mvneta_txq_next_desc_get(txq);
+	tx_desc->data_size = hdr_len;
+	tx_desc->command = mvneta_skb_tx_csum(pp, skb);
+	tx_desc->command |= MVNETA_TXD_F_DESC;
+	tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
+				 txq->txq_put_index * TSO_HEADER_SIZE;
+	mvneta_txq_inc_put(txq);
+}
+
+static inline int
+mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
+		    struct sk_buff *skb, char *data, int size,
+		    bool last_tcp, bool is_last)
+{
+	struct mvneta_tx_desc *tx_desc;
+
+	tx_desc = mvneta_txq_next_desc_get(txq);
+	tx_desc->data_size = size;
+	tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, data,
+						size, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev->dev.parent,
+		     tx_desc->buf_phys_addr))) {
+		mvneta_txq_desc_put(txq);
+		return -ENOMEM;
+	}
+
+	tx_desc->command = 0;
+	txq->tx_skb[txq->txq_put_index] = NULL;
+
+	if (last_tcp) {
+		/* last descriptor in the TCP packet */
+		tx_desc->command = MVNETA_TXD_L_DESC;
+
+		/* last descriptor in SKB */
+		if (is_last)
+			txq->tx_skb[txq->txq_put_index] = skb;
+	}
+	mvneta_txq_inc_put(txq);
+	return 0;
+}
+
+static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
+			 struct mvneta_tx_queue *txq)
+{
+	int total_len, data_left;
+	int desc_count = 0;
+	struct mvneta_port *pp = netdev_priv(dev);
+	struct tso_t tso;
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	int i;
+
+	/* Count needed descriptors */
+	if ((txq->count + tso_count_descs(skb)) >= txq->size)
+		return 0;
+
+	if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) {
+		pr_info("*** Is this even possible???!?!?\n");
+		return 0;
+	}
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	tso_start(skb, &tso);
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		char *hdr;
+
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+		desc_count++;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE;
+		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+
+		mvneta_tso_put_hdr(skb, pp, txq);
+
+		while (data_left > 0) {
+			int size;
+			desc_count++;
+
+			size = min_t(int, tso.size, data_left);
+
+			if (mvneta_tso_put_data(dev, txq, skb,
+						tso.data, size,
+						size == data_left,
+						total_len == 0))
+				goto err_release;
+			data_left -= size;
+
+			tso_build_data(skb, &tso, size);
+		}
+	}
+
+	return desc_count;
+
+err_release:
+	/* Release all used data descriptors; header descriptors must not
+	 * be DMA-unmapped.
+	 */
+	for (i = desc_count - 1; i >= 0; i--) {
+		struct mvneta_tx_desc *tx_desc = txq->descs + i;
+		if (!(tx_desc->command & MVNETA_TXD_F_DESC))
+			dma_unmap_single(pp->dev->dev.parent,
+					 tx_desc->buf_phys_addr,
+					 tx_desc->data_size,
+					 DMA_TO_DEVICE);
+		mvneta_txq_desc_put(txq);
+	}
+	return 0;
+}
+
 /* Handle tx fragmentation processing */
 static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 				  struct mvneta_tx_queue *txq)
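The early `(txq->count + tso_count_descs(skb)) >= txq->size` check above reserves ring space for the whole burst before any descriptor is touched. A back-of-the-envelope calculation with assumed numbers (ordinary userspace C, not driver code) shows why one GSO skb can claim a large slice of the ring:

    #include <stdio.h>

    int main(void)
    {
            int skb_len = 65536;    /* assumed: 64 KiB GSO skb */
            int hdr_len = 54;       /* assumed: 14 B MAC + 20 B IP + 20 B TCP */
            int mss = 1448;         /* assumed gso_size on a 1500-byte MTU link */
            int payload = skb_len - hdr_len;
            int segs = (payload + mss - 1) / mss;

            /* one header descriptor plus at least one data descriptor per
             * segment, mirroring what tso_count_descs() accounts for
             */
            printf("segments: %d, descriptors needed: >= %d\n", segs, 2 * segs);
            return 0;
    }

With these numbers that is 46 segments and at least 92 descriptors for a single skb.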
@@ -1590,6 +1720,11 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 	if (!netif_running(dev))
 		goto out;
 
+	if (skb_is_gso(skb)) {
+		frags = mvneta_tx_tso(skb, dev, txq);
+		goto out;
+	}
+
 	frags = skb_shinfo(skb)->nr_frags + 1;
 
 	/* Get a descriptor for the first part of the packet */
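Note the dispatch above: mvneta_tx_tso() returns the number of descriptors it consumed, so a zero return (ring too full, or the header sanity check failing) leaves frags at 0, and the shared out: path in mvneta_tx() then treats the skb as not transmitted, dropping and freeing it rather than returning busy to the stack.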
@@ -2108,6 +2243,18 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 				  txq->descs, txq->descs_phys);
 		return -ENOMEM;
 	}
+
+	/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+	txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
+					   txq->size * TSO_HEADER_SIZE,
+					   &txq->tso_hdrs_phys, GFP_KERNEL);
+	if (txq->tso_hdrs == NULL) {
+		kfree(txq->tx_skb);
+		dma_free_coherent(pp->dev->dev.parent,
+				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
+				  txq->descs, txq->descs_phys);
+		return -ENOMEM;
+	}
 	mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
 
 	return 0;
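For scale: the coherent header region is txq->size * TSO_HEADER_SIZE bytes per TX queue. Assuming the driver's 532-entry TX ring (MVNETA_MAX_TXD), that works out to 532 * 128 = 68,096 bytes, roughly 66.5 KiB per queue, paid once at queue init instead of a DMA mapping per transmitted header.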
@@ -2119,6 +2266,10 @@ static void mvneta_txq_deinit(struct mvneta_port *pp,
 {
 	kfree(txq->tx_skb);
 
+	if (txq->tso_hdrs)
+		dma_free_coherent(pp->dev->dev.parent,
+				  txq->size * TSO_HEADER_SIZE,
+				  txq->tso_hdrs, txq->tso_hdrs_phys);
 	if (txq->descs)
 		dma_free_coherent(pp->dev->dev.parent,
 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2894,7 +3045,7 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight);
 
-	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM;
+	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->hw_features |= dev->features;
 	dev->vlan_features |= dev->features;
 	dev->priv_flags |= IFF_UNICAST_FLT;
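Because NETIF_F_TSO is also mirrored into hw_features, the offload can be toggled at runtime with ethtool -K <iface> tso on|off, which makes A/B comparisons like the iperf/vmstat numbers in the commit message straightforward to reproduce.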