aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJaedon Shin <jaedon.shin@gmail.com>2015-02-27 21:48:26 -0500
committerDavid S. Miller <davem@davemloft.net>2015-02-28 23:58:33 -0500
commit4092e6acf5cb16f56154e2dd22d647023dc3d646 (patch)
tree7d65853c721e935244f09c68abb28fd92ba319ee
parent2f1d8b9e8afa5a833d96afcd23abcb8cdf8d83ab (diff)
net: bcmgenet: fix throughput regression
This patch adds bcmgenet_tx_poll for the tx_rings. This reduces the interrupt load and lets the network stack submit xmit on time. It also separates the completion handling of tx_ring16 from bcmgenet_poll. The bcmgenet_tx_reclaim of tx_ring[{0,1,2,3}], when driven by an interrupt, must not be limited to a small number of TxBDs per invocation; reclaiming transmitted skbs too slowly is what caused the xmit performance degradation after 605ad7f ("tcp: refine TSO autosizing"). Signed-off-by: Jaedon Shin <jaedon.shin@gmail.com> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c113
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h2
2 files changed, 88 insertions, 27 deletions
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index ff83c46bc389..2874a004f815 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -971,13 +971,14 @@ static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv,
971} 971}
972 972
973/* Unlocked version of the reclaim routine */ 973/* Unlocked version of the reclaim routine */
974static void __bcmgenet_tx_reclaim(struct net_device *dev, 974static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
975 struct bcmgenet_tx_ring *ring) 975 struct bcmgenet_tx_ring *ring)
976{ 976{
977 struct bcmgenet_priv *priv = netdev_priv(dev); 977 struct bcmgenet_priv *priv = netdev_priv(dev);
978 int last_tx_cn, last_c_index, num_tx_bds; 978 int last_tx_cn, last_c_index, num_tx_bds;
979 struct enet_cb *tx_cb_ptr; 979 struct enet_cb *tx_cb_ptr;
980 struct netdev_queue *txq; 980 struct netdev_queue *txq;
981 unsigned int pkts_compl = 0;
981 unsigned int bds_compl; 982 unsigned int bds_compl;
982 unsigned int c_index; 983 unsigned int c_index;
983 984
@@ -1005,6 +1006,7 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev,
1005 tx_cb_ptr = ring->cbs + last_c_index; 1006 tx_cb_ptr = ring->cbs + last_c_index;
1006 bds_compl = 0; 1007 bds_compl = 0;
1007 if (tx_cb_ptr->skb) { 1008 if (tx_cb_ptr->skb) {
1009 pkts_compl++;
1008 bds_compl = skb_shinfo(tx_cb_ptr->skb)->nr_frags + 1; 1010 bds_compl = skb_shinfo(tx_cb_ptr->skb)->nr_frags + 1;
1009 dev->stats.tx_bytes += tx_cb_ptr->skb->len; 1011 dev->stats.tx_bytes += tx_cb_ptr->skb->len;
1010 dma_unmap_single(&dev->dev, 1012 dma_unmap_single(&dev->dev,
@@ -1028,23 +1030,45 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev,
1028 last_c_index &= (num_tx_bds - 1); 1030 last_c_index &= (num_tx_bds - 1);
1029 } 1031 }
1030 1032
1031 if (ring->free_bds > (MAX_SKB_FRAGS + 1)) 1033 if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
1032 ring->int_disable(priv, ring); 1034 if (netif_tx_queue_stopped(txq))
1033 1035 netif_tx_wake_queue(txq);
1034 if (netif_tx_queue_stopped(txq)) 1036 }
1035 netif_tx_wake_queue(txq);
1036 1037
1037 ring->c_index = c_index; 1038 ring->c_index = c_index;
1039
1040 return pkts_compl;
1038} 1041}
1039 1042
1040static void bcmgenet_tx_reclaim(struct net_device *dev, 1043static unsigned int bcmgenet_tx_reclaim(struct net_device *dev,
1041 struct bcmgenet_tx_ring *ring) 1044 struct bcmgenet_tx_ring *ring)
1042{ 1045{
1046 unsigned int released;
1043 unsigned long flags; 1047 unsigned long flags;
1044 1048
1045 spin_lock_irqsave(&ring->lock, flags); 1049 spin_lock_irqsave(&ring->lock, flags);
1046 __bcmgenet_tx_reclaim(dev, ring); 1050 released = __bcmgenet_tx_reclaim(dev, ring);
1047 spin_unlock_irqrestore(&ring->lock, flags); 1051 spin_unlock_irqrestore(&ring->lock, flags);
1052
1053 return released;
1054}
1055
1056static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
1057{
1058 struct bcmgenet_tx_ring *ring =
1059 container_of(napi, struct bcmgenet_tx_ring, napi);
1060 unsigned int work_done = 0;
1061
1062 work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
1063
1064 if (work_done == 0) {
1065 napi_complete(napi);
1066 ring->int_enable(ring->priv, ring);
1067
1068 return 0;
1069 }
1070
1071 return budget;
1048} 1072}
1049 1073
1050static void bcmgenet_tx_reclaim_all(struct net_device *dev) 1074static void bcmgenet_tx_reclaim_all(struct net_device *dev)
@@ -1302,10 +1326,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
1302 bcmgenet_tdma_ring_writel(priv, ring->index, 1326 bcmgenet_tdma_ring_writel(priv, ring->index,
1303 ring->prod_index, TDMA_PROD_INDEX); 1327 ring->prod_index, TDMA_PROD_INDEX);
1304 1328
1305 if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) { 1329 if (ring->free_bds <= (MAX_SKB_FRAGS + 1))
1306 netif_tx_stop_queue(txq); 1330 netif_tx_stop_queue(txq);
1307 ring->int_enable(priv, ring);
1308 }
1309 1331
1310out: 1332out:
1311 spin_unlock_irqrestore(&ring->lock, flags); 1333 spin_unlock_irqrestore(&ring->lock, flags);
@@ -1621,6 +1643,7 @@ static int init_umac(struct bcmgenet_priv *priv)
1621 struct device *kdev = &priv->pdev->dev; 1643 struct device *kdev = &priv->pdev->dev;
1622 int ret; 1644 int ret;
1623 u32 reg, cpu_mask_clear; 1645 u32 reg, cpu_mask_clear;
1646 int index;
1624 1647
1625 dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n"); 1648 dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n");
1626 1649
@@ -1647,7 +1670,7 @@ static int init_umac(struct bcmgenet_priv *priv)
1647 1670
1648 bcmgenet_intr_disable(priv); 1671 bcmgenet_intr_disable(priv);
1649 1672
1650 cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE; 1673 cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE;
1651 1674
1652 dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__); 1675 dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__);
1653 1676
@@ -1674,6 +1697,10 @@ static int init_umac(struct bcmgenet_priv *priv)
1674 1697
1675 bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR); 1698 bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR);
1676 1699
1700 for (index = 0; index < priv->hw_params->tx_queues; index++)
1701 bcmgenet_intrl2_1_writel(priv, (1 << index),
1702 INTRL2_CPU_MASK_CLEAR);
1703
1677 /* Enable rx/tx engine.*/ 1704 /* Enable rx/tx engine.*/
1678 dev_dbg(kdev, "done init umac\n"); 1705 dev_dbg(kdev, "done init umac\n");
1679 1706
@@ -1693,6 +1720,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
1693 unsigned int first_bd; 1720 unsigned int first_bd;
1694 1721
1695 spin_lock_init(&ring->lock); 1722 spin_lock_init(&ring->lock);
1723 ring->priv = priv;
1724 netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
1696 ring->index = index; 1725 ring->index = index;
1697 if (index == DESC_INDEX) { 1726 if (index == DESC_INDEX) {
1698 ring->queue = 0; 1727 ring->queue = 0;
@@ -1738,6 +1767,17 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
1738 TDMA_WRITE_PTR); 1767 TDMA_WRITE_PTR);
1739 bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1, 1768 bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1,
1740 DMA_END_ADDR); 1769 DMA_END_ADDR);
1770
1771 napi_enable(&ring->napi);
1772}
1773
1774static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv,
1775 unsigned int index)
1776{
1777 struct bcmgenet_tx_ring *ring = &priv->tx_rings[index];
1778
1779 napi_disable(&ring->napi);
1780 netif_napi_del(&ring->napi);
1741} 1781}
1742 1782
1743/* Initialize a RDMA ring */ 1783/* Initialize a RDMA ring */
@@ -1907,7 +1947,7 @@ static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv)
1907 return ret; 1947 return ret;
1908} 1948}
1909 1949
1910static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) 1950static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv)
1911{ 1951{
1912 int i; 1952 int i;
1913 1953
@@ -1926,6 +1966,18 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
1926 kfree(priv->tx_cbs); 1966 kfree(priv->tx_cbs);
1927} 1967}
1928 1968
1969static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
1970{
1971 int i;
1972
1973 bcmgenet_fini_tx_ring(priv, DESC_INDEX);
1974
1975 for (i = 0; i < priv->hw_params->tx_queues; i++)
1976 bcmgenet_fini_tx_ring(priv, i);
1977
1978 __bcmgenet_fini_dma(priv);
1979}
1980
1929/* init_edma: Initialize DMA control register */ 1981/* init_edma: Initialize DMA control register */
1930static int bcmgenet_init_dma(struct bcmgenet_priv *priv) 1982static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
1931{ 1983{
@@ -1952,7 +2004,7 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
1952 priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb), 2004 priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb),
1953 GFP_KERNEL); 2005 GFP_KERNEL);
1954 if (!priv->tx_cbs) { 2006 if (!priv->tx_cbs) {
1955 bcmgenet_fini_dma(priv); 2007 __bcmgenet_fini_dma(priv);
1956 return -ENOMEM; 2008 return -ENOMEM;
1957 } 2009 }
1958 2010
@@ -1975,9 +2027,6 @@ static int bcmgenet_poll(struct napi_struct *napi, int budget)
1975 struct bcmgenet_priv, napi); 2027 struct bcmgenet_priv, napi);
1976 unsigned int work_done; 2028 unsigned int work_done;
1977 2029
1978 /* tx reclaim */
1979 bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]);
1980
1981 work_done = bcmgenet_desc_rx(priv, budget); 2030 work_done = bcmgenet_desc_rx(priv, budget);
1982 2031
1983 /* Advancing our consumer index*/ 2032 /* Advancing our consumer index*/
@@ -2022,28 +2071,34 @@ static void bcmgenet_irq_task(struct work_struct *work)
2022static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) 2071static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
2023{ 2072{
2024 struct bcmgenet_priv *priv = dev_id; 2073 struct bcmgenet_priv *priv = dev_id;
2074 struct bcmgenet_tx_ring *ring;
2025 unsigned int index; 2075 unsigned int index;
2026 2076
2027 /* Save irq status for bottom-half processing. */ 2077 /* Save irq status for bottom-half processing. */
2028 priv->irq1_stat = 2078 priv->irq1_stat =
2029 bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & 2079 bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
2030 ~priv->int1_mask; 2080 ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
2031 /* clear interrupts */ 2081 /* clear interrupts */
2032 bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); 2082 bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
2033 2083
2034 netif_dbg(priv, intr, priv->dev, 2084 netif_dbg(priv, intr, priv->dev,
2035 "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); 2085 "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
2086
2036 /* Check the MBDONE interrupts. 2087 /* Check the MBDONE interrupts.
2037 * packet is done, reclaim descriptors 2088 * packet is done, reclaim descriptors
2038 */ 2089 */
2039 if (priv->irq1_stat & 0x0000ffff) { 2090 for (index = 0; index < priv->hw_params->tx_queues; index++) {
2040 index = 0; 2091 if (!(priv->irq1_stat & BIT(index)))
2041 for (index = 0; index < 16; index++) { 2092 continue;
2042 if (priv->irq1_stat & (1 << index)) 2093
2043 bcmgenet_tx_reclaim(priv->dev, 2094 ring = &priv->tx_rings[index];
2044 &priv->tx_rings[index]); 2095
2096 if (likely(napi_schedule_prep(&ring->napi))) {
2097 ring->int_disable(priv, ring);
2098 __napi_schedule(&ring->napi);
2045 } 2099 }
2046 } 2100 }
2101
2047 return IRQ_HANDLED; 2102 return IRQ_HANDLED;
2048} 2103}
2049 2104
@@ -2075,8 +2130,12 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
2075 } 2130 }
2076 if (priv->irq0_stat & 2131 if (priv->irq0_stat &
2077 (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) { 2132 (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) {
2078 /* Tx reclaim */ 2133 struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX];
2079 bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]); 2134
2135 if (likely(napi_schedule_prep(&ring->napi))) {
2136 ring->int_disable(priv, ring);
2137 __napi_schedule(&ring->napi);
2138 }
2080 } 2139 }
2081 if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | 2140 if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
2082 UMAC_IRQ_PHY_DET_F | 2141 UMAC_IRQ_PHY_DET_F |
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index b36ddec0cc0a..0d370d168aee 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -520,6 +520,7 @@ struct bcmgenet_hw_params {
520 520
521struct bcmgenet_tx_ring { 521struct bcmgenet_tx_ring {
522 spinlock_t lock; /* ring lock */ 522 spinlock_t lock; /* ring lock */
523 struct napi_struct napi; /* NAPI per tx queue */
523 unsigned int index; /* ring index */ 524 unsigned int index; /* ring index */
524 unsigned int queue; /* queue index */ 525 unsigned int queue; /* queue index */
525 struct enet_cb *cbs; /* tx ring buffer control block*/ 526 struct enet_cb *cbs; /* tx ring buffer control block*/
@@ -534,6 +535,7 @@ struct bcmgenet_tx_ring {
534 struct bcmgenet_tx_ring *); 535 struct bcmgenet_tx_ring *);
535 void (*int_disable)(struct bcmgenet_priv *priv, 536 void (*int_disable)(struct bcmgenet_priv *priv,
536 struct bcmgenet_tx_ring *); 537 struct bcmgenet_tx_ring *);
538 struct bcmgenet_priv *priv;
537}; 539};
538 540
539/* device context */ 541/* device context */