about summary refs log tree commit diff stats
path: root/drivers/net/ethernet
diff options
context:
space:
mode:
author Hariprasad Shenai <hariprasad@chelsio.com> 2015-02-04 05:02:52 -0500
committer David S. Miller <davem@davemloft.net> 2015-02-05 02:55:58 -0500
commit3a336cb17183b29827fdffaffb5e62f8912f5ca1 (patch)
tree0ca09203cd1f8ace3496ad697e85679c658f0861 /drivers/net/ethernet
parent3fcf9011188755c883f377764c463b4271d594a2 (diff)
cxgb4: Add low latency socket busy_poll support
cxgb_busy_poll, corresponding to ndo_busy_poll, gets called by the socket waiting for data. With busy_poll enabled, improvement is seen in latency numbers as observed by collecting netperf TCP_RR numbers. Below are latency numbers, with and without busy-poll, in a switched environment for a particular msg size: netperf command: netperf -4 -H <ip> -l 30 -t TCP_RR -- -r1,1 Latency without busy-poll: ~16.25 us Latency with busy-poll : ~08.79 us Based on original work by Kumar Sanghvi <kumaras@chelsio.com> Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r-- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h      | 113
-rw-r--r-- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |  16
-rw-r--r-- drivers/net/ethernet/chelsio/cxgb4/sge.c        |  47
-rw-r--r-- drivers/net/ethernet/chelsio/cxgb4/t4_values.h  |   1
4 files changed, 174 insertions, 3 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index fb6980a09981..55019c93387d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -476,6 +476,22 @@ struct sge_rspq { /* state for an SGE response queue */
476 struct adapter *adap; 476 struct adapter *adap;
477 struct net_device *netdev; /* associated net device */ 477 struct net_device *netdev; /* associated net device */
478 rspq_handler_t handler; 478 rspq_handler_t handler;
479#ifdef CONFIG_NET_RX_BUSY_POLL
480#define CXGB_POLL_STATE_IDLE 0
481#define CXGB_POLL_STATE_NAPI BIT(0) /* NAPI owns this poll */
482#define CXGB_POLL_STATE_POLL BIT(1) /* poll owns this poll */
483#define CXGB_POLL_STATE_NAPI_YIELD BIT(2) /* NAPI yielded this poll */
484#define CXGB_POLL_STATE_POLL_YIELD BIT(3) /* poll yielded this poll */
485#define CXGB_POLL_YIELD (CXGB_POLL_STATE_NAPI_YIELD | \
486 CXGB_POLL_STATE_POLL_YIELD)
487#define CXGB_POLL_LOCKED (CXGB_POLL_STATE_NAPI | \
488 CXGB_POLL_STATE_POLL)
489#define CXGB_POLL_USER_PEND (CXGB_POLL_STATE_POLL | \
490 CXGB_POLL_STATE_POLL_YIELD)
491 unsigned int bpoll_state;
492 spinlock_t bpoll_lock; /* lock for busy poll */
493#endif /* CONFIG_NET_RX_BUSY_POLL */
494
479}; 495};
480 496
481struct sge_eth_stats { /* Ethernet queue statistics */ 497struct sge_eth_stats { /* Ethernet queue statistics */
@@ -880,6 +896,102 @@ static inline struct adapter *netdev2adap(const struct net_device *dev)
880 return netdev2pinfo(dev)->adapter; 896 return netdev2pinfo(dev)->adapter;
881} 897}
882 898
899#ifdef CONFIG_NET_RX_BUSY_POLL
900static inline void cxgb_busy_poll_init_lock(struct sge_rspq *q)
901{
902 spin_lock_init(&q->bpoll_lock);
903 q->bpoll_state = CXGB_POLL_STATE_IDLE;
904}
905
906static inline bool cxgb_poll_lock_napi(struct sge_rspq *q)
907{
908 bool rc = true;
909
910 spin_lock(&q->bpoll_lock);
911 if (q->bpoll_state & CXGB_POLL_LOCKED) {
912 q->bpoll_state |= CXGB_POLL_STATE_NAPI_YIELD;
913 rc = false;
914 } else {
915 q->bpoll_state = CXGB_POLL_STATE_NAPI;
916 }
917 spin_unlock(&q->bpoll_lock);
918 return rc;
919}
920
921static inline bool cxgb_poll_unlock_napi(struct sge_rspq *q)
922{
923 bool rc = false;
924
925 spin_lock(&q->bpoll_lock);
926 if (q->bpoll_state & CXGB_POLL_STATE_POLL_YIELD)
927 rc = true;
928 q->bpoll_state = CXGB_POLL_STATE_IDLE;
929 spin_unlock(&q->bpoll_lock);
930 return rc;
931}
932
933static inline bool cxgb_poll_lock_poll(struct sge_rspq *q)
934{
935 bool rc = true;
936
937 spin_lock_bh(&q->bpoll_lock);
938 if (q->bpoll_state & CXGB_POLL_LOCKED) {
939 q->bpoll_state |= CXGB_POLL_STATE_POLL_YIELD;
940 rc = false;
941 } else {
942 q->bpoll_state |= CXGB_POLL_STATE_POLL;
943 }
944 spin_unlock_bh(&q->bpoll_lock);
945 return rc;
946}
947
948static inline bool cxgb_poll_unlock_poll(struct sge_rspq *q)
949{
950 bool rc = false;
951
952 spin_lock_bh(&q->bpoll_lock);
953 if (q->bpoll_state & CXGB_POLL_STATE_POLL_YIELD)
954 rc = true;
955 q->bpoll_state = CXGB_POLL_STATE_IDLE;
956 spin_unlock_bh(&q->bpoll_lock);
957 return rc;
958}
959
960static inline bool cxgb_poll_busy_polling(struct sge_rspq *q)
961{
962 return q->bpoll_state & CXGB_POLL_USER_PEND;
963}
964#else
965static inline void cxgb_busy_poll_init_lock(struct sge_rspq *q)
966{
967}
968
969static inline bool cxgb_poll_lock_napi(struct sge_rspq *q)
970{
971 return true;
972}
973
974static inline bool cxgb_poll_unlock_napi(struct sge_rspq *q)
975{
976 return false;
977}
978
979static inline bool cxgb_poll_lock_poll(struct sge_rspq *q)
980{
981 return false;
982}
983
984static inline bool cxgb_poll_unlock_poll(struct sge_rspq *q)
985{
986 return false;
987}
988
989static inline bool cxgb_poll_busy_polling(struct sge_rspq *q)
990{
991 return false;
992}
993#endif /* CONFIG_NET_RX_BUSY_POLL */
994
883void t4_os_portmod_changed(const struct adapter *adap, int port_id); 995void t4_os_portmod_changed(const struct adapter *adap, int port_id);
884void t4_os_link_changed(struct adapter *adap, int port_id, int link_stat); 996void t4_os_link_changed(struct adapter *adap, int port_id, int link_stat);
885 997
@@ -908,6 +1020,7 @@ irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
908int t4_sge_init(struct adapter *adap); 1020int t4_sge_init(struct adapter *adap);
909void t4_sge_start(struct adapter *adap); 1021void t4_sge_start(struct adapter *adap);
910void t4_sge_stop(struct adapter *adap); 1022void t4_sge_stop(struct adapter *adap);
1023int cxgb_busy_poll(struct napi_struct *napi);
911extern int dbfifo_int_thresh; 1024extern int dbfifo_int_thresh;
912 1025
913#define for_each_port(adapter, iter) \ 1026#define for_each_port(adapter, iter) \
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 5bf490a781aa..041742b5e0e8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -923,8 +923,14 @@ static void quiesce_rx(struct adapter *adap)
923 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) { 923 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
924 struct sge_rspq *q = adap->sge.ingr_map[i]; 924 struct sge_rspq *q = adap->sge.ingr_map[i];
925 925
926 if (q && q->handler) 926 if (q && q->handler) {
927 napi_disable(&q->napi); 927 napi_disable(&q->napi);
928 local_bh_disable();
929 while (!cxgb_poll_lock_napi(q))
930 mdelay(1);
931 local_bh_enable();
932 }
933
928 } 934 }
929} 935}
930 936
@@ -940,8 +946,10 @@ static void enable_rx(struct adapter *adap)
940 946
941 if (!q) 947 if (!q)
942 continue; 948 continue;
943 if (q->handler) 949 if (q->handler) {
950 cxgb_busy_poll_init_lock(q);
944 napi_enable(&q->napi); 951 napi_enable(&q->napi);
952 }
945 /* 0-increment GTS to start the timer and enable interrupts */ 953 /* 0-increment GTS to start the timer and enable interrupts */
946 t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A), 954 t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
947 SEINTARM_V(q->intr_params) | 955 SEINTARM_V(q->intr_params) |
@@ -4563,6 +4571,10 @@ static const struct net_device_ops cxgb4_netdev_ops = {
4563#ifdef CONFIG_NET_POLL_CONTROLLER 4571#ifdef CONFIG_NET_POLL_CONTROLLER
4564 .ndo_poll_controller = cxgb_netpoll, 4572 .ndo_poll_controller = cxgb_netpoll,
4565#endif 4573#endif
4574#ifdef CONFIG_NET_RX_BUSY_POLL
4575 .ndo_busy_poll = cxgb_busy_poll,
4576#endif
4577
4566}; 4578};
4567 4579
4568void t4_fatal_err(struct adapter *adap) 4580void t4_fatal_err(struct adapter *adap)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 619156112b21..b4b9f6048fe7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -43,6 +43,9 @@
43#include <linux/export.h> 43#include <linux/export.h>
44#include <net/ipv6.h> 44#include <net/ipv6.h>
45#include <net/tcp.h> 45#include <net/tcp.h>
46#ifdef CONFIG_NET_RX_BUSY_POLL
47#include <net/busy_poll.h>
48#endif /* CONFIG_NET_RX_BUSY_POLL */
46#include "cxgb4.h" 49#include "cxgb4.h"
47#include "t4_regs.h" 50#include "t4_regs.h"
48#include "t4_values.h" 51#include "t4_values.h"
@@ -1720,6 +1723,7 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
1720 skb->truesize += skb->data_len; 1723 skb->truesize += skb->data_len;
1721 skb->ip_summed = CHECKSUM_UNNECESSARY; 1724 skb->ip_summed = CHECKSUM_UNNECESSARY;
1722 skb_record_rx_queue(skb, rxq->rspq.idx); 1725 skb_record_rx_queue(skb, rxq->rspq.idx);
1726 skb_mark_napi_id(skb, &rxq->rspq.napi);
1723 if (rxq->rspq.netdev->features & NETIF_F_RXHASH) 1727 if (rxq->rspq.netdev->features & NETIF_F_RXHASH)
1724 skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val, 1728 skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val,
1725 PKT_HASH_TYPE_L3); 1729 PKT_HASH_TYPE_L3);
@@ -1763,6 +1767,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
1763 csum_ok = pkt->csum_calc && !pkt->err_vec && 1767 csum_ok = pkt->csum_calc && !pkt->err_vec &&
1764 (q->netdev->features & NETIF_F_RXCSUM); 1768 (q->netdev->features & NETIF_F_RXCSUM);
1765 if ((pkt->l2info & htonl(RXF_TCP_F)) && 1769 if ((pkt->l2info & htonl(RXF_TCP_F)) &&
1770 !(cxgb_poll_busy_polling(q)) &&
1766 (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) { 1771 (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
1767 do_gro(rxq, si, pkt); 1772 do_gro(rxq, si, pkt);
1768 return 0; 1773 return 0;
@@ -1801,6 +1806,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
1801 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); 1806 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan));
1802 rxq->stats.vlan_ex++; 1807 rxq->stats.vlan_ex++;
1803 } 1808 }
1809 skb_mark_napi_id(skb, &q->napi);
1804 netif_receive_skb(skb); 1810 netif_receive_skb(skb);
1805 return 0; 1811 return 0;
1806} 1812}
@@ -1963,6 +1969,38 @@ static int process_responses(struct sge_rspq *q, int budget)
1963 return budget - budget_left; 1969 return budget - budget_left;
1964} 1970}
1965 1971
1972#ifdef CONFIG_NET_RX_BUSY_POLL
1973int cxgb_busy_poll(struct napi_struct *napi)
1974{
1975 struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
1976 unsigned int params, work_done;
1977 u32 val;
1978
1979 if (!cxgb_poll_lock_poll(q))
1980 return LL_FLUSH_BUSY;
1981
1982 work_done = process_responses(q, 4);
1983 params = QINTR_TIMER_IDX(TIMERREG_COUNTER0_X) | QINTR_CNT_EN;
1984 q->next_intr_params = params;
1985 val = CIDXINC_V(work_done) | SEINTARM_V(params);
1986
1987 /* If we don't have access to the new User GTS (T5+), use the old
1988 * doorbell mechanism; otherwise use the new BAR2 mechanism.
1989 */
1990 if (unlikely(!q->bar2_addr))
1991 t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS_A),
1992 val | INGRESSQID_V((u32)q->cntxt_id));
1993 else {
1994 writel(val | INGRESSQID_V(q->bar2_qid),
1995 q->bar2_addr + SGE_UDB_GTS);
1996 wmb();
1997 }
1998
1999 cxgb_poll_unlock_poll(q);
2000 return work_done;
2001}
2002#endif /* CONFIG_NET_RX_BUSY_POLL */
2003
1966/** 2004/**
1967 * napi_rx_handler - the NAPI handler for Rx processing 2005 * napi_rx_handler - the NAPI handler for Rx processing
1968 * @napi: the napi instance 2006 * @napi: the napi instance
@@ -1978,9 +2016,13 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
1978{ 2016{
1979 unsigned int params; 2017 unsigned int params;
1980 struct sge_rspq *q = container_of(napi, struct sge_rspq, napi); 2018 struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
1981 int work_done = process_responses(q, budget); 2019 int work_done;
1982 u32 val; 2020 u32 val;
1983 2021
2022 if (!cxgb_poll_lock_napi(q))
2023 return budget;
2024
2025 work_done = process_responses(q, budget);
1984 if (likely(work_done < budget)) { 2026 if (likely(work_done < budget)) {
1985 int timer_index; 2027 int timer_index;
1986 2028
@@ -2018,6 +2060,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
2018 q->bar2_addr + SGE_UDB_GTS); 2060 q->bar2_addr + SGE_UDB_GTS);
2019 wmb(); 2061 wmb();
2020 } 2062 }
2063 cxgb_poll_unlock_napi(q);
2021 return work_done; 2064 return work_done;
2022} 2065}
2023 2066
@@ -2341,6 +2384,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
2341 goto err; 2384 goto err;
2342 2385
2343 netif_napi_add(dev, &iq->napi, napi_rx_handler, 64); 2386 netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
2387 napi_hash_add(&iq->napi);
2344 iq->cur_desc = iq->desc; 2388 iq->cur_desc = iq->desc;
2345 iq->cidx = 0; 2389 iq->cidx = 0;
2346 iq->gen = 1; 2390 iq->gen = 1;
@@ -2598,6 +2642,7 @@ static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
2598 rq->cntxt_id, fl_id, 0xffff); 2642 rq->cntxt_id, fl_id, 0xffff);
2599 dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len, 2643 dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len,
2600 rq->desc, rq->phys_addr); 2644 rq->desc, rq->phys_addr);
2645 napi_hash_del(&rq->napi);
2601 netif_napi_del(&rq->napi); 2646 netif_napi_del(&rq->napi);
2602 rq->netdev = NULL; 2647 rq->netdev = NULL;
2603 rq->cntxt_id = rq->abs_id = 0; 2648 rq->cntxt_id = rq->abs_id = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
index a40484432ebf..997ec87470c7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
@@ -59,6 +59,7 @@
59 59
60/* GTS register */ 60/* GTS register */
61#define SGE_TIMERREGS 6 61#define SGE_TIMERREGS 6
62#define TIMERREG_COUNTER0_X 0
62 63
63/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues. 64/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues.
64 * The User Doorbells are each 128 bytes in length with a Simple Doorbell at 65 * The User Doorbells are each 128 bytes in length with a Simple Doorbell at