Diffstat (limited to 'net/core')
 net/core/dev.c | 102
 1 file changed, 82 insertions(+), 20 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6deba68ad9e4..369dcc8efc01 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4902,6 +4902,12 @@ void __napi_complete(struct napi_struct *n)
 {
 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 
+	/* Some drivers call us directly, instead of calling
+	 * napi_complete_done().
+	 */
+	if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
+		return;
+
 	list_del_init(&n->poll_list);
 	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
@@ -4913,10 +4919,13 @@ void napi_complete_done(struct napi_struct *n, int work_done)
 	unsigned long flags;
 
 	/*
-	 * don't let napi dequeue from the cpu poll list
-	 * just in case its running on a different cpu
+	 * 1) Don't let napi dequeue from the cpu poll list
+	 *    just in case its running on a different cpu.
+	 * 2) If we are busy polling, do nothing here, we have
+	 *    the guarantee we will be called later.
 	 */
-	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+	if (unlikely(n->state & (NAPIF_STATE_NPSVC |
+				 NAPIF_STATE_IN_BUSY_POLL)))
 		return;
 
 	if (n->gro_list) {
@@ -4956,13 +4965,41 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 }
 
 #if defined(CONFIG_NET_RX_BUSY_POLL)
+
 #define BUSY_POLL_BUDGET 8
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
+{
+	int rc;
+
+	clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
+
+	local_bh_disable();
+
+	/* All we really want here is to re-enable device interrupts.
+	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
+	 */
+	rc = napi->poll(napi, BUSY_POLL_BUDGET);
+	netpoll_poll_unlock(have_poll_lock);
+	if (rc == BUSY_POLL_BUDGET)
+		__napi_schedule(napi);
+	local_bh_enable();
+	if (local_softirq_pending())
+		do_softirq();
+}
+
 bool sk_busy_loop(struct sock *sk, int nonblock)
 {
 	unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+	int (*napi_poll)(struct napi_struct *napi, int budget);
 	int (*busy_poll)(struct napi_struct *dev);
+	void *have_poll_lock = NULL;
 	struct napi_struct *napi;
-	int rc = false;
+	int rc;
+
+restart:
+	rc = false;
+	napi_poll = NULL;
 
 	rcu_read_lock();
 
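busy_poll_stop() hands the NAPI context back: it clears the busy-poll bit, runs one more poll round under local_bh_disable() so the driver gets a chance to re-arm its interrupt, and reschedules NAPI if that round consumed the whole budget (meaning the ring may not be drained). The sketch below is a hedged userspace illustration of that budget convention only; fake_poll() and the pending counter are invented stand-ins for a driver's poll callback.

/* Userspace sketch of the budget convention busy_poll_stop() relies on:
 * a poll callback that returns exactly `budget` may still have work queued,
 * so the caller must reschedule instead of assuming the ring is empty.
 */
#include <stdio.h>

#define BUSY_POLL_BUDGET 8

static int pending = 11;	/* pretend 11 packets are queued */

/* Stand-in for napi->poll(): process at most `budget` packets. */
static int fake_poll(int budget)
{
	int done = pending < budget ? pending : budget;

	pending -= done;
	return done;
}

int main(void)
{
	int rc = fake_poll(BUSY_POLL_BUDGET);

	if (rc == BUSY_POLL_BUDGET)
		printf("budget exhausted (%d), reschedule poll\n", rc);
	else
		printf("ring drained (%d), interrupts can be re-armed\n", rc);
	return 0;
}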
@@ -4973,24 +5010,33 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 	/* Note: ndo_busy_poll method is optional in linux-4.5 */
 	busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
 
-	do {
+	preempt_disable();
+	for (;;) {
 		rc = 0;
 		local_bh_disable();
 		if (busy_poll) {
 			rc = busy_poll(napi);
-		} else if (napi_schedule_prep(napi)) {
-			void *have = netpoll_poll_lock(napi);
-
-			if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
-				rc = napi->poll(napi, BUSY_POLL_BUDGET);
-				trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
-				if (rc == BUSY_POLL_BUDGET) {
-					napi_complete_done(napi, rc);
-					napi_schedule(napi);
-				}
-			}
-			netpoll_poll_unlock(have);
+			goto count;
 		}
+		if (!napi_poll) {
+			unsigned long val = READ_ONCE(napi->state);
+
+			/* If multiple threads are competing for this napi,
+			 * we avoid dirtying napi->state as much as we can.
+			 */
+			if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
+				   NAPIF_STATE_IN_BUSY_POLL))
+				goto count;
+			if (cmpxchg(&napi->state, val,
+				    val | NAPIF_STATE_IN_BUSY_POLL |
+					  NAPIF_STATE_SCHED) != val)
+				goto count;
+			have_poll_lock = netpoll_poll_lock(napi);
+			napi_poll = napi->poll;
+		}
+		rc = napi_poll(napi, BUSY_POLL_BUDGET);
+		trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+count:
 		if (rc > 0)
 			__NET_ADD_STATS(sock_net(sk),
 					LINUX_MIB_BUSYPOLLRXPACKETS, rc);
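The hunk above replaces napi_schedule_prep()/napi_complete_done() round-trips with a single lock-free claim: a plain READ_ONCE() filters the contended case without dirtying the cache line, then one cmpxchg() moves the state from "unowned" to SCHED | IN_BUSY_POLL. The following userspace sketch shows the same two-step pattern with C11 atomics; the flag values and claim_for_busy_poll() helper are illustrative, not the kernel's.

/* Userspace sketch of the lock-free ownership claim in sk_busy_loop():
 * read first, bail out if someone else owns the context, then attempt a
 * single compare-and-swap.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define STATE_SCHED        (1UL << 0)
#define STATE_IN_BUSY_POLL (1UL << 1)
#define STATE_DISABLE      (1UL << 2)

static atomic_ulong napi_state;

static bool claim_for_busy_poll(void)
{
	unsigned long val = atomic_load(&napi_state);

	/* Cheap read-only check: skip the CAS entirely if the context is
	 * disabled, already scheduled, or already being busy-polled.
	 */
	if (val & (STATE_DISABLE | STATE_SCHED | STATE_IN_BUSY_POLL))
		return false;

	/* One atomic transition: unowned -> owned by this busy poller. */
	return atomic_compare_exchange_strong(&napi_state, &val,
					      val | STATE_SCHED |
					      STATE_IN_BUSY_POLL);
}

int main(void)
{
	printf("first claim:  %d\n", claim_for_busy_poll());	/* 1 */
	printf("second claim: %d\n", claim_for_busy_poll());	/* 0, already owned */
	return 0;
}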
@@ -4999,10 +5045,26 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 		if (rc == LL_FLUSH_FAILED)
 			break; /* permanent failure */
 
-		cpu_relax();
-	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
-		 !need_resched() && !busy_loop_timeout(end_time));
+		if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
+		    busy_loop_timeout(end_time))
+			break;
 
+		if (unlikely(need_resched())) {
+			if (napi_poll)
+				busy_poll_stop(napi, have_poll_lock);
+			preempt_enable();
+			rcu_read_unlock();
+			cond_resched();
+			rc = !skb_queue_empty(&sk->sk_receive_queue);
+			if (rc || busy_loop_timeout(end_time))
+				return rc;
+			goto restart;
+		}
+		cpu_relax_lowlatency();
+	}
+	if (napi_poll)
+		busy_poll_stop(napi, have_poll_lock);
+	preempt_enable();
 	rc = !skb_queue_empty(&sk->sk_receive_queue);
 out:
 	rcu_read_unlock();
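This final hunk makes the loop preemption-friendly: instead of folding need_resched() into the while condition, the loop tears down its busy-poll state (busy_poll_stop(), preempt_enable(), rcu_read_unlock()), calls cond_resched(), and either returns if data arrived or the window expired, or jumps back to restart for a fresh round. A rough userspace analogue of that "stop early, yield, re-check" shape is sketched below; data_ready(), now_ns() and the 50us window are invented for illustration and stand in for the socket queue check and busy_loop_timeout().

/* Userspace sketch of the exit conditions the rewritten loop uses: stop as
 * soon as data is queued or the busy-poll window expires, and yield the CPU
 * (cond_resched() in the kernel) instead of spinning through a reschedule.
 */
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool data_ready(void)
{
	return false;		/* pretend nothing ever arrives */
}

static long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	long long end = now_ns() + 50 * 1000;	/* ~50us busy-poll window */
	bool rc;

	for (;;) {
		rc = data_ready();
		if (rc || now_ns() >= end)
			break;
		/* Kernel equivalent: release busy-poll state, cond_resched(),
		 * then restart the polling round from scratch.
		 */
		sched_yield();
	}
	printf("data ready: %d\n", rc);
	return 0;
}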