 include/linux/netdevice.h |  10 ++
 net/core/dev.c            | 102 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 92 insertions(+), 20 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 86bacf6a64f0..e71de66e3792 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -334,6 +334,16 @@ enum {
 	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
 	NAPI_STATE_HASHED,	/* In NAPI hash (busy polling possible) */
 	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
+	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+};
+
+enum {
+	NAPIF_STATE_SCHED	 = (1UL << NAPI_STATE_SCHED),
+	NAPIF_STATE_DISABLE	 = (1UL << NAPI_STATE_DISABLE),
+	NAPIF_STATE_NPSVC	 = (1UL << NAPI_STATE_NPSVC),
+	NAPIF_STATE_HASHED	 = (1UL << NAPI_STATE_HASHED),
+	NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
+	NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
 };
 
 enum gro_result {
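
The NAPIF_STATE_* values mirror the NAPI_STATE_* bit numbers as single-bit
masks, so napi->state can be snapshotted with one read and tested against
several flags in one AND, instead of one test_bit() per flag. A minimal
standalone sketch of that pattern (the names below are illustrative, not the
kernel's):

#include <stdio.h>

/* Bit numbers, in the style of NAPI_STATE_*. */
enum { STATE_SCHED, STATE_DISABLE, STATE_IN_BUSY_POLL };

/* Matching one-bit masks, in the style of NAPIF_STATE_*. */
enum {
	STATEF_SCHED        = (1UL << STATE_SCHED),
	STATEF_DISABLE      = (1UL << STATE_DISABLE),
	STATEF_IN_BUSY_POLL = (1UL << STATE_IN_BUSY_POLL),
};

int main(void)
{
	unsigned long state = STATEF_SCHED;

	/* One load and one AND test several flags at once. */
	if (state & (STATEF_DISABLE | STATEF_SCHED | STATEF_IN_BUSY_POLL))
		printf("napi busy, back off\n");
	return 0;
}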
diff --git a/net/core/dev.c b/net/core/dev.c
index 6deba68ad9e4..369dcc8efc01 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4902,6 +4902,12 @@ void __napi_complete(struct napi_struct *n)
 {
 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 
+	/* Some drivers call us directly, instead of calling
+	 * napi_complete_done().
+	 */
+	if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
+		return;
+
 	list_del_init(&n->poll_list);
 	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
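
The early return above matters because some drivers still complete NAPI by
calling __napi_complete() directly from their poll handler rather than going
through napi_complete_done(). A hypothetical driver poll routine showing that
call path; foo_rx_clean() and foo_enable_irq() are invented helpers, not part
of this patch:

static int foo_poll(struct napi_struct *napi, int budget)
{
	int work = foo_rx_clean(napi, budget);	/* hypothetical RX cleanup */

	if (work < budget) {
		/* Direct call, bypassing napi_complete_done(); the new
		 * NAPI_STATE_IN_BUSY_POLL check keeps this safe while
		 * sk_busy_loop() owns the NAPI.
		 */
		__napi_complete(napi);
		foo_enable_irq(napi);		/* hypothetical IRQ re-arm */
	}
	return work;
}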
@@ -4913,10 +4919,13 @@ void napi_complete_done(struct napi_struct *n, int work_done)
 	unsigned long flags;
 
 	/*
-	 * don't let napi dequeue from the cpu poll list
-	 * just in case its running on a different cpu
+	 * 1) Don't let napi dequeue from the cpu poll list
+	 *    just in case its running on a different cpu.
+	 * 2) If we are busy polling, do nothing here, we have
+	 *    the guarantee we will be called later.
 	 */
-	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+	if (unlikely(n->state & (NAPIF_STATE_NPSVC |
+				 NAPIF_STATE_IN_BUSY_POLL)))
 		return;
 
 	if (n->gro_list) {
@@ -4956,13 +4965,41 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 }
 
 #if defined(CONFIG_NET_RX_BUSY_POLL)
+
 #define BUSY_POLL_BUDGET 8
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
+{
+	int rc;
+
+	clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
+
+	local_bh_disable();
+
+	/* All we really want here is to re-enable device interrupts.
+	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
+	 */
+	rc = napi->poll(napi, BUSY_POLL_BUDGET);
+	netpoll_poll_unlock(have_poll_lock);
+	if (rc == BUSY_POLL_BUDGET)
+		__napi_schedule(napi);
+	local_bh_enable();
+	if (local_softirq_pending())
+		do_softirq();
+}
+
 bool sk_busy_loop(struct sock *sk, int nonblock)
 {
 	unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+	int (*napi_poll)(struct napi_struct *napi, int budget);
 	int (*busy_poll)(struct napi_struct *dev);
+	void *have_poll_lock = NULL;
 	struct napi_struct *napi;
-	int rc = false;
+	int rc;
+
+restart:
+	rc = false;
+	napi_poll = NULL;
 
 	rcu_read_lock();
 
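
busy_poll_stop() hands ownership back by paying for one more poll round with
BHs disabled, mainly so the driver re-enables its device interrupts, and it
reschedules the NAPI if that round exhausts the budget. The comment floats an
ndo_busy_poll_stop() hook that this patch does not add; a hypothetical driver
implementation of such a hook might simply re-arm the interrupt (foo_ring and
foo_enable_irq() are made up for illustration):

/* Hypothetical: what the comment's ndo_busy_poll_stop() could do,
 * re-arming the device interrupt without paying for another round.
 */
static void foo_busy_poll_stop(struct napi_struct *napi)
{
	struct foo_ring *ring = container_of(napi, struct foo_ring, napi);

	foo_enable_irq(ring);	/* made-up helper: unmask RX interrupt */
}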
@@ -4973,24 +5010,33 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 	/* Note: ndo_busy_poll method is optional in linux-4.5 */
 	busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
 
-	do {
+	preempt_disable();
+	for (;;) {
 		rc = 0;
 		local_bh_disable();
 		if (busy_poll) {
 			rc = busy_poll(napi);
-		} else if (napi_schedule_prep(napi)) {
-			void *have = netpoll_poll_lock(napi);
-
-			if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
-				rc = napi->poll(napi, BUSY_POLL_BUDGET);
-				trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
-				if (rc == BUSY_POLL_BUDGET) {
-					napi_complete_done(napi, rc);
-					napi_schedule(napi);
-				}
-			}
-			netpoll_poll_unlock(have);
+			goto count;
 		}
+		if (!napi_poll) {
+			unsigned long val = READ_ONCE(napi->state);
+
+			/* If multiple threads are competing for this napi,
+			 * we avoid dirtying napi->state as much as we can.
+			 */
+			if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
+				   NAPIF_STATE_IN_BUSY_POLL))
+				goto count;
+			if (cmpxchg(&napi->state, val,
+				    val | NAPIF_STATE_IN_BUSY_POLL |
+					  NAPIF_STATE_SCHED) != val)
+				goto count;
+			have_poll_lock = netpoll_poll_lock(napi);
+			napi_poll = napi->poll;
+		}
+		rc = napi_poll(napi, BUSY_POLL_BUDGET);
+		trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+count:
 		if (rc > 0)
 			__NET_ADD_STATS(sock_net(sk),
 					LINUX_MIB_BUSYPOLLRXPACKETS, rc);
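
The cmpxchg above is the heart of the new scheme: snapshot napi->state once,
bail out to count: if the NAPI is disabled, already scheduled, or already
busy-polled, and otherwise claim NAPIF_STATE_SCHED plus
NAPIF_STATE_IN_BUSY_POLL in a single atomic step. A thread that loses the
race also falls through rather than retrying, which is what keeps competing
threads from dirtying the shared cache line. A standalone sketch of the same
handshake using C11 atomics (names illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum {
	F_SCHED        = 1UL << 0,
	F_DISABLE      = 1UL << 1,
	F_IN_BUSY_POLL = 1UL << 2,
};

static _Atomic unsigned long state;

static bool try_own(void)
{
	unsigned long val = atomic_load(&state);

	/* Give up if anyone owns this NAPI already... */
	if (val & (F_DISABLE | F_SCHED | F_IN_BUSY_POLL))
		return false;
	/* ...and also give up, rather than retry, on a lost race. */
	return atomic_compare_exchange_strong(&state, &val,
			val | F_IN_BUSY_POLL | F_SCHED);
}

int main(void)
{
	printf("first claim:  %d\n", try_own());	/* 1: we took ownership */
	printf("second claim: %d\n", try_own());	/* 0: already owned */
	return 0;
}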
@@ -4999,10 +5045,26 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 		if (rc == LL_FLUSH_FAILED)
 			break; /* permanent failure */
 
-		cpu_relax();
-	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
-		 !need_resched() && !busy_loop_timeout(end_time));
+		if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
+		    busy_loop_timeout(end_time))
+			break;
 
+		if (unlikely(need_resched())) {
+			if (napi_poll)
+				busy_poll_stop(napi, have_poll_lock);
+			preempt_enable();
+			rcu_read_unlock();
+			cond_resched();
+			rc = !skb_queue_empty(&sk->sk_receive_queue);
+			if (rc || busy_loop_timeout(end_time))
+				return rc;
+			goto restart;
+		}
+		cpu_relax_lowlatency();
+	}
+	if (napi_poll)
+		busy_poll_stop(napi, have_poll_lock);
+	preempt_enable();
 	rc = !skb_queue_empty(&sk->sk_receive_queue);
 out:
 	rcu_read_unlock();
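
For completeness, this path is driven from the socket side: sk_busy_loop()
only spins when the socket has a busy-poll budget, configured globally through
the net.core.busy_read and net.core.busy_poll sysctls or per socket with
SO_BUSY_POLL. A minimal user of the feature (raising the per-socket value may
require CAP_NET_ADMIN):

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int usecs = 50;	/* busy-poll up to 50 us per blocking receive */

	if (fd < 0)
		return 1;
	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, &usecs, sizeof(usecs)))
		perror("setsockopt(SO_BUSY_POLL)");
	/* Blocking recv()/poll() on fd can now enter sk_busy_loop(). */
	close(fd);
	return 0;
}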