author		Eric Dumazet <edumazet@google.com>	2018-05-10 17:59:43 -0400
committer	David S. Miller <davem@davemloft.net>	2018-05-11 12:24:37 -0400
commit		73a6bab5aa2a83cb7df85805e08bc03b4065aea7 (patch)
tree		7f542e5b0873c4dc56003c784c2df12fed79364c /net/ipv4/tcp_output.c
parent		4cbd7a7d3c0fb1373bf981c5498b51c050668acc (diff)
tcp: switch pacing timer to softirq based hrtimer
Linux 4.16 got support for softirq based hrtimers.

TCP can switch its pacing hrtimer to this variant, since this
avoids going through a tasklet and some atomic operations.

The pacing timer logic now looks like the other (jiffies based) TCP timers.

v2: use hrtimer_try_to_cancel() in tcp_clear_xmit_timers()
    to correctly release the reference on the socket if needed.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
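For readers unfamiliar with the softirq hrtimer API added in Linux 4.16 that this commit relies on, the sketch below shows how such a timer is initialized and armed. In this commit the actual initialization lives in tcp_init_xmit_timers() (net/ipv4/tcp_timer.c, outside this diffstat); the helper names pacing_expired and arm_pacing_timer here are illustrative only. The point of a HRTIMER_MODE_*_SOFT mode is that the expiry callback runs in softirq context rather than hard-interrupt context, which is why tcp_pace_kick() in the hunks below can take bh_lock_sock() and call into the TCP write path directly instead of bouncing through a tasklet.

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	static struct hrtimer pacing_timer;	/* illustrative; TCP embeds it in struct tcp_sock */

	/* Expiry callback: with a *_SOFT mode it runs in softirq context,
	 * so it may take bh-protected locks and do BH-context work.
	 */
	static enum hrtimer_restart pacing_expired(struct hrtimer *timer)
	{
		/* softirq-safe work goes here */
		return HRTIMER_NORESTART;
	}

	static void arm_pacing_timer(u64 delay_ns)
	{
		hrtimer_init(&pacing_timer, CLOCK_MONOTONIC,
			     HRTIMER_MODE_ABS_PINNED_SOFT);
		pacing_timer.function = pacing_expired;
		hrtimer_start(&pacing_timer,
			      ktime_add_ns(ktime_get(), delay_ns),
			      HRTIMER_MODE_ABS_PINNED_SOFT);
	}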
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	69
1 file changed, 25 insertions(+), 44 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d07c0dcc99aa..0d8f950a9006 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -772,7 +772,7 @@ struct tsq_tasklet {
 };
 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
 
-static void tcp_tsq_handler(struct sock *sk)
+static void tcp_tsq_write(struct sock *sk)
 {
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
@@ -789,6 +789,16 @@ static void tcp_tsq_handler(struct sock *sk)
 			       0, GFP_ATOMIC);
 	}
 }
+
+static void tcp_tsq_handler(struct sock *sk)
+{
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk))
+		tcp_tsq_write(sk);
+	else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+		sock_hold(sk);
+	bh_unlock_sock(sk);
+}
 /*
  * One tasklet per cpu tries to send more skbs.
  * We run in tasklet context but need to disable irqs when
@@ -816,16 +826,7 @@ static void tcp_tasklet_func(unsigned long data)
 		smp_mb__before_atomic();
 		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
 
-		if (!sk->sk_lock.owned &&
-		    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
-			bh_lock_sock(sk);
-			if (!sock_owned_by_user(sk)) {
-				clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
-				tcp_tsq_handler(sk);
-			}
-			bh_unlock_sock(sk);
-		}
-
+		tcp_tsq_handler(sk);
 		sk_free(sk);
 	}
 }
@@ -853,9 +854,10 @@ void tcp_release_cb(struct sock *sk)
 		nflags = flags & ~TCP_DEFERRED_ALL;
 	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 
-	if (flags & TCPF_TSQ_DEFERRED)
-		tcp_tsq_handler(sk);
-
+	if (flags & TCPF_TSQ_DEFERRED) {
+		tcp_tsq_write(sk);
+		__sock_put(sk);
+	}
 	/* Here begins the tricky part :
 	 * We are called from release_sock() with :
 	 * 1) BH disabled
@@ -929,7 +931,7 @@ void tcp_wfree(struct sk_buff *skb)
 		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
 			goto out;
 
-		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
 		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
 		if (nval != oval)
 			continue;
@@ -948,37 +950,17 @@ out:
 	sk_free(sk);
 }
 
-/* Note: Called under hard irq.
- * We can not call TCP stack right away.
+/* Note: Called under soft irq.
+ * We can call TCP stack right away, unless socket is owned by user.
  */
 enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 {
 	struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
 	struct sock *sk = (struct sock *)tp;
-	unsigned long nval, oval;
 
-	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
-		struct tsq_tasklet *tsq;
-		bool empty;
+	tcp_tsq_handler(sk);
+	sock_put(sk);
 
-		if (oval & TSQF_QUEUED)
-			break;
-
-		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
-		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
-		if (nval != oval)
-			continue;
-
-		if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
-			break;
-		/* queue this socket to tasklet queue */
-		tsq = this_cpu_ptr(&tsq_tasklet);
-		empty = list_empty(&tsq->head);
-		list_add(&tp->tsq_node, &tsq->head);
-		if (empty)
-			tasklet_schedule(&tsq->tasklet);
-		break;
-	}
 	return HRTIMER_NORESTART;
 }
 
@@ -1011,7 +993,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 	do_div(len_ns, rate);
 	hrtimer_start(&tcp_sk(sk)->pacing_timer,
 		      ktime_add_ns(ktime_get(), len_ns),
-		      HRTIMER_MODE_ABS_PINNED);
+		      HRTIMER_MODE_ABS_PINNED_SOFT);
+	sock_hold(sk);
 }
 
 static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
@@ -1078,7 +1061,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	/* if no packet is in qdisc/device queue, then allow XPS to select
 	 * another queue. We can be called from tcp_tsq_handler()
-	 * which holds one reference to sk_wmem_alloc.
+	 * which holds one reference to sk.
 	 *
 	 * TODO: Ideally, in-flight pure ACK packets should not matter here.
 	 * One way to get this would be to set skb->truesize = 2 on them.
@@ -2185,7 +2168,7 @@ static int tcp_mtu_probe(struct sock *sk)
 static bool tcp_pacing_check(const struct sock *sk)
 {
 	return tcp_needs_internal_pacing(sk) &&
-	       hrtimer_active(&tcp_sk(sk)->pacing_timer);
+	       hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
 }
 
 /* TCP Small Queues :
@@ -2365,8 +2348,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 							  skb, limit, mss_now, gfp)))
 			break;
 
-		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
-			clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 		if (tcp_small_queue_check(sk, skb, 0))
 			break;
 
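A note on the reference counting this patch introduces; the sketch below only condenses the hunks above, it is not additional kernel code. Arming the pacing timer in tcp_internal_pacing() now takes a socket reference with sock_hold(), and that reference is always dropped by sock_put() at the end of tcp_pace_kick(). If the expiry finds the socket owned by user space, tcp_tsq_handler() takes one extra reference when it sets TCP_TSQ_DEFERRED, and tcp_release_cb() drops that one with __sock_put() after performing the deferred tcp_tsq_write(). The v2 note about hrtimer_try_to_cancel() in tcp_clear_xmit_timers() covers the remaining case: cancelling a still-pending timer must also release the reference taken when it was armed.

	/* Reference lifetime, condensed from the hunks above (illustrative only) */

	/* tcp_internal_pacing(): arm the timer, take a reference */
	hrtimer_start(&tcp_sk(sk)->pacing_timer, expiry, HRTIMER_MODE_ABS_PINNED_SOFT);
	sock_hold(sk);

	/* tcp_pace_kick() (softirq): write now or defer, then drop the timer's reference */
	tcp_tsq_handler(sk);	/* may sock_hold() again if it sets TCP_TSQ_DEFERRED */
	sock_put(sk);

	/* tcp_release_cb() (process context releasing the socket lock) */
	if (flags & TCPF_TSQ_DEFERRED) {
		tcp_tsq_write(sk);
		__sock_put(sk);	/* drop the reference taken by tcp_tsq_handler() */
	}

The tcp_pacing_check() change from hrtimer_active() to hrtimer_is_queued() most likely fits the same model: once the write path can run from inside the timer callback itself, hrtimer_active() stays true while that callback executes, whereas hrtimer_is_queued() only reports a still-pending expiry, so the check no longer throttles the very transmission the callback is performing.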