path: root/net/ipv4/tcp_output.c
author	Eric Dumazet <edumazet@google.com>	2018-05-10 17:59:43 -0400
committer	David S. Miller <davem@davemloft.net>	2018-05-11 12:24:37 -0400
commit	73a6bab5aa2a83cb7df85805e08bc03b4065aea7 (patch)
tree	7f542e5b0873c4dc56003c784c2df12fed79364c /net/ipv4/tcp_output.c
parent	4cbd7a7d3c0fb1373bf981c5498b51c050668acc (diff)
tcp: switch pacing timer to softirq based hrtimer
linux-4.16 got support for softirq-based hrtimers. TCP can switch its
pacing hrtimer to this variant, since this avoids going through a tasklet
and some atomic operations.

The pacing timer logic now looks like the other (jiffies-based) TCP timers.

v2: use hrtimer_try_to_cancel() in tcp_clear_xmit_timers() to correctly
    release the reference on the socket if needed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
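For readers unfamiliar with the 4.16 hrtimer change, here is a minimal sketch
(not part of this patch) of what a softirq-based hrtimer looks like: the timer
is initialized and armed with a *_SOFT mode, so its callback runs in softirq
context and can take BH-style socket locks directly instead of bouncing
through a tasklet. The my_pacer names below are hypothetical.

/*
 * Illustrative sketch only, not part of the patch.
 */
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/types.h>

struct my_pacer {
	struct hrtimer timer;
};

/* Runs in softirq context, so bh_lock_sock()-style locking is usable here. */
static enum hrtimer_restart my_pacer_fire(struct hrtimer *t)
{
	struct my_pacer *p = container_of(t, struct my_pacer, timer);

	/* do the deferred work directly, no tasklet bounce needed */
	(void)p;
	return HRTIMER_NORESTART;
}

static void my_pacer_setup_and_arm(struct my_pacer *p, u64 delay_ns)
{
	/* the _SOFT hrtimer modes were added in linux-4.16 */
	hrtimer_init(&p->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_SOFT);
	p->timer.function = my_pacer_fire;
	hrtimer_start(&p->timer, ktime_add_ns(ktime_get(), delay_ns),
		      HRTIMER_MODE_ABS_PINNED_SOFT);
}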
Diffstat (limited to 'net/ipv4/tcp_output.c')
 net/ipv4/tcp_output.c | 69
 1 file changed, 25 insertions(+), 44 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d07c0dcc99aa..0d8f950a9006 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -772,7 +772,7 @@ struct tsq_tasklet {
 };
 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
 
-static void tcp_tsq_handler(struct sock *sk)
+static void tcp_tsq_write(struct sock *sk)
 {
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
@@ -789,6 +789,16 @@ static void tcp_tsq_handler(struct sock *sk)
 			       0, GFP_ATOMIC);
 	}
 }
+
+static void tcp_tsq_handler(struct sock *sk)
+{
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk))
+		tcp_tsq_write(sk);
+	else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+		sock_hold(sk);
+	bh_unlock_sock(sk);
+}
 /*
  * One tasklet per cpu tries to send more skbs.
  * We run in tasklet context but need to disable irqs when
@@ -816,16 +826,7 @@ static void tcp_tasklet_func(unsigned long data)
 		smp_mb__before_atomic();
 		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
 
-		if (!sk->sk_lock.owned &&
-		    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
-			bh_lock_sock(sk);
-			if (!sock_owned_by_user(sk)) {
-				clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
-				tcp_tsq_handler(sk);
-			}
-			bh_unlock_sock(sk);
-		}
-
+		tcp_tsq_handler(sk);
 		sk_free(sk);
 	}
 }
@@ -853,9 +854,10 @@ void tcp_release_cb(struct sock *sk)
 		nflags = flags & ~TCP_DEFERRED_ALL;
 	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 
-	if (flags & TCPF_TSQ_DEFERRED)
-		tcp_tsq_handler(sk);
-
+	if (flags & TCPF_TSQ_DEFERRED) {
+		tcp_tsq_write(sk);
+		__sock_put(sk);
+	}
 	/* Here begins the tricky part :
 	 * We are called from release_sock() with :
 	 * 1) BH disabled
@@ -929,7 +931,7 @@ void tcp_wfree(struct sk_buff *skb)
 		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
 			goto out;
 
-		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
 		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
 		if (nval != oval)
 			continue;
@@ -948,37 +950,17 @@ out:
 	sk_free(sk);
 }
 
-/* Note: Called under hard irq.
- * We can not call TCP stack right away.
+/* Note: Called under soft irq.
+ * We can call TCP stack right away, unless socket is owned by user.
  */
 enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 {
 	struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
 	struct sock *sk = (struct sock *)tp;
-	unsigned long nval, oval;
 
-	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
-		struct tsq_tasklet *tsq;
-		bool empty;
+	tcp_tsq_handler(sk);
+	sock_put(sk);
 
-		if (oval & TSQF_QUEUED)
-			break;
-
-		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
-		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
-		if (nval != oval)
-			continue;
-
-		if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
-			break;
-		/* queue this socket to tasklet queue */
-		tsq = this_cpu_ptr(&tsq_tasklet);
-		empty = list_empty(&tsq->head);
-		list_add(&tp->tsq_node, &tsq->head);
-		if (empty)
-			tasklet_schedule(&tsq->tasklet);
-		break;
-	}
 	return HRTIMER_NORESTART;
 }
 
@@ -1011,7 +993,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 	do_div(len_ns, rate);
 	hrtimer_start(&tcp_sk(sk)->pacing_timer,
 		      ktime_add_ns(ktime_get(), len_ns),
-		      HRTIMER_MODE_ABS_PINNED);
+		      HRTIMER_MODE_ABS_PINNED_SOFT);
+	sock_hold(sk);
 }
 
 static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
@@ -1078,7 +1061,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	/* if no packet is in qdisc/device queue, then allow XPS to select
 	 * another queue. We can be called from tcp_tsq_handler()
-	 * which holds one reference to sk_wmem_alloc.
+	 * which holds one reference to sk.
 	 *
 	 * TODO: Ideally, in-flight pure ACK packets should not matter here.
 	 * One way to get this would be to set skb->truesize = 2 on them.
@@ -2185,7 +2168,7 @@ static int tcp_mtu_probe(struct sock *sk)
 static bool tcp_pacing_check(const struct sock *sk)
 {
 	return tcp_needs_internal_pacing(sk) &&
-	       hrtimer_active(&tcp_sk(sk)->pacing_timer);
+	       hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
 }
 
 /* TCP Small Queues :
@@ -2365,8 +2348,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 					  skb, limit, mss_now, gfp)))
 			break;
 
-		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
-			clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 		if (tcp_small_queue_check(sk, skb, 0))
 			break;
 
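The v2 note in the commit message refers to a companion change outside
net/ipv4/tcp_output.c (and thus not in the diffstat above): once the armed
pacing timer holds a socket reference, canceling the timer must drop that
reference exactly once. A rough sketch of the idea, with a hypothetical
function name and assuming the reference was taken at arm time:

/*
 * Sketch only, not part of the diff above.
 */
#include <net/tcp.h>

static inline void tcp_clear_xmit_timers_sketch(struct sock *sk)
{
	/* returns 1 only if the timer was queued and we dequeued it */
	if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
		__sock_put(sk);	/* drop the reference taken when arming */

	inet_csk_clear_xmit_timers(sk);
}

hrtimer_try_to_cancel() returns 1 only when a queued timer was actually
removed, which is exactly the case where the arm-time socket reference is
still outstanding and has to be released.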