author	Eric Dumazet <edumazet@google.com>	2012-07-20 01:45:50 -0400
committer	David S. Miller <davem@davemloft.net>	2012-07-20 13:59:41 -0400
commit	6f458dfb409272082c9bfa412f77ff2fc21c626f (patch)
tree	7475cd12eb01023b9852cbc957080b9aa7cfdc64 /net/ipv4/tcp_output.c
parent	9dc274151a548ffd215caecec5a8872db8799447 (diff)
tcp: improve latencies of timer triggered events
The modern TCP stack depends heavily on tcp_write_timer() having a small
latency, but the current implementation doesn't quite meet that expectation.

When a timer fires but finds the socket owned by the user, it rearms itself
for an additional delay, hoping the next run will be more successful.

tcp_write_timer(), for example, uses a 50ms delay for the next try, which
defeats many attempts to get predictable TCP behavior in terms of latency.
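For illustration only, a simplified sketch of the pre-patch retry pattern
described above (not the exact kernel code; details trimmed):

static void tcp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_write_timer_handler(sk);	/* do the real work now */
	} else {
		/* Socket locked by the user: rearm ~50ms later (HZ / 20
		 * jiffies) and hope the next run is luckier.  This retry
		 * delay is the latency source the patch removes.
		 */
		sk_reset_timer(sk, &inet_csk(sk)->icsk_retransmit_timer,
			       jiffies + (HZ / 20));
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}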
Use the recently introduced tcp_release_cb() so that the user owning the
socket calls the various handlers right before releasing it.

This will let us post a follow-up patch to address the
tcp_tso_should_defer() syndrome (some deferred packets have to wait for the
RTO timer before they are transmitted, even though cwnd would allow us to
send them sooner).
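The timer-side half of the mechanism lives in net/ipv4/tcp_timer.c and is
not shown in the diffstat below; a rough sketch of it, based on the
description above (simplified, not the exact patch hunk):

static void tcp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_write_timer_handler(sk);
	} else {
		/* Socket locked by the user: record the deferred work
		 * instead of rearming.  tcp_release_cb() runs the handler
		 * as soon as the user path releases the socket lock.
		 */
		set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}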
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Cc: H.K. Jerry Chu <hkchu@google.com>
Cc: John Heffner <johnwheffner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	46
1 file changed, 29 insertions(+), 17 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 27a32acfdb62..950aebfd9967 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -837,6 +837,13 @@ struct tsq_tasklet {
 };
 static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
 
+static void tcp_tsq_handler(struct sock *sk)
+{
+	if ((1 << sk->sk_state) &
+	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
+	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
+		tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+}
 /*
  * One tasklest per cpu tries to send more skbs.
  * We run in tasklet context but need to disable irqs when
@@ -864,16 +871,10 @@ static void tcp_tasklet_func(unsigned long data)
 		bh_lock_sock(sk);
 
 		if (!sock_owned_by_user(sk)) {
-			if ((1 << sk->sk_state) &
-			    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
-			     TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
-				tcp_write_xmit(sk,
-					       tcp_current_mss(sk),
-					       0, 0,
-					       GFP_ATOMIC);
+			tcp_tsq_handler(sk);
 		} else {
 			/* defer the work to tcp_release_cb() */
-			set_bit(TSQ_OWNED, &tp->tsq_flags);
+			set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
 		}
 		bh_unlock_sock(sk);
 
@@ -882,6 +883,9 @@ static void tcp_tasklet_func(unsigned long data)
 	}
 }
 
+#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |		\
+			  (1UL << TCP_WRITE_TIMER_DEFERRED) |	\
+			  (1UL << TCP_DELACK_TIMER_DEFERRED))
 /**
  * tcp_release_cb - tcp release_sock() callback
  * @sk: socket
@@ -892,16 +896,24 @@ static void tcp_tasklet_func(unsigned long data)
 void tcp_release_cb(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned long flags, nflags;
 
-	if (test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags)) {
-		if ((1 << sk->sk_state) &
-		    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
-		     TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
-			tcp_write_xmit(sk,
-				       tcp_current_mss(sk),
-				       0, 0,
-				       GFP_ATOMIC);
-	}
+	/* perform an atomic operation only if at least one flag is set */
+	do {
+		flags = tp->tsq_flags;
+		if (!(flags & TCP_DEFERRED_ALL))
+			return;
+		nflags = flags & ~TCP_DEFERRED_ALL;
+	} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
+
+	if (flags & (1UL << TCP_TSQ_DEFERRED))
+		tcp_tsq_handler(sk);
+
+	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
+		tcp_write_timer_handler(sk);
+
+	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
+		tcp_delack_timer_handler(sk);
 }
 EXPORT_SYMBOL(tcp_release_cb);
 
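For context (not part of this diff): tcp_release_cb() is reached through the
sk_prot->release_cb hook, which release_sock() invokes after draining the
backlog. A simplified sketch of release_sock() from net/core/sock.c of that
era (lockdep annotations omitted):

void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);		/* process packets queued while the user owned the socket */

	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);	/* tcp_release_cb() for TCP sockets */

	sk->sk_lock.owned = 0;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}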