author     Eric Dumazet <edumazet@google.com>      2012-07-20 01:45:50 -0400
committer  David S. Miller <davem@davemloft.net>   2012-07-20 13:59:41 -0400
commit     6f458dfb409272082c9bfa412f77ff2fc21c626f
tree       7475cd12eb01023b9852cbc957080b9aa7cfdc64  /net/ipv4/tcp_timer.c
parent     9dc274151a548ffd215caecec5a8872db8799447
tcp: improve latencies of timer triggered events
The modern TCP stack depends heavily on tcp_write_timer() having low
latency, but the current implementation doesn't quite meet that
expectation.
When a timer fires but finds the socket owned by the user, it rearms
itself with an additional delay, hoping the next run will be more
successful.
tcp_write_timer(), for example, uses a 50ms delay (HZ / 20 jiffies) for
the next try, which defeats many attempts to get predictable TCP
behavior in terms of latency.
Use the recently introduced tcp_release_cb(), so that the user owning
the socket runs the deferred handlers right before releasing it.
This will permit us to post a followup patch to address the
tcp_tso_should_defer() syndrome (some deferred packets have to wait for
the RTO timer to be transmitted, even though cwnd would allow us to
send them sooner).
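
For context: the release-side dispatch lives in net/ipv4/tcp_output.c,
which this diffstat-limited view does not show. The sketch below
reconstructs what tcp_release_cb() does with the deferred bits set by
the timers in this patch; the TCP_DEFERRED_ALL mask name and the
cmpxchg loop are assumptions here, only the tsq_flags bits and the two
handler names appear in the diff below.

/* Sketch (not part of this diff): atomically clear any deferred bits,
 * then run the matching handlers while the socket is still owned by
 * the user who is about to release it.
 */
void tcp_release_cb(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        unsigned long flags, nflags;

        /* Perform the atomic operation only if at least one flag is set. */
        do {
                flags = tp->tsq_flags;
                if (!(flags & TCP_DEFERRED_ALL))  /* assumed mask of all *_DEFERRED bits */
                        return;
                nflags = flags & ~TCP_DEFERRED_ALL;
        } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);

        if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
                tcp_write_timer_handler(sk);     /* retransmit/probe work, no 50ms penalty */

        if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
                tcp_delack_timer_handler(sk);    /* delayed ACK work */
}

This is why the handlers are split out of the timer callbacks below:
they must be callable both from the timer softirq (socket unowned) and
from the lock owner at release time.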
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Cc: H.K. Jerry Chu <hkchu@google.com>
Cc: John Heffner <johnwheffner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r--  net/ipv4/tcp_timer.c  70
1 file changed, 37 insertions(+), 33 deletions(-)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index e911e6c523ec..6df36ad55a38 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -32,17 +32,6 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
 int sysctl_tcp_orphan_retries __read_mostly;
 int sysctl_tcp_thin_linear_timeouts __read_mostly;
 
-static void tcp_write_timer(unsigned long);
-static void tcp_delack_timer(unsigned long);
-static void tcp_keepalive_timer (unsigned long data);
-
-void tcp_init_xmit_timers(struct sock *sk)
-{
-        inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
-                                  &tcp_keepalive_timer);
-}
-EXPORT_SYMBOL(tcp_init_xmit_timers);
-
 static void tcp_write_err(struct sock *sk)
 {
         sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
@@ -205,21 +194,11 @@ static int tcp_write_timeout(struct sock *sk)
         return 0;
 }
 
-static void tcp_delack_timer(unsigned long data)
+void tcp_delack_timer_handler(struct sock *sk)
 {
-        struct sock *sk = (struct sock *)data;
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
 
-        bh_lock_sock(sk);
-        if (sock_owned_by_user(sk)) {
-                /* Try again later. */
-                icsk->icsk_ack.blocked = 1;
-                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
-                sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
-                goto out_unlock;
-        }
-
         sk_mem_reclaim_partial(sk);
 
         if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
@@ -260,7 +239,21 @@ static void tcp_delack_timer(unsigned long data)
 out:
         if (sk_under_memory_pressure(sk))
                 sk_mem_reclaim(sk);
-out_unlock:
+}
+
+static void tcp_delack_timer(unsigned long data)
+{
+        struct sock *sk = (struct sock *)data;
+
+        bh_lock_sock(sk);
+        if (!sock_owned_by_user(sk)) {
+                tcp_delack_timer_handler(sk);
+        } else {
+                inet_csk(sk)->icsk_ack.blocked = 1;
+                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
+                /* delegate our work to tcp_release_cb() */
+                set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+        }
         bh_unlock_sock(sk);
         sock_put(sk);
 }
@@ -450,19 +443,11 @@ out_reset_timer:
 out:;
 }
 
-static void tcp_write_timer(unsigned long data)
+void tcp_write_timer_handler(struct sock *sk)
 {
-        struct sock *sk = (struct sock *)data;
         struct inet_connection_sock *icsk = inet_csk(sk);
         int event;
 
-        bh_lock_sock(sk);
-        if (sock_owned_by_user(sk)) {
-                /* Try again later */
-                sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
-                goto out_unlock;
-        }
-
         if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
                 goto out;
 
@@ -485,7 +470,19 @@ static void tcp_write_timer(unsigned long data)
 
 out:
         sk_mem_reclaim(sk);
-out_unlock:
+}
+
+static void tcp_write_timer(unsigned long data)
+{
+        struct sock *sk = (struct sock *)data;
+
+        bh_lock_sock(sk);
+        if (!sock_owned_by_user(sk)) {
+                tcp_write_timer_handler(sk);
+        } else {
+                /* delegate our work to tcp_release_cb() */
+                set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+        }
         bh_unlock_sock(sk);
         sock_put(sk);
 }
@@ -602,3 +599,10 @@ out:
         bh_unlock_sock(sk);
         sock_put(sk);
 }
+
+void tcp_init_xmit_timers(struct sock *sk)
+{
+        inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
+                                  &tcp_keepalive_timer);
+}
+EXPORT_SYMBOL(tcp_init_xmit_timers);
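
For completeness, the hook that makes the deferral fire: release_sock()
invokes the protocol's release_cb while still holding the socket
spinlock, so the deferred timer work runs as soon as the user path
drops the lock instead of waiting for a rearmed timer. A simplified
sketch of net/core/sock.c as of this series (details assumed, not part
of this diff):

void release_sock(struct sock *sk)
{
        spin_lock_bh(&sk->sk_lock.slock);
        if (sk->sk_backlog.tail)
                __release_sock(sk);        /* drain backlogged packets first */

        /* For TCP, sk->sk_prot->release_cb == tcp_release_cb(), which runs
         * any handlers the timers above deferred via tsq_flags.
         */
        if (sk->sk_prot->release_cb)
                sk->sk_prot->release_cb(sk);

        sk->sk_lock.owned = 0;
        if (waitqueue_active(&sk->sk_lock.wq))
                wake_up(&sk->sk_lock.wq);
        spin_unlock_bh(&sk->sk_lock.slock);
}

The net effect: a timer that lands while the socket is owned now costs
one flag set plus a callback at unlock time, instead of a 50ms (write
timer) or TCP_DELACK_MIN (delack timer) rearm.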