diff options
author | Eric Dumazet <edumazet@google.com> | 2015-04-12 21:51:09 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-04-13 16:40:05 -0400 |
commit | 789f558cfb3680aeb52de137418637f6b04b7d22 (patch) | |
tree | 0031c54a2fe41480ed509ba140a1c12ecad075a6 /net/dccp | |
parent | 20a1d16526b79c76cd45e29cb637aec1d43c41de (diff) |
tcp/dccp: get rid of central timewait timer
Using a timer wheel for timewait sockets was nice ~15 years ago when
memory was expensive and machines had a single processor.
This does not scale: the code is ugly and is a source of huge latencies
(typically 30 ms have been seen, with cpus spinning on the death_lock spinlock).
We can afford to use an extra 64 bytes per timewait sock and spread
the timewait load across all cpus to get better behavior.
Tested:
In the following test, /proc/sys/net/ipv4/tcp_tw_recycle is set to 1
on the target (lpaa24).
Before patch :
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
419594
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
437171
While the test is running, we can observe latencies of 25 or even 33 ms.
lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 20601ms
rtt min/avg/max/mdev = 0.020/0.217/25.771/1.535 ms, pipe 2
lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 20702ms
rtt min/avg/max/mdev = 0.019/0.183/33.761/1.441 ms, pipe 2
After patch :
About a 90% increase in throughput:
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
810442
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
800992
Latencies are also kept to minimal values during this load, even
though network utilization is 90% higher:
lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 19991ms
rtt min/avg/max/mdev = 0.023/0.064/0.360/0.042 ms
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/minisocks.c | 19 |
1 file changed, 3 insertions, 16 deletions
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 332f7d6d9942..5f566663e47f 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c | |||
@@ -27,28 +27,16 @@ | |||
27 | 27 | ||
28 | struct inet_timewait_death_row dccp_death_row = { | 28 | struct inet_timewait_death_row dccp_death_row = { |
29 | .sysctl_max_tw_buckets = NR_FILE * 2, | 29 | .sysctl_max_tw_buckets = NR_FILE * 2, |
30 | .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, | ||
31 | .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock), | ||
32 | .hashinfo = &dccp_hashinfo, | 30 | .hashinfo = &dccp_hashinfo, |
33 | .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, | ||
34 | (unsigned long)&dccp_death_row), | ||
35 | .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, | ||
36 | inet_twdr_twkill_work), | ||
37 | /* Short-time timewait calendar */ | ||
38 | |||
39 | .twcal_hand = -1, | ||
40 | .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, | ||
41 | (unsigned long)&dccp_death_row), | ||
42 | }; | 31 | }; |
43 | 32 | ||
44 | EXPORT_SYMBOL_GPL(dccp_death_row); | 33 | EXPORT_SYMBOL_GPL(dccp_death_row); |
45 | 34 | ||
46 | void dccp_time_wait(struct sock *sk, int state, int timeo) | 35 | void dccp_time_wait(struct sock *sk, int state, int timeo) |
47 | { | 36 | { |
48 | struct inet_timewait_sock *tw = NULL; | 37 | struct inet_timewait_sock *tw; |
49 | 38 | ||
50 | if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) | 39 | tw = inet_twsk_alloc(sk, &dccp_death_row, state); |
51 | tw = inet_twsk_alloc(sk, state); | ||
52 | 40 | ||
53 | if (tw != NULL) { | 41 | if (tw != NULL) { |
54 | const struct inet_connection_sock *icsk = inet_csk(sk); | 42 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -71,8 +59,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) | |||
71 | if (state == DCCP_TIME_WAIT) | 59 | if (state == DCCP_TIME_WAIT) |
72 | timeo = DCCP_TIMEWAIT_LEN; | 60 | timeo = DCCP_TIMEWAIT_LEN; |
73 | 61 | ||
74 | inet_twsk_schedule(tw, &dccp_death_row, timeo, | 62 | inet_twsk_schedule(tw, timeo); |
75 | DCCP_TIMEWAIT_LEN); | ||
76 | inet_twsk_put(tw); | 63 | inet_twsk_put(tw); |
77 | } else { | 64 | } else { |
78 | /* Sorry, if we're out of memory, just CLOSE this | 65 | /* Sorry, if we're out of memory, just CLOSE this |