aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2015-04-12 21:51:09 -0400
committer David S. Miller <davem@davemloft.net> 2015-04-13 16:40:05 -0400
commit 789f558cfb3680aeb52de137418637f6b04b7d22 (patch)
tree 0031c54a2fe41480ed509ba140a1c12ecad075a6 /net/dccp
parent 20a1d16526b79c76cd45e29cb637aec1d43c41de (diff)
tcp/dccp: get rid of central timewait timer
Using a timer wheel for timewait sockets was nice ~15 years ago, when memory was expensive and machines had a single processor.

This does not scale; the code is ugly and a source of huge latencies (typically 30 ms have been seen, with cpus spinning on the death_lock spinlock).

We can afford to use an extra 64 bytes per timewait sock and spread the timewait load across all cpus to get better behavior.

Tested:

On the following test, /proc/sys/net/ipv4/tcp_tw_recycle is set to 1 on the target (lpaa24).

Before patch:

lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
419594
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
437171

While the test is running, we can observe 25 or even 33 ms latencies.

lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 20601ms
rtt min/avg/max/mdev = 0.020/0.217/25.771/1.535 ms, pipe 2

lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 20702ms
rtt min/avg/max/mdev = 0.019/0.183/33.761/1.441 ms, pipe 2

After patch:

About a 90% increase in throughput:

lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
810442
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
800992

And latencies are kept to minimal values during this load, even though network utilization is 90% higher:

lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 19991ms
rtt min/avg/max/mdev = 0.023/0.064/0.360/0.042 ms

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/dccp')
-rw-r--r-- net/dccp/minisocks.c 19
1 files changed, 3 insertions, 16 deletions
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 332f7d6d9942..5f566663e47f 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -27,28 +27,16 @@
27 27
28struct inet_timewait_death_row dccp_death_row = { 28struct inet_timewait_death_row dccp_death_row = {
29 .sysctl_max_tw_buckets = NR_FILE * 2, 29 .sysctl_max_tw_buckets = NR_FILE * 2,
30 .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
31 .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
32 .hashinfo = &dccp_hashinfo, 30 .hashinfo = &dccp_hashinfo,
33 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
34 (unsigned long)&dccp_death_row),
35 .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
36 inet_twdr_twkill_work),
37/* Short-time timewait calendar */
38
39 .twcal_hand = -1,
40 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
41 (unsigned long)&dccp_death_row),
42}; 31};
43 32
44EXPORT_SYMBOL_GPL(dccp_death_row); 33EXPORT_SYMBOL_GPL(dccp_death_row);
45 34
46void dccp_time_wait(struct sock *sk, int state, int timeo) 35void dccp_time_wait(struct sock *sk, int state, int timeo)
47{ 36{
48 struct inet_timewait_sock *tw = NULL; 37 struct inet_timewait_sock *tw;
49 38
50 if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) 39 tw = inet_twsk_alloc(sk, &dccp_death_row, state);
51 tw = inet_twsk_alloc(sk, state);
52 40
53 if (tw != NULL) { 41 if (tw != NULL) {
54 const struct inet_connection_sock *icsk = inet_csk(sk); 42 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -71,8 +59,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
71 if (state == DCCP_TIME_WAIT) 59 if (state == DCCP_TIME_WAIT)
72 timeo = DCCP_TIMEWAIT_LEN; 60 timeo = DCCP_TIMEWAIT_LEN;
73 61
74 inet_twsk_schedule(tw, &dccp_death_row, timeo, 62 inet_twsk_schedule(tw, timeo);
75 DCCP_TIMEWAIT_LEN);
76 inet_twsk_put(tw); 63 inet_twsk_put(tw);
77 } else { 64 } else {
78 /* Sorry, if we're out of memory, just CLOSE this 65 /* Sorry, if we're out of memory, just CLOSE this