diff options
author | Eric Dumazet <edumazet@google.com> | 2015-04-12 21:51:09 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-04-13 16:40:05 -0400 |
commit | 789f558cfb3680aeb52de137418637f6b04b7d22 (patch) | |
tree | 0031c54a2fe41480ed509ba140a1c12ecad075a6 /net/ipv6 | |
parent | 20a1d16526b79c76cd45e29cb637aec1d43c41de (diff) |
tcp/dccp: get rid of central timewait timer
Using a timer wheel for timewait sockets was nice ~15 years ago when
memory was expensive and machines had a single processor.
This does not scale, the code is ugly, and it is a source of huge latencies
(typically 30 ms have been seen, with cpus spinning on the death_lock spinlock).
We can afford to use an extra 64 bytes per timewait sock and spread
timewait load to all cpus to have better behavior.
Tested:
In the following test, /proc/sys/net/ipv4/tcp_tw_recycle is set to 1
on the target (lpaa24).
Before patch :
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
419594
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
437171
While test is running, we can observe 25 or even 33 ms latencies.
lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 20601ms
rtt min/avg/max/mdev = 0.020/0.217/25.771/1.535 ms, pipe 2
lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 20702ms
rtt min/avg/max/mdev = 0.019/0.183/33.761/1.441 ms, pipe 2
After patch :
About 90% increase of throughput :
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
810442
lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
800992
And latencies are kept to minimal values during this load, even
though network utilization is 90% higher :
lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 19991ms
rtt min/avg/max/mdev = 0.023/0.064/0.360/0.042 ms
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 2 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 4 |
2 files changed, 3 insertions, 3 deletions
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 033f17816ef4..871641bc1ed4 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -246,7 +246,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, | |||
246 | *twp = tw; | 246 | *twp = tw; |
247 | } else if (tw) { | 247 | } else if (tw) { |
248 | /* Silly. Should hash-dance instead... */ | 248 | /* Silly. Should hash-dance instead... */ |
249 | inet_twsk_deschedule(tw, death_row); | 249 | inet_twsk_deschedule(tw); |
250 | 250 | ||
251 | inet_twsk_put(tw); | 251 | inet_twsk_put(tw); |
252 | } | 252 | } |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f73a97f6e68e..ad51df85aa00 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -1486,7 +1486,7 @@ do_time_wait: | |||
1486 | ntohs(th->dest), tcp_v6_iif(skb)); | 1486 | ntohs(th->dest), tcp_v6_iif(skb)); |
1487 | if (sk2) { | 1487 | if (sk2) { |
1488 | struct inet_timewait_sock *tw = inet_twsk(sk); | 1488 | struct inet_timewait_sock *tw = inet_twsk(sk); |
1489 | inet_twsk_deschedule(tw, &tcp_death_row); | 1489 | inet_twsk_deschedule(tw); |
1490 | inet_twsk_put(tw); | 1490 | inet_twsk_put(tw); |
1491 | sk = sk2; | 1491 | sk = sk2; |
1492 | tcp_v6_restore_cb(skb); | 1492 | tcp_v6_restore_cb(skb); |
@@ -1728,9 +1728,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) | |||
1728 | static void get_timewait6_sock(struct seq_file *seq, | 1728 | static void get_timewait6_sock(struct seq_file *seq, |
1729 | struct inet_timewait_sock *tw, int i) | 1729 | struct inet_timewait_sock *tw, int i) |
1730 | { | 1730 | { |
1731 | long delta = tw->tw_timer.expires - jiffies; | ||
1731 | const struct in6_addr *dest, *src; | 1732 | const struct in6_addr *dest, *src; |
1732 | __u16 destp, srcp; | 1733 | __u16 destp, srcp; |
1733 | s32 delta = tw->tw_ttd - inet_tw_time_stamp(); | ||
1734 | 1734 | ||
1735 | dest = &tw->tw_v6_daddr; | 1735 | dest = &tw->tw_v6_daddr; |
1736 | src = &tw->tw_v6_rcv_saddr; | 1736 | src = &tw->tw_v6_rcv_saddr; |