aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2015-04-12 21:51:09 -0400
committer: David S. Miller <davem@davemloft.net> 2015-04-13 16:40:05 -0400
commit: 789f558cfb3680aeb52de137418637f6b04b7d22 (patch)
tree: 0031c54a2fe41480ed509ba140a1c12ecad075a6 /net/ipv6
parent: 20a1d16526b79c76cd45e29cb637aec1d43c41de (diff)
tcp/dccp: get rid of central timewait timer
Using a timer wheel for timewait sockets was nice ~15 years ago when
memory was expensive and machines had a single processor.

This does not scale, code is ugly and source of huge latencies
(Typically 30 ms have been seen, cpus spinning on death_lock spinlock.)

We can afford to use an extra 64 bytes per timewait sock and spread
timewait load to all cpus to have better behavior.

Tested:

On following test, /proc/sys/net/ipv4/tcp_tw_recycle is set to 1
on the target (lpaa24)

Before patch :

lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
419594

lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
437171

While test is running, we can observe 25 or even 33 ms latencies.

lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 20601ms
rtt min/avg/max/mdev = 0.020/0.217/25.771/1.535 ms, pipe 2

lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 20702ms
rtt min/avg/max/mdev = 0.019/0.183/33.761/1.441 ms, pipe 2

After patch :

About 90% increase of throughput :

lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
810442

lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0
800992

And latencies are kept to minimal values during this load, even
if network utilization is 90% higher :

lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23
...
1000 packets transmitted, 1000 received, 0% packet loss, time 19991ms
rtt min/avg/max/mdev = 0.023/0.064/0.360/0.042 ms

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r--  net/ipv6/inet6_hashtables.c | 2
-rw-r--r--  net/ipv6/tcp_ipv6.c         | 4
2 files changed, 3 insertions, 3 deletions
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 033f17816ef4..871641bc1ed4 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -246,7 +246,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
246 *twp = tw; 246 *twp = tw;
247 } else if (tw) { 247 } else if (tw) {
248 /* Silly. Should hash-dance instead... */ 248 /* Silly. Should hash-dance instead... */
249 inet_twsk_deschedule(tw, death_row); 249 inet_twsk_deschedule(tw);
250 250
251 inet_twsk_put(tw); 251 inet_twsk_put(tw);
252 } 252 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f73a97f6e68e..ad51df85aa00 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1486,7 +1486,7 @@ do_time_wait:
1486 ntohs(th->dest), tcp_v6_iif(skb)); 1486 ntohs(th->dest), tcp_v6_iif(skb));
1487 if (sk2) { 1487 if (sk2) {
1488 struct inet_timewait_sock *tw = inet_twsk(sk); 1488 struct inet_timewait_sock *tw = inet_twsk(sk);
1489 inet_twsk_deschedule(tw, &tcp_death_row); 1489 inet_twsk_deschedule(tw);
1490 inet_twsk_put(tw); 1490 inet_twsk_put(tw);
1491 sk = sk2; 1491 sk = sk2;
1492 tcp_v6_restore_cb(skb); 1492 tcp_v6_restore_cb(skb);
@@ -1728,9 +1728,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1728static void get_timewait6_sock(struct seq_file *seq, 1728static void get_timewait6_sock(struct seq_file *seq,
1729 struct inet_timewait_sock *tw, int i) 1729 struct inet_timewait_sock *tw, int i)
1730{ 1730{
1731 long delta = tw->tw_timer.expires - jiffies;
1731 const struct in6_addr *dest, *src; 1732 const struct in6_addr *dest, *src;
1732 __u16 destp, srcp; 1733 __u16 destp, srcp;
1733 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
1734 1734
1735 dest = &tw->tw_v6_daddr; 1735 dest = &tw->tw_v6_daddr;
1736 src = &tw->tw_v6_rcv_saddr; 1736 src = &tw->tw_v6_rcv_saddr;