aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2015-03-19 22:04:20 -0400
committerDavid S. Miller <davem@davemloft.net>2015-03-20 12:40:25 -0400
commitfa76ce7328b289b6edd476e24eb52fd634261720 (patch)
tree2e4c116a4e299700c185d73018bbb3518e46e1bb /net/ipv4/tcp_ipv4.c
parent52452c542559ac980b48dbf22a30ee7fa0af507c (diff)
inet: get rid of central tcp/dccp listener timer
One of the major issues for TCP is the SYNACK rtx handling, done by inet_csk_reqsk_queue_prune(), fired by the keepalive timer of a TCP_LISTEN socket. This function runs for awfully long times, with socket lock held, meaning that other cpus needing this lock have to spin for hundreds of ms. SYNACK are sent in huge bursts, likely to cause severe drops anyway. This model was OK 15 years ago when memory was very tight. We now can afford to have a timer per request sock. Timer invocations no longer need to lock the listener, and can be run from all cpus in parallel. With the following patch increasing somaxconn width to 32 bits, I tested a listener with more than 4 million active request sockets, and a steady SYNFLOOD of ~200,000 SYN per second. Host was sending ~830,000 SYNACK per second. This is ~100 times more than what we could achieve before this patch. Later, we will get rid of the listener hash and use ehash instead. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--net/ipv4/tcp_ipv4.c11
1 file changed, 8 insertions, 3 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 19c3770f1e97..5554b8f33d41 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -475,6 +475,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
475 475
476 if (seq != tcp_rsk(req)->snt_isn) { 476 if (seq != tcp_rsk(req)->snt_isn) {
477 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 477 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
478 reqsk_put(req);
478 goto out; 479 goto out;
479 } 480 }
480 481
@@ -486,6 +487,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
486 */ 487 */
487 inet_csk_reqsk_queue_drop(sk, req); 488 inet_csk_reqsk_queue_drop(sk, req);
488 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 489 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
490 reqsk_put(req);
489 goto out; 491 goto out;
490 492
491 case TCP_SYN_SENT: 493 case TCP_SYN_SENT:
@@ -1398,8 +1400,11 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1398 struct sock *nsk; 1400 struct sock *nsk;
1399 1401
1400 req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); 1402 req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
1401 if (req) 1403 if (req) {
1402 return tcp_check_req(sk, skb, req, false); 1404 nsk = tcp_check_req(sk, skb, req, false);
1405 reqsk_put(req);
1406 return nsk;
1407 }
1403 1408
1404 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1409 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1405 th->source, iph->daddr, th->dest, inet_iif(skb)); 1410 th->source, iph->daddr, th->dest, inet_iif(skb));
@@ -2208,7 +2213,7 @@ static void get_openreq4(const struct request_sock *req,
2208 struct seq_file *f, int i, kuid_t uid) 2213 struct seq_file *f, int i, kuid_t uid)
2209{ 2214{
2210 const struct inet_request_sock *ireq = inet_rsk(req); 2215 const struct inet_request_sock *ireq = inet_rsk(req);
2211 long delta = req->expires - jiffies; 2216 long delta = req->rsk_timer.expires - jiffies;
2212 2217
2213 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2218 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2214 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", 2219 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",