aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2015-03-19 22:04:20 -0400
committer David S. Miller <davem@davemloft.net> 2015-03-20 12:40:25 -0400
commit fa76ce7328b289b6edd476e24eb52fd634261720 (patch)
tree 2e4c116a4e299700c185d73018bbb3518e46e1bb /net/dccp
parent 52452c542559ac980b48dbf22a30ee7fa0af507c (diff)
inet: get rid of central tcp/dccp listener timer
One of the major issues for TCP is the SYNACK rtx handling, done by inet_csk_reqsk_queue_prune(), fired by the keepalive timer of a TCP_LISTEN socket. This function runs for awfully long times, with socket lock held, meaning that other cpus needing this lock have to spin for hundreds of ms. SYNACK are sent in huge bursts, likely to cause severe drops anyway. This model was OK 15 years ago when memory was very tight. We now can afford to have a timer per request sock. Timer invocations no longer need to lock the listener, and can be run from all cpus in parallel. With the following patch increasing somaxconn width to 32 bits, I tested a listener with more than 4 million active request sockets, and a steady SYNFLOOD of ~200,000 SYN per second. Host was sending ~830,000 SYNACK per second. This is ~100 times more than what we could achieve before this patch. Later, we will get rid of the listener hash and use ehash instead. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/dccp')
-rw-r--r-- net/dccp/ipv4.c 10
-rw-r--r-- net/dccp/ipv6.c 12
-rw-r--r-- net/dccp/timer.c 24
3 files changed, 16 insertions, 30 deletions
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5bffbbaf1fac..25a9615b3b88 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -306,6 +306,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
306 if (!between48(seq, dccp_rsk(req)->dreq_iss, 306 if (!between48(seq, dccp_rsk(req)->dreq_iss,
307 dccp_rsk(req)->dreq_gss)) { 307 dccp_rsk(req)->dreq_gss)) {
308 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 308 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
309 reqsk_put(req);
309 goto out; 310 goto out;
310 } 311 }
311 /* 312 /*
@@ -315,6 +316,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
315 * errors returned from accept(). 316 * errors returned from accept().
316 */ 317 */
317 inet_csk_reqsk_queue_drop(sk, req); 318 inet_csk_reqsk_queue_drop(sk, req);
319 reqsk_put(req);
318 goto out; 320 goto out;
319 321
320 case DCCP_REQUESTING: 322 case DCCP_REQUESTING:
@@ -451,9 +453,11 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
451 /* Find possible connection requests. */ 453 /* Find possible connection requests. */
452 struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, 454 struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport,
453 iph->saddr, iph->daddr); 455 iph->saddr, iph->daddr);
454 if (req) 456 if (req) {
455 return dccp_check_req(sk, skb, req); 457 nsk = dccp_check_req(sk, skb, req);
456 458 reqsk_put(req);
459 return nsk;
460 }
457 nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, 461 nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
458 iph->saddr, dh->dccph_sport, 462 iph->saddr, dh->dccph_sport,
459 iph->daddr, dh->dccph_dport, 463 iph->daddr, dh->dccph_dport,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ae2184039fe3..69d8f13895ba 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -157,7 +157,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
157 req = inet6_csk_search_req(sk, dh->dccph_dport, 157 req = inet6_csk_search_req(sk, dh->dccph_dport,
158 &hdr->daddr, &hdr->saddr, 158 &hdr->daddr, &hdr->saddr,
159 inet6_iif(skb)); 159 inet6_iif(skb));
160 if (req == NULL) 160 if (!req)
161 goto out; 161 goto out;
162 162
163 /* 163 /*
@@ -169,10 +169,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
169 if (!between48(seq, dccp_rsk(req)->dreq_iss, 169 if (!between48(seq, dccp_rsk(req)->dreq_iss,
170 dccp_rsk(req)->dreq_gss)) { 170 dccp_rsk(req)->dreq_gss)) {
171 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 171 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
172 reqsk_put(req);
172 goto out; 173 goto out;
173 } 174 }
174 175
175 inet_csk_reqsk_queue_drop(sk, req); 176 inet_csk_reqsk_queue_drop(sk, req);
177 reqsk_put(req);
176 goto out; 178 goto out;
177 179
178 case DCCP_REQUESTING: 180 case DCCP_REQUESTING:
@@ -322,9 +324,11 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
322 324
323 req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, 325 req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr,
324 &iph->daddr, inet6_iif(skb)); 326 &iph->daddr, inet6_iif(skb));
325 if (req != NULL) 327 if (req) {
326 return dccp_check_req(sk, skb, req); 328 nsk = dccp_check_req(sk, skb, req);
327 329 reqsk_put(req);
330 return nsk;
331 }
328 nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, 332 nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
329 &iph->saddr, dh->dccph_sport, 333 &iph->saddr, dh->dccph_sport,
330 &iph->daddr, ntohs(dh->dccph_dport), 334 &iph->daddr, ntohs(dh->dccph_dport),
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1cd46a345cb0..3ef7acef3ce8 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -161,33 +161,11 @@ out:
161 sock_put(sk); 161 sock_put(sk);
162} 162}
163 163
164/*
165 * Timer for listening sockets
166 */
167static void dccp_response_timer(struct sock *sk)
168{
169 inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
170 DCCP_RTO_MAX);
171}
172
173static void dccp_keepalive_timer(unsigned long data) 164static void dccp_keepalive_timer(unsigned long data)
174{ 165{
175 struct sock *sk = (struct sock *)data; 166 struct sock *sk = (struct sock *)data;
176 167
177 /* Only process if socket is not in use. */ 168 pr_err("dccp should not use a keepalive timer !\n");
178 bh_lock_sock(sk);
179 if (sock_owned_by_user(sk)) {
180 /* Try again later. */
181 inet_csk_reset_keepalive_timer(sk, HZ / 20);
182 goto out;
183 }
184
185 if (sk->sk_state == DCCP_LISTEN) {
186 dccp_response_timer(sk);
187 goto out;
188 }
189out:
190 bh_unlock_sock(sk);
191 sock_put(sk); 169 sock_put(sk);
192} 170}
193 171