author		Eric Dumazet <edumazet@google.com>	2015-10-02 14:43:35 -0400
committer	David S. Miller <davem@davemloft.net>	2015-10-03 07:32:43 -0400
commit		ca6fb06518836ef9b65dc0aac02ff97704d52a05 (patch)
tree		3fbe433aac9dcf1cae49e6715ced18bd3505d811 /net/ipv4/tcp_input.c
parent		1b33bc3e9e903f7293f7dfe80a875b2a5d0305aa (diff)
tcp: attach SYNACK messages to request sockets instead of listener
If a listen backlog is very big (to avoid syncookies), then the listener
sk->sk_wmem_alloc is the main source of false sharing, as we need to touch
it twice per SYNACK re-transmit and TX completion.

(One SYN packet takes the listener lock once, but up to 6 SYNACKs are generated.)

By attaching the skb to the request socket, we remove this source of
contention.

Tested:

    listen(fd, 10485760); // single listener (no SO_REUSEPORT)
    16 RX/TX queue NIC

Sustained a SYN flood attack of ~320,000 SYN per second, sending
~1,400,000 SYNACK per second. Perf profiles now show the listener
spinlock as the next bottleneck:

    20.29%  [kernel]  [k] queued_spin_lock_slowpath
    10.06%  [kernel]  [k] __inet_lookup_established
     5.12%  [kernel]  [k] reqsk_timer_handler
     3.22%  [kernel]  [k] get_next_timer_interrupt
     3.00%  [kernel]  [k] tcp_make_synack
     2.77%  [kernel]  [k] ipt_do_table
     2.70%  [kernel]  [k] run_timer_softirq
     2.50%  [kernel]  [k] ip_finish_output
     2.04%  [kernel]  [k] cascade

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
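As a minimal sketch (not part of the commit), the single listener used in the test above could look roughly like the following, reconstructed from the listen() call quoted in the message; the port is a placeholder, and the effective backlog is additionally capped by the net.core.somaxconn sysctl, which would have to be raised to match:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	int fd = socket(AF_INET, SOCK_STREAM, 0);	/* single listener, no SO_REUSEPORT */

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(8080);			/* placeholder port */
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	bind(fd, (struct sockaddr *)&addr, sizeof(addr));

	/* Huge backlog, as in the commit message, so the SYN flood is
	 * absorbed without falling back to syncookies.
	 */
	listen(fd, 10485760);

	pause();	/* soak up SYNs; connections are never accept()ed */
	return 0;
}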
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	23
1 file changed, 12 insertions, 11 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a56912772354..27108757c310 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6120,8 +6120,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	struct request_sock *req;
 	bool want_cookie = false;
 	struct flowi fl;
-	int err;
-
 
 	/* TW buckets are converted to open requests without
 	 * limitations, they conserve resources and peer is
@@ -6230,21 +6228,24 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
-	if (!want_cookie)
+	if (!want_cookie) {
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
-	err = af_ops->send_synack(fastopen_sk ?: sk, dst, &fl, req,
-				  skb_get_queue_mapping(skb), &foc);
+		tcp_reqsk_record_syn(sk, req, skb);
+	}
 	if (fastopen_sk) {
+		af_ops->send_synack(fastopen_sk, dst, &fl, req,
+				    skb_get_queue_mapping(skb), &foc, false);
 		sock_put(fastopen_sk);
 	} else {
-		if (err || want_cookie)
-			goto drop_and_free;
-
 		tcp_rsk(req)->tfo_listener = false;
-		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		if (!want_cookie)
+			inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		af_ops->send_synack(sk, dst, &fl, req,
+				    skb_get_queue_mapping(skb), &foc, !want_cookie);
+		if (want_cookie)
+			goto drop_and_free;
 	}
-	tcp_reqsk_record_syn(sk, req, skb);
-
+	reqsk_put(req);
 	return 0;
 
 drop_and_release: