author     Eric Dumazet <edumazet@google.com>      2015-10-02 14:43:35 -0400
committer  David S. Miller <davem@davemloft.net>   2015-10-03 07:32:43 -0400
commit     ca6fb06518836ef9b65dc0aac02ff97704d52a05
tree       3fbe433aac9dcf1cae49e6715ced18bd3505d811  /net/ipv4/tcp_input.c
parent     1b33bc3e9e903f7293f7dfe80a875b2a5d0305aa
tcp: attach SYNACK messages to request sockets instead of listener
If a listen backlog is very big (to avoid syncookies), then
the listener's sk->sk_wmem_alloc is the main source of false
sharing, as we need to touch it twice per SYNACK: once on
(re)transmit and once on TX completion.
(One SYN packet takes the listener lock once, but up to 6
SYNACKs are generated.)
By attaching the skb to the request socket, we remove this
source of contention.
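To make the accounting change concrete, here is a minimal user-space sketch
(an illustration, not kernel code: the thread count, the 512-byte charge and
names such as tx_path are invented). Each thread either charges one shared
counter, the way every SYNACK used to charge the listener's sk_wmem_alloc,
or its own per-request counter, the way it is charged after this patch; the
shared mode keeps a single cache line bouncing between CPUs, which is the
contention being removed.

/*
 * Build: cc -O2 -pthread synack_accounting_demo.c
 * Run:   ./a.out 1   -> charge the shared "listener" counter (old scheme)
 *        ./a.out     -> charge per-request counters (new scheme)
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define THREADS 8                 /* invented: one "TX queue" per thread */
#define SYNACKS (1 << 22)         /* invented: SYNACKs sent per thread   */

static atomic_long listener_wmem;        /* shared: one hot cache line   */

struct request {                         /* per-request accounting       */
        atomic_long wmem;
        char pad[64 - sizeof(atomic_long)]; /* one request per 64-byte line */
};

static struct request reqs[THREADS];
static int use_listener;                 /* 1 = old scheme, 0 = new      */

static void *tx_path(void *arg)
{
        struct request *req = arg;
        long i;

        for (i = 0; i < SYNACKS; i++) {
                atomic_long *ctr = use_listener ? &listener_wmem : &req->wmem;

                atomic_fetch_add(ctr, 512);  /* charge the SYNACK skb       */
                atomic_fetch_sub(ctr, 512);  /* TX completion uncharges it  */
        }
        return NULL;
}

int main(int argc, char **argv)
{
        pthread_t tid[THREADS];
        int i;

        use_listener = argc > 1 && atoi(argv[1]);

        for (i = 0; i < THREADS; i++)
                pthread_create(&tid[i], NULL, tx_path, &reqs[i]);
        for (i = 0; i < THREADS; i++)
                pthread_join(tid[i], NULL);

        printf("mode=%s listener_wmem=%ld\n",
               use_listener ? "listener" : "per-request",
               atomic_load(&listener_wmem));
        return 0;
}

Timing the two modes (time ./a.out 1 versus time ./a.out) typically shows the
shared-counter run scaling much worse as the thread count grows, for the same
reason the listener-owned SYNACKs did.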
Tested:
listen(fd, 10485760); // single listener (no SO_REUSEPORT)
16 RX/TX queue NIC
Sustained a SYN flood attack of ~320,000 SYN per second,
sending ~1,400,000 SYNACK per second.
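For completeness, a minimal sketch of the single-listener setup described
above; it is illustrative only (port 8080 and the error handling are not from
the original test harness), and the backlog passed to listen() is still capped
by the net.core.somaxconn sysctl, which must be raised for a value this large
to take effect.

/* cc -O2 big_backlog_listener.c */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in addr;
        int fd = socket(AF_INET, SOCK_STREAM, 0); /* single listener, no SO_REUSEPORT */

        if (fd < 0) {
                perror("socket");
                return 1;
        }

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = htons(8080);              /* illustrative port */

        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
            listen(fd, 10485760) < 0) {           /* huge backlog, as in the test */
                perror("bind/listen");
                return 1;
        }

        for (;;)
                pause(); /* never accept(): a spoofed SYN flood never completes handshakes */
}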
Perf profiles now show the listener spinlock as the next bottleneck.
20.29% [kernel] [k] queued_spin_lock_slowpath
10.06% [kernel] [k] __inet_lookup_established
5.12% [kernel] [k] reqsk_timer_handler
3.22% [kernel] [k] get_next_timer_interrupt
3.00% [kernel] [k] tcp_make_synack
2.77% [kernel] [k] ipt_do_table
2.70% [kernel] [k] run_timer_softirq
2.50% [kernel] [k] ip_finish_output
2.04% [kernel] [k] cascade
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 23
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a56912772354..27108757c310 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6120,8 +6120,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	struct request_sock *req;
 	bool want_cookie = false;
 	struct flowi fl;
-	int err;
-
 
 	/* TW buckets are converted to open requests without
 	 * limitations, they conserve resources and peer is
@@ -6230,21 +6228,24 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
-	if (!want_cookie)
+	if (!want_cookie) {
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
-	err = af_ops->send_synack(fastopen_sk ?: sk, dst, &fl, req,
-				  skb_get_queue_mapping(skb), &foc);
+		tcp_reqsk_record_syn(sk, req, skb);
+	}
 	if (fastopen_sk) {
+		af_ops->send_synack(fastopen_sk, dst, &fl, req,
+				    skb_get_queue_mapping(skb), &foc, false);
 		sock_put(fastopen_sk);
 	} else {
-		if (err || want_cookie)
-			goto drop_and_free;
-
 		tcp_rsk(req)->tfo_listener = false;
-		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		if (!want_cookie)
+			inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		af_ops->send_synack(sk, dst, &fl, req,
+				    skb_get_queue_mapping(skb), &foc, !want_cookie);
+		if (want_cookie)
+			goto drop_and_free;
 	}
-	tcp_reqsk_record_syn(sk, req, skb);
-
+	reqsk_put(req);
 	return 0;
 
 drop_and_release: