aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2015-03-19 22:04:20 -0400
committer: David S. Miller <davem@davemloft.net> 2015-03-20 12:40:25 -0400
commit: fa76ce7328b289b6edd476e24eb52fd634261720 (patch)
tree: 2e4c116a4e299700c185d73018bbb3518e46e1bb /net/ipv6
parent: 52452c542559ac980b48dbf22a30ee7fa0af507c (diff)
inet: get rid of central tcp/dccp listener timer
One of the major issues for TCP is the SYNACK rtx handling, done by inet_csk_reqsk_queue_prune(), fired by the keepalive timer of a TCP_LISTEN socket. This function runs for awfully long times, with socket lock held, meaning that other cpus needing this lock have to spin for hundreds of ms. SYNACKs are sent in huge bursts, likely to cause severe drops anyway. This model was OK 15 years ago when memory was very tight. We now can afford to have a timer per request sock. Timer invocations no longer need to lock the listener, and can be run from all cpus in parallel. With the following patch increasing somaxconn width to 32 bits, I tested a listener with more than 4 million active request sockets, and a steady SYNFLOOD of ~200,000 SYN per second. Host was sending ~830,000 SYNACK per second. This is ~100 times more than what we could achieve before this patch. Later, we will get rid of the listener hash and use ehash instead. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/inet6_connection_sock.c 19
-rw-r--r-- net/ipv6/syncookies.c 1
-rw-r--r-- net/ipv6/tcp_ipv6.c 12
3 files changed, 18 insertions, 14 deletions
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index b7acb9ebc4f5..2f3bbe569e8f 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -112,21 +112,20 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
112 return c & (synq_hsize - 1); 112 return c & (synq_hsize - 1);
113} 113}
114 114
115struct request_sock *inet6_csk_search_req(const struct sock *sk, 115struct request_sock *inet6_csk_search_req(struct sock *sk,
116 const __be16 rport, 116 const __be16 rport,
117 const struct in6_addr *raddr, 117 const struct in6_addr *raddr,
118 const struct in6_addr *laddr, 118 const struct in6_addr *laddr,
119 const int iif) 119 const int iif)
120{ 120{
121 const struct inet_connection_sock *icsk = inet_csk(sk); 121 struct inet_connection_sock *icsk = inet_csk(sk);
122 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 122 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
123 struct request_sock *req; 123 struct request_sock *req;
124 u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd,
125 lopt->nr_table_entries);
124 126
125 for (req = lopt->syn_table[inet6_synq_hash(raddr, rport, 127 write_lock(&icsk->icsk_accept_queue.syn_wait_lock);
126 lopt->hash_rnd, 128 for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
127 lopt->nr_table_entries)];
128 req != NULL;
129 req = req->dl_next) {
130 const struct inet_request_sock *ireq = inet_rsk(req); 129 const struct inet_request_sock *ireq = inet_rsk(req);
131 130
132 if (ireq->ir_rmt_port == rport && 131 if (ireq->ir_rmt_port == rport &&
@@ -134,12 +133,14 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
134 ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && 133 ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
135 ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && 134 ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
136 (!ireq->ir_iif || ireq->ir_iif == iif)) { 135 (!ireq->ir_iif || ireq->ir_iif == iif)) {
136 atomic_inc(&req->rsk_refcnt);
137 WARN_ON(req->sk != NULL); 137 WARN_ON(req->sk != NULL);
138 return req; 138 break;
139 } 139 }
140 } 140 }
141 write_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
141 142
142 return NULL; 143 return req;
143} 144}
144EXPORT_SYMBOL_GPL(inet6_csk_search_req); 145EXPORT_SYMBOL_GPL(inet6_csk_search_req);
145 146
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index da5823e5e5a7..2819137fc87d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -222,7 +222,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
222 222
223 ireq->ir_mark = inet_request_mark(sk, skb); 223 ireq->ir_mark = inet_request_mark(sk, skb);
224 224
225 req->expires = 0UL;
226 req->num_retrans = 0; 225 req->num_retrans = 0;
227 ireq->snd_wscale = tcp_opt.snd_wscale; 226 ireq->snd_wscale = tcp_opt.snd_wscale;
228 ireq->sack_ok = tcp_opt.sack_ok; 227 ireq->sack_ok = tcp_opt.sack_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 146f123b52c9..6e3f90db038c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -421,11 +421,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
421 421
422 if (seq != tcp_rsk(req)->snt_isn) { 422 if (seq != tcp_rsk(req)->snt_isn) {
423 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 423 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
424 reqsk_put(req);
424 goto out; 425 goto out;
425 } 426 }
426 427
427 inet_csk_reqsk_queue_drop(sk, req); 428 inet_csk_reqsk_queue_drop(sk, req);
428 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 429 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
430 reqsk_put(req);
429 goto out; 431 goto out;
430 432
431 case TCP_SYN_SENT: 433 case TCP_SYN_SENT:
@@ -988,9 +990,11 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
988 req = inet6_csk_search_req(sk, th->source, 990 req = inet6_csk_search_req(sk, th->source,
989 &ipv6_hdr(skb)->saddr, 991 &ipv6_hdr(skb)->saddr,
990 &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); 992 &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
991 if (req) 993 if (req) {
992 return tcp_check_req(sk, skb, req, false); 994 nsk = tcp_check_req(sk, skb, req, false);
993 995 reqsk_put(req);
996 return nsk;
997 }
994 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, 998 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
995 &ipv6_hdr(skb)->saddr, th->source, 999 &ipv6_hdr(skb)->saddr, th->source,
996 &ipv6_hdr(skb)->daddr, ntohs(th->dest), 1000 &ipv6_hdr(skb)->daddr, ntohs(th->dest),
@@ -1670,7 +1674,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
1670static void get_openreq6(struct seq_file *seq, 1674static void get_openreq6(struct seq_file *seq,
1671 struct request_sock *req, int i, kuid_t uid) 1675 struct request_sock *req, int i, kuid_t uid)
1672{ 1676{
1673 int ttd = req->expires - jiffies; 1677 long ttd = req->rsk_timer.expires - jiffies;
1674 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1678 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1675 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1679 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1676 1680