aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2015-10-05 05:46:26 -0400
committer David S. Miller <davem@davemloft.net> 2015-10-05 05:46:26 -0400
commit 2472186f58ee1e4b9ca194245fef03931f6de90a (patch)
tree 75c793a05dd64db3df05f804e429bae45ead20d1
parent 3e087caa23ef36370bfb925d3bbca78e8302d3ce (diff)
parent a1a5344ddbe8fd3e080013b317ac9a664490cfdf (diff)
Merge branch 'tcp-listener-fixes-and-improvement'
Eric Dumazet says:

====================
tcp: lockless listener fixes and improvement

This fixes issues with TCP FastOpen vs lockless listeners,
and SYNACK being attached to request sockets.

Then, last patch brings performance improvement for
syncookies generation and validation.

Tested under a 4.3 Mpps SYNFLOOD attack, new perf profile looks like :

    12.11%  [kernel]  [k] sha_transform
     5.83%  [kernel]  [k] tcp_conn_request
     4.59%  [kernel]  [k] __inet_lookup_listener
     4.11%  [kernel]  [k] ipt_do_table
     3.91%  [kernel]  [k] tcp_make_synack
     3.05%  [kernel]  [k] fib_table_lookup
     2.74%  [kernel]  [k] sock_wfree
     2.66%  [kernel]  [k] memcpy_erms
     2.12%  [kernel]  [k] tcp_v4_rcv
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/ipv6.h 4
-rw-r--r-- include/net/inet_sock.h 3
-rw-r--r-- include/net/ip.h 9
-rw-r--r-- include/net/request_sock.h 12
-rw-r--r-- net/core/dev.c 1
-rw-r--r-- net/dccp/ipv4.c 2
-rw-r--r-- net/dccp/ipv6.c 2
-rw-r--r-- net/ipv4/syncookies.c 2
-rw-r--r-- net/ipv4/tcp_fastopen.c 26
-rw-r--r-- net/ipv4/tcp_input.c 14
-rw-r--r-- net/ipv6/syncookies.c 2
11 files changed, 41 insertions, 36 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f1f32af6d9b9..0ef2a97ccdb5 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -264,9 +264,9 @@ struct tcp6_timewait_sock {
264}; 264};
265 265
266#if IS_ENABLED(CONFIG_IPV6) 266#if IS_ENABLED(CONFIG_IPV6)
267static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) 267static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk)
268{ 268{
269 return inet_sk(__sk)->pinet6; 269 return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL;
270} 270}
271 271
272static inline struct raw6_sock *raw6_sk(const struct sock *sk) 272static inline struct raw6_sock *raw6_sk(const struct sock *sk)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 47eb67b08abd..f5bf7310e334 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -245,7 +245,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
245} 245}
246 246
247struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, 247struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
248 struct sock *sk_listener); 248 struct sock *sk_listener,
249 bool attach_listener);
249 250
250static inline __u8 inet_sk_flowi_flags(const struct sock *sk) 251static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
251{ 252{
diff --git a/include/net/ip.h b/include/net/ip.h
index 91a6b2c88341..aa7811993276 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -323,12 +323,15 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
323 323
324static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) 324static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
325{ 325{
326 if (!skb->sk || ip_sk_use_pmtu(skb->sk)) { 326 struct sock *sk = skb->sk;
327
328 if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
327 bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; 329 bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
330
328 return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); 331 return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
329 } else {
330 return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
331 } 332 }
333
334 return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
332} 335}
333 336
334u32 ip_idents_reserve(u32 hash, int segs); 337u32 ip_idents_reserve(u32 hash, int segs);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index dd423d840852..95ab5d7aab96 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -80,7 +80,8 @@ static inline struct sock *req_to_sk(struct request_sock *req)
80} 80}
81 81
82static inline struct request_sock * 82static inline struct request_sock *
83reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) 83reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
84 bool attach_listener)
84{ 85{
85 struct request_sock *req; 86 struct request_sock *req;
86 87
@@ -88,10 +89,15 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
88 89
89 if (req) { 90 if (req) {
90 req->rsk_ops = ops; 91 req->rsk_ops = ops;
91 sock_hold(sk_listener); 92 if (attach_listener) {
92 req->rsk_listener = sk_listener; 93 sock_hold(sk_listener);
94 req->rsk_listener = sk_listener;
95 } else {
96 req->rsk_listener = NULL;
97 }
93 req_to_sk(req)->sk_prot = sk_listener->sk_prot; 98 req_to_sk(req)->sk_prot = sk_listener->sk_prot;
94 sk_node_init(&req_to_sk(req)->sk_node); 99 sk_node_init(&req_to_sk(req)->sk_node);
100 sk_tx_queue_clear(req_to_sk(req));
95 req->saved_syn = NULL; 101 req->saved_syn = NULL;
96 /* Following is temporary. It is coupled with debugging 102 /* Following is temporary. It is coupled with debugging
97 * helpers in reqsk_put() & reqsk_free() 103 * helpers in reqsk_put() & reqsk_free()
diff --git a/net/core/dev.c b/net/core/dev.c
index 323c04edd779..a229bf0d649d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2974,6 +2974,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
2974 new_index = skb_tx_hash(dev, skb); 2974 new_index = skb_tx_hash(dev, skb);
2975 2975
2976 if (queue_index != new_index && sk && 2976 if (queue_index != new_index && sk &&
2977 sk_fullsock(sk) &&
2977 rcu_access_pointer(sk->sk_dst_cache)) 2978 rcu_access_pointer(sk->sk_dst_cache))
2978 sk_tx_queue_set(sk, new_index); 2979 sk_tx_queue_set(sk, new_index);
2979 2980
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8910c9567719..8e99681c8189 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -595,7 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
595 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 595 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
596 goto drop; 596 goto drop;
597 597
598 req = inet_reqsk_alloc(&dccp_request_sock_ops, sk); 598 req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
599 if (req == NULL) 599 if (req == NULL)
600 goto drop; 600 goto drop;
601 601
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1361a3f45df7..aed314f8c7c6 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -319,7 +319,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
319 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 319 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
320 goto drop; 320 goto drop;
321 321
322 req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk); 322 req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
323 if (req == NULL) 323 if (req == NULL)
324 goto drop; 324 goto drop;
325 325
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 729ceb5f63c6..8113c30ccf96 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -326,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
326 goto out; 326 goto out;
327 327
328 ret = NULL; 328 ret = NULL;
329 req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */ 329 req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
330 if (!req) 330 if (!req)
331 goto out; 331 goto out;
332 332
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 410ac481fda0..93396bf7b475 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -168,8 +168,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
168 TCP_TIMEOUT_INIT, TCP_RTO_MAX); 168 TCP_TIMEOUT_INIT, TCP_RTO_MAX);
169 169
170 atomic_set(&req->rsk_refcnt, 2); 170 atomic_set(&req->rsk_refcnt, 2);
171 /* Add the child socket directly into the accept queue */
172 inet_csk_reqsk_queue_add(sk, req, child);
173 171
174 /* Now finish processing the fastopen child socket. */ 172 /* Now finish processing the fastopen child socket. */
175 inet_csk(child)->icsk_af_ops->rebuild_header(child); 173 inet_csk(child)->icsk_af_ops->rebuild_header(child);
@@ -178,12 +176,10 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
178 tcp_init_metrics(child); 176 tcp_init_metrics(child);
179 tcp_init_buffer_space(child); 177 tcp_init_buffer_space(child);
180 178
181 /* Queue the data carried in the SYN packet. We need to first 179 /* Queue the data carried in the SYN packet.
182 * bump skb's refcnt because the caller will attempt to free it. 180 * We used to play tricky games with skb_get().
183 * Note that IPv6 might also have used skb_get() trick 181 * With lockless listener, it is a dead end.
184 * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts) 182 * Do not think about it.
185 * So we need to eventually get a clone of the packet,
186 * before inserting it in sk_receive_queue.
187 * 183 *
188 * XXX (TFO) - we honor a zero-payload TFO request for now, 184 * XXX (TFO) - we honor a zero-payload TFO request for now,
189 * (any reason not to?) but no need to queue the skb since 185 * (any reason not to?) but no need to queue the skb since
@@ -191,12 +187,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
191 */ 187 */
192 end_seq = TCP_SKB_CB(skb)->end_seq; 188 end_seq = TCP_SKB_CB(skb)->end_seq;
193 if (end_seq != TCP_SKB_CB(skb)->seq + 1) { 189 if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
194 struct sk_buff *skb2; 190 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
195
196 if (unlikely(skb_shared(skb)))
197 skb2 = skb_clone(skb, GFP_ATOMIC);
198 else
199 skb2 = skb_get(skb);
200 191
201 if (likely(skb2)) { 192 if (likely(skb2)) {
202 skb_dst_drop(skb2); 193 skb_dst_drop(skb2);
@@ -214,12 +205,9 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
214 } 205 }
215 } 206 }
216 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq; 207 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
217 sk->sk_data_ready(sk); 208 /* tcp_conn_request() is sending the SYNACK,
218 bh_unlock_sock(child); 209 * and queues the child into listener accept queue.
219 /* Note: sock_put(child) will be done by tcp_conn_request()
220 * after SYNACK packet is sent.
221 */ 210 */
222 WARN_ON(!req->sk);
223 return child; 211 return child;
224} 212}
225 213
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 27108757c310..ddadb318e850 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6042,9 +6042,11 @@ static void tcp_openreq_init(struct request_sock *req,
6042} 6042}
6043 6043
6044struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, 6044struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6045 struct sock *sk_listener) 6045 struct sock *sk_listener,
6046 bool attach_listener)
6046{ 6047{
6047 struct request_sock *req = reqsk_alloc(ops, sk_listener); 6048 struct request_sock *req = reqsk_alloc(ops, sk_listener,
6049 attach_listener);
6048 6050
6049 if (req) { 6051 if (req) {
6050 struct inet_request_sock *ireq = inet_rsk(req); 6052 struct inet_request_sock *ireq = inet_rsk(req);
@@ -6143,7 +6145,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6143 goto drop; 6145 goto drop;
6144 } 6146 }
6145 6147
6146 req = inet_reqsk_alloc(rsk_ops, sk); 6148 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
6147 if (!req) 6149 if (!req)
6148 goto drop; 6150 goto drop;
6149 6151
@@ -6229,12 +6231,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6229 tcp_rsk(req)->txhash = net_tx_rndhash(); 6231 tcp_rsk(req)->txhash = net_tx_rndhash();
6230 tcp_openreq_init_rwin(req, sk, dst); 6232 tcp_openreq_init_rwin(req, sk, dst);
6231 if (!want_cookie) { 6233 if (!want_cookie) {
6232 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6233 tcp_reqsk_record_syn(sk, req, skb); 6234 tcp_reqsk_record_syn(sk, req, skb);
6235 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6234 } 6236 }
6235 if (fastopen_sk) { 6237 if (fastopen_sk) {
6236 af_ops->send_synack(fastopen_sk, dst, &fl, req, 6238 af_ops->send_synack(fastopen_sk, dst, &fl, req,
6237 skb_get_queue_mapping(skb), &foc, false); 6239 skb_get_queue_mapping(skb), &foc, false);
6240 /* Add the child socket directly into the accept queue */
6241 inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
6242 sk->sk_data_ready(sk);
6243 bh_unlock_sock(fastopen_sk);
6238 sock_put(fastopen_sk); 6244 sock_put(fastopen_sk);
6239 } else { 6245 } else {
6240 tcp_rsk(req)->tfo_listener = false; 6246 tcp_rsk(req)->tfo_listener = false;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7606eba83e7b..f610b5310b17 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -170,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
170 goto out; 170 goto out;
171 171
172 ret = NULL; 172 ret = NULL;
173 req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk); 173 req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
174 if (!req) 174 if (!req)
175 goto out; 175 goto out;
176 176