author     Yuchung Cheng <ycheng@google.com>       2014-05-11 23:22:11 -0400
committer  David S. Miller <davem@davemloft.net>   2014-05-13 17:53:02 -0400
commit     843f4a55e336e6d0c7bb92e7f9621535bc8d5fcd (patch)
tree       17010fcb1b56174476b471758c3ca4f825ccbe7f /net/ipv4
parent     89278c9dc922272df921042aafa18311f3398c6c (diff)
tcp: use tcp_v4_send_synack on first SYN-ACK
To avoid large code duplication in IPv6, we need to first simplify
the complicated SYN-ACK sending code in tcp_v4_conn_request().

To use tcp_v4(6)_send_synack() to send all SYN-ACKs, we need to
initialize the mini socket's receive window before trying to
create the child socket and/or building the SYN-ACK packet. So we move
that initialization from tcp_make_synack() to tcp_v4_conn_request()
as a new function tcp_openreq_init_rwin().

After this refactoring, the SYN-ACK sending code is simpler, which
makes it easier to implement Fast Open for IPv6.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Daniel Lee <longinus00@gmail.com>
Signed-off-by: Jerry Chu <hkchu@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
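
[Editor's note] For context on the feature being refactored: TCP Fast Open lets a client carry data on the SYN, and lets the server accept that data before the three-way handshake completes, gated by a server-issued cookie. A minimal userspace sketch of exercising the server path this patch touches — the loopback address, port 8888, and the payload are arbitrary; TCP_FASTOPEN and MSG_FASTOPEN are the existing Linux socket APIs; error handling is omitted:

	/* tfo_demo.c -- drive the kernel's Fast Open server path.
	 * Note: the client's first attempt has no cookie yet, so the
	 * kernel falls back to a regular handshake and requests one;
	 * a repeat connection sends the data on the SYN itself.
	 */
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main(void)
	{
		struct sockaddr_in addr = {
			.sin_family      = AF_INET,
			.sin_port        = htons(8888),
			.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
		};
		int qlen = 5;	/* cap on pending TFO requests (fastopenq) */
		int srv = socket(AF_INET, SOCK_STREAM, 0);

		bind(srv, (struct sockaddr *)&addr, sizeof(addr));
		/* Arm the server-side path (tcp_try_fastopen() below). */
		setsockopt(srv, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
		listen(srv, 16);

		/* Client side: data rides on the SYN via MSG_FASTOPEN. */
		int cli = socket(AF_INET, SOCK_STREAM, 0);
		sendto(cli, "hello", 5, MSG_FASTOPEN,
		       (struct sockaddr *)&addr, sizeof(addr));

		close(cli);
		close(srv);
		return 0;
	}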
Diffstat (limited to 'net/ipv4')
 net/ipv4/tcp_fastopen.c  | 67
 net/ipv4/tcp_ipv4.c      | 57
 net/ipv4/tcp_minisocks.c | 31
 net/ipv4/tcp_output.c    | 21
 4 files changed, 78 insertions(+), 98 deletions(-)
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 5a98277b9a82..9b947a9aaf6e 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -95,34 +95,22 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 	rcu_read_unlock();
 }
 
-int tcp_fastopen_create_child(struct sock *sk,
-			      struct sk_buff *skb,
-			      struct sk_buff *skb_synack,
-			      struct request_sock *req)
+static bool tcp_fastopen_create_child(struct sock *sk,
+				      struct sk_buff *skb,
+				      struct dst_entry *dst,
+				      struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
-	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct sock *child;
-	int err;
 
 	req->num_retrans = 0;
 	req->num_timeout = 0;
 	req->sk = NULL;
 
 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL) {
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-		kfree_skb(skb_synack);
-		return -1;
-	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-				    ireq->ir_rmt_addr, ireq->opt);
-	err = net_xmit_eval(err);
-	if (!err)
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	/* XXX (TFO) - is it ok to ignore error and continue? */
+	if (child == NULL)
+		return false;
 
 	spin_lock(&queue->fastopenq->lock);
 	queue->fastopenq->qlen++;
@@ -167,28 +155,24 @@ int tcp_fastopen_create_child(struct sock *sk,
 	/* Queue the data carried in the SYN packet. We need to first
 	 * bump skb's refcnt because the caller will attempt to free it.
 	 *
-	 * XXX (TFO) - we honor a zero-payload TFO request for now.
-	 * (Any reason not to?)
+	 * XXX (TFO) - we honor a zero-payload TFO request for now,
+	 * (any reason not to?) but no need to queue the skb since
+	 * there is no data. How about SYN+FIN?
 	 */
-	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
-		/* Don't queue the skb if there is no payload in SYN.
-		 * XXX (TFO) - How about SYN+FIN?
-		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-	} else {
+	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
 		skb = skb_get(skb);
 		skb_dst_drop(skb);
 		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
 		skb_set_owner_r(skb, child);
 		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		tp->syn_data_acked = 1;
 	}
+	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 	sk->sk_data_ready(sk);
 	bh_unlock_sock(child);
 	sock_put(child);
 	WARN_ON(req->sk == NULL);
-	return 0;
+	return true;
 }
 EXPORT_SYMBOL(tcp_fastopen_create_child);
 
@@ -232,9 +216,10 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
  * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
  * cookie request (foc->len == 0).
  */
-bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			struct request_sock *req,
-			struct tcp_fastopen_cookie *foc)
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst)
 {
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
@@ -255,11 +240,21 @@ bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
 	    foc->len == valid_foc.len &&
 	    !memcmp(foc->val, valid_foc.val, foc->len)) {
+		/* Cookie is valid. Create a (full) child socket to accept
+		 * the data in SYN before returning a SYN-ACK to ack the
+		 * data. If we fail to create the socket, fall back and
+		 * ack the ISN only but includes the same cookie.
+		 *
+		 * Note: Data-less SYN with valid cookie is allowed to send
+		 * data in SYN_RECV state.
+		 */
 fastopen:
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		foc->len = -1;
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-		return true;
+		if (tcp_fastopen_create_child(sk, skb, dst, req)) {
+			foc->len = -1;
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENPASSIVE);
+			return true;
+		}
 	}
 
 	NET_INC_STATS_BH(sock_net(sk), foc->len ?
@@ -268,4 +263,4 @@ fastopen:
 	*foc = valid_foc;
 	return false;
 }
-EXPORT_SYMBOL(tcp_fastopen_check);
+EXPORT_SYMBOL(tcp_try_fastopen);
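
[Editor's note] Taken together, these hunks change the function's contract: tcp_try_fastopen() (formerly tcp_fastopen_check()) no longer transmits anything, and it returns true only when a full child socket already exists and has queued any SYN data; sending the SYN-ACK is left entirely to the caller. A condensed paraphrase of the post-patch logic, for orientation only — cookie_is_valid() is a hypothetical stand-in for the foc/valid_foc memcmp() check above:

	bool tcp_try_fastopen(...)
	{
		...
		if (cookie_is_valid(foc, &valid_foc)) {
			/* Create the child *before* any SYN-ACK goes out;
			 * on success the caller skips reqsk hashing, since
			 * the connection is already established.
			 */
			if (tcp_fastopen_create_child(sk, skb, dst, req)) {
				foc->len = -1;	/* nothing more to echo */
				return true;
			}
		}
		*foc = valid_foc;	/* fall back: ack ISN only, echo cookie */
		return false;		/* caller proceeds with normal SYN_RECV */
	}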
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5ea0949dadfd..1665f0f84233 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -822,7 +822,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -833,7 +834,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,7 +853,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, 0);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
 
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1270,11 +1271,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-	bool want_cookie = false;
+	bool want_cookie = false, fastopen;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct sk_buff *skb_synack;
-	int do_fastopen;
+	int err;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1373,49 +1373,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v4_init_sequence(skb);
 	}
-	tcp_rsk(req)->snt_isn = isn;
-
-	if (dst == NULL) {
-		dst = inet_csk_route_req(sk, &fl4, req);
-		if (dst == NULL)
-			goto drop_and_free;
-	}
-	do_fastopen = !want_cookie &&
-		      tcp_fastopen_check(sk, skb, req, &foc);
-
-	/* We don't call tcp_v4_send_synack() directly because we need
-	 * to make sure a child socket can be created successfully before
-	 * sending back synack!
-	 *
-	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
-	 * (or better yet, call tcp_send_synack() in the child context
-	 * directly, but will have to fix bunch of other code first)
-	 * after syn_recv_sock() except one will need to first fix the
-	 * latter to remove its dependency on the current implementation
-	 * of tcp_v4_send_synack()->tcp_select_initial_window().
-	 */
-	skb_synack = tcp_make_synack(sk, dst, req, &foc);
-
-	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
-		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
-	} else
+	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		goto drop_and_free;
 
-	if (likely(!do_fastopen)) {
-		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-			    ireq->ir_rmt_addr, ireq->opt);
-		err = net_xmit_eval(err);
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v4_send_synack(sk, dst, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
 		if (err || want_cookie)
 			goto drop_and_free;
 
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 		tcp_rsk(req)->listener = NULL;
-		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-	} else if (tcp_fastopen_create_child(sk, skb, skb_synack, req))
-		goto drop_and_release;
+	}
 
 	return 0;
 
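
[Editor's note] The rewritten tail of tcp_v4_conn_request() is where the ordering requirement from the commit message becomes visible: the request sock's receive window must be initialized before tcp_try_fastopen() can create a child and before tcp_make_synack() builds the packet. Restating the new sequence from the hunk above, with editorial comments added:

	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;
	tcp_openreq_init_rwin(req, sk, dst);	/* moved out of tcp_make_synack() */
	fastopen = !want_cookie &&
		   tcp_try_fastopen(sk, skb, req, &foc, dst);	/* may create child */
	err = tcp_v4_send_synack(sk, dst, req,
				 skb_get_queue_mapping(skb), &foc);	/* single send path */
	if (!fastopen) {
		if (err || want_cookie)
			goto drop_and_free;
		/* normal handshake: park the request sock in the SYN table */
		tcp_rsk(req)->snt_synack = tcp_time_stamp;
		tcp_rsk(req)->listener = NULL;
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}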
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 05c1b155251d..e68e0d4af6c9 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
 
+void tcp_openreq_init_rwin(struct request_sock *req,
+			   struct sock *sk, struct dst_entry *dst)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u8 rcv_wscale;
+	int mss = dst_metric_advmss(dst);
+
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+		mss = tp->rx_opt.user_mss;
+
+	/* Set this up on the first call only */
+	req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+	/* limit the window selection if the user enforce a smaller rx buffer */
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+	    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+		req->window_clamp = tcp_full_space(sk);
+
+	/* tcp_full_space because it is guaranteed to be the first packet */
+	tcp_select_initial_window(tcp_full_space(sk),
+		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+		&req->rcv_wnd,
+		&req->window_clamp,
+		ireq->wscale_ok,
+		&rcv_wscale,
+		dst_metric(dst, RTAX_INITRWND));
+	ireq->rcv_wscale = rcv_wscale;
+}
+EXPORT_SYMBOL(tcp_openreq_init_rwin);
+
 static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
 					 struct request_sock *req)
 {
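
[Editor's note] Hoisting this helper into tcp_minisocks.c matters because nothing in it is IPv4-specific: it only touches the request sock, the listener, and the route's metrics. That is what lets the IPv6 conn-request path reuse it. A hypothetical sketch of that reuse — the actual IPv6 Fast Open support landed in a separate follow-up patch, and the names and fl6 argument here are assumptions modeled on the existing tcp_v6_send_synack():

	/* Sketch only -- not part of this patch. */
	static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
	{
		...
		tcp_rsk(req)->snt_isn = isn;
		tcp_rsk(req)->snt_synack = tcp_time_stamp;
		tcp_openreq_init_rwin(req, sk, dst);	/* shared helper, no duplication */
		fastopen = !want_cookie &&
			   tcp_try_fastopen(sk, skb, req, &foc, dst);
		err = tcp_v6_send_synack(sk, dst, &fl6, req,
					 skb_get_queue_mapping(skb), &foc);
		...
	}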
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b20fc02920f9..3d61c52bdf79 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2803,27 +2803,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 		mss = tp->rx_opt.user_mss;
 
-	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
-		__u8 rcv_wscale;
-		/* Set this up on the first call only */
-		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-
-		/* limit the window selection if the user enforce a smaller rx buffer */
-		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
-		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
-			req->window_clamp = tcp_full_space(sk);
-
-		/* tcp_full_space because it is guaranteed to be the first packet */
-		tcp_select_initial_window(tcp_full_space(sk),
-			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
-			&req->rcv_wnd,
-			&req->window_clamp,
-			ireq->wscale_ok,
-			&rcv_wscale,
-			dst_metric(dst, RTAX_INITRWND));
-		ireq->rcv_wscale = rcv_wscale;
-	}
-
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
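
[Editor's note] With this block removed, tcp_make_synack() no longer initializes req->rcv_wnd and the window scale as a side effect of building the first SYN-ACK; it now assumes the caller has already done so (the old "req->rcv_wnd == 0" guard that skipped the setup on retransmits goes away with it). Every call site must therefore follow the ordering sketched below — an invariant restated for emphasis, not new code:

	tcp_openreq_init_rwin(req, sk, dst);	  /* 1. choose rcv_wnd, clamp, wscale */
	skb = tcp_make_synack(sk, dst, req, foc); /* 2. build the SYN-ACK using them */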