aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_fastopen.c
diff options
context:
space:
mode:
authorWei Wang <weiwan@google.com>2017-04-20 17:45:46 -0400
committerDavid S. Miller <davem@davemloft.net>2017-04-24 14:27:17 -0400
commitcf1ef3f0719b4dcb74810ed507e2a2540f9811b4 (patch)
tree1b070be7a31eb3557e7f0701a6aca667b1eb13dd /net/ipv4/tcp_fastopen.c
parentbc95cd8e8b2fc779b96ed4d7a2608c6a0e8dc240 (diff)
net/tcp_fastopen: Disable active side TFO in certain scenarios
Middlebox firewall issues can potentially cause the server's data to be blackholed after a successful 3WHS using TFO. The following are the related reports from Apple:

https://www.nanog.org/sites/default/files/Paasch_Network_Support.pdf
Slide 31 identifies an issue where the client ACK to the server's data sent during a TFO'd handshake is dropped.

    C ---> syn-data ---> S
    C <--- syn/ack ----- S
    C (accept & write)
    C <---- data ------- S
    C ----- ACK -> X     S
            [retry and timeout]

https://www.ietf.org/proceedings/94/slides/slides-94-tcpm-13.pdf
Slide 5 shows a similar situation in which the server's data gets dropped after 3WHS.

    C ---- syn-data ---> S
    C <--- syn/ack ----- S
    C ---- ack --------> S
    S (accept & write)
    C?  X <- data ------ S
            [retry and timeout]

This is the worst failure because the client cannot detect such behavior to mitigate the situation (such as disabling TFO). Failing to proceed, the application (e.g., an SSL library) may simply time out and retry with TFO again, and the process repeats indefinitely.

The proposed solution is to disable active TFO globally under the following circumstances:
1. a client side TFO socket detects an out of order FIN
2. a client side TFO socket receives an out of order RST

We disable active side TFO globally for 1hr at first. Then if it happens again, we disable it for 2h, then 4h, 8h, ... And we reset the timeout to 1hr if a client side TFO socket not opened on loopback has successfully received data segs from the server. And we examine this condition during close().

The rationale behind it is that when such a firewall issue happens, the application running on the client should eventually close the socket as it is not able to get the data it is expecting. Or the application running on the server should close the socket as it is not able to receive any response from the client. In both cases, an out of order FIN or RST will be received on the client, given that the firewall will not block them as no data are in those frames.
And we want to disable active TFO globally as it helps if the middlebox is very close to the client and most of the connections are likely to fail.

Also, add a debug sysctl: tcp_fastopen_blackhole_detect_timeout_sec: the initial timeout to use when the firewall blackhole issue happens. This can be set and read. Setting it to 0 disables the active disable logic.

Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_fastopen.c')
-rw-r--r--net/ipv4/tcp_fastopen.c101
1 files changed, 101 insertions, 0 deletions
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8ea4e9787f82..ff2d30ffc6f3 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -341,6 +341,13 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
341 cookie->len = -1; 341 cookie->len = -1;
342 return false; 342 return false;
343 } 343 }
344
345 /* Firewall blackhole issue check */
346 if (tcp_fastopen_active_should_disable(sk)) {
347 cookie->len = -1;
348 return false;
349 }
350
344 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { 351 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
345 cookie->len = -1; 352 cookie->len = -1;
346 return true; 353 return true;
@@ -380,3 +387,97 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
380 return false; 387 return false;
381} 388}
382EXPORT_SYMBOL(tcp_fastopen_defer_connect); 389EXPORT_SYMBOL(tcp_fastopen_defer_connect);
390
391/*
392 * The following code block is to deal with middle box issues with TFO:
393 * Middlebox firewall issues can potentially cause server's data being
394 * blackholed after a successful 3WHS using TFO.
395 * The proposed solution is to disable active TFO globally under the
396 * following circumstances:
397 * 1. client side TFO socket receives out of order FIN
398 * 2. client side TFO socket receives out of order RST
399 * We disable active side TFO globally for 1hr at first. Then if it
400 * happens again, we disable it for 2h, then 4h, 8h, ...
401 * And we reset the timeout back to 1hr when we see a successful active
402 * TFO connection with data exchanges.
403 */
404
405/* Default to 1hr */
406unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
407static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
408static unsigned long tfo_active_disable_stamp __read_mostly;
409
410/* Disable active TFO and record current jiffies and
411 * tfo_active_disable_times
412 */
413void tcp_fastopen_active_disable(void)
414{
415 atomic_inc(&tfo_active_disable_times);
416 tfo_active_disable_stamp = jiffies;
417}
418
419/* Reset tfo_active_disable_times to 0 */
420void tcp_fastopen_active_timeout_reset(void)
421{
422 atomic_set(&tfo_active_disable_times, 0);
423}
424
425/* Calculate timeout for tfo active disable
426 * Return true if we are still in the active TFO disable period
427 * Return false if timeout already expired and we should use active TFO
428 */
429bool tcp_fastopen_active_should_disable(struct sock *sk)
430{
431 int tfo_da_times = atomic_read(&tfo_active_disable_times);
432 int multiplier;
433 unsigned long timeout;
434
435 if (!tfo_da_times)
436 return false;
437
438 /* Limit timout to max: 2^6 * initial timeout */
439 multiplier = 1 << min(tfo_da_times - 1, 6);
440 timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
441 if (time_before(jiffies, tfo_active_disable_stamp + timeout))
442 return true;
443
444 /* Mark check bit so we can check for successful active TFO
445 * condition and reset tfo_active_disable_times
446 */
447 tcp_sk(sk)->syn_fastopen_ch = 1;
448 return false;
449}
450
451/* Disable active TFO if FIN is the only packet in the ofo queue
452 * and no data is received.
453 * Also check if we can reset tfo_active_disable_times if data is
454 * received successfully on a marked active TFO sockets opened on
455 * a non-loopback interface
456 */
457void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
458{
459 struct tcp_sock *tp = tcp_sk(sk);
460 struct rb_node *p;
461 struct sk_buff *skb;
462 struct dst_entry *dst;
463
464 if (!tp->syn_fastopen)
465 return;
466
467 if (!tp->data_segs_in) {
468 p = rb_first(&tp->out_of_order_queue);
469 if (p && !rb_next(p)) {
470 skb = rb_entry(p, struct sk_buff, rbnode);
471 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
472 tcp_fastopen_active_disable();
473 return;
474 }
475 }
476 } else if (tp->syn_fastopen_ch &&
477 atomic_read(&tfo_active_disable_times)) {
478 dst = sk_dst_get(sk);
479 if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
480 tcp_fastopen_active_timeout_reset();
481 dst_release(dst);
482 }
483}