diff options
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 394 |
1 files changed, 162 insertions, 232 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2b4461074da..536d40929ba6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -65,28 +65,24 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; | |||
65 | /* By default, RFC2861 behavior. */ | 65 | /* By default, RFC2861 behavior. */ |
66 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; | 66 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; |
67 | 67 | ||
68 | int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ | ||
69 | EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); | ||
70 | |||
71 | static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | 68 | static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, |
72 | int push_one, gfp_t gfp); | 69 | int push_one, gfp_t gfp); |
73 | 70 | ||
74 | /* Account for new data that has been sent to the network. */ | 71 | /* Account for new data that has been sent to the network. */ |
75 | static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) | 72 | static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) |
76 | { | 73 | { |
74 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
77 | struct tcp_sock *tp = tcp_sk(sk); | 75 | struct tcp_sock *tp = tcp_sk(sk); |
78 | unsigned int prior_packets = tp->packets_out; | 76 | unsigned int prior_packets = tp->packets_out; |
79 | 77 | ||
80 | tcp_advance_send_head(sk, skb); | 78 | tcp_advance_send_head(sk, skb); |
81 | tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; | 79 | tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; |
82 | 80 | ||
83 | /* Don't override Nagle indefinitely with F-RTO */ | ||
84 | if (tp->frto_counter == 2) | ||
85 | tp->frto_counter = 3; | ||
86 | |||
87 | tp->packets_out += tcp_skb_pcount(skb); | 81 | tp->packets_out += tcp_skb_pcount(skb); |
88 | if (!prior_packets || tp->early_retrans_delayed) | 82 | if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || |
83 | icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { | ||
89 | tcp_rearm_rto(sk); | 84 | tcp_rearm_rto(sk); |
85 | } | ||
90 | } | 86 | } |
91 | 87 | ||
92 | /* SND.NXT, if window was not shrunk. | 88 | /* SND.NXT, if window was not shrunk. |
@@ -384,7 +380,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) | |||
384 | #define OPTION_TS (1 << 1) | 380 | #define OPTION_TS (1 << 1) |
385 | #define OPTION_MD5 (1 << 2) | 381 | #define OPTION_MD5 (1 << 2) |
386 | #define OPTION_WSCALE (1 << 3) | 382 | #define OPTION_WSCALE (1 << 3) |
387 | #define OPTION_COOKIE_EXTENSION (1 << 4) | ||
388 | #define OPTION_FAST_OPEN_COOKIE (1 << 8) | 383 | #define OPTION_FAST_OPEN_COOKIE (1 << 8) |
389 | 384 | ||
390 | struct tcp_out_options { | 385 | struct tcp_out_options { |
@@ -398,36 +393,6 @@ struct tcp_out_options { | |||
398 | struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ | 393 | struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ |
399 | }; | 394 | }; |
400 | 395 | ||
401 | /* The sysctl int routines are generic, so check consistency here. | ||
402 | */ | ||
403 | static u8 tcp_cookie_size_check(u8 desired) | ||
404 | { | ||
405 | int cookie_size; | ||
406 | |||
407 | if (desired > 0) | ||
408 | /* previously specified */ | ||
409 | return desired; | ||
410 | |||
411 | cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); | ||
412 | if (cookie_size <= 0) | ||
413 | /* no default specified */ | ||
414 | return 0; | ||
415 | |||
416 | if (cookie_size <= TCP_COOKIE_MIN) | ||
417 | /* value too small, specify minimum */ | ||
418 | return TCP_COOKIE_MIN; | ||
419 | |||
420 | if (cookie_size >= TCP_COOKIE_MAX) | ||
421 | /* value too large, specify maximum */ | ||
422 | return TCP_COOKIE_MAX; | ||
423 | |||
424 | if (cookie_size & 1) | ||
425 | /* 8-bit multiple, illegal, fix it */ | ||
426 | cookie_size++; | ||
427 | |||
428 | return (u8)cookie_size; | ||
429 | } | ||
430 | |||
431 | /* Write previously computed TCP options to the packet. | 396 | /* Write previously computed TCP options to the packet. |
432 | * | 397 | * |
433 | * Beware: Something in the Internet is very sensitive to the ordering of | 398 | * Beware: Something in the Internet is very sensitive to the ordering of |
@@ -446,27 +411,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
446 | { | 411 | { |
447 | u16 options = opts->options; /* mungable copy */ | 412 | u16 options = opts->options; /* mungable copy */ |
448 | 413 | ||
449 | /* Having both authentication and cookies for security is redundant, | ||
450 | * and there's certainly not enough room. Instead, the cookie-less | ||
451 | * extension variant is proposed. | ||
452 | * | ||
453 | * Consider the pessimal case with authentication. The options | ||
454 | * could look like: | ||
455 | * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 | ||
456 | */ | ||
457 | if (unlikely(OPTION_MD5 & options)) { | 414 | if (unlikely(OPTION_MD5 & options)) { |
458 | if (unlikely(OPTION_COOKIE_EXTENSION & options)) { | 415 | *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | |
459 | *ptr++ = htonl((TCPOPT_COOKIE << 24) | | 416 | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); |
460 | (TCPOLEN_COOKIE_BASE << 16) | | ||
461 | (TCPOPT_MD5SIG << 8) | | ||
462 | TCPOLEN_MD5SIG); | ||
463 | } else { | ||
464 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
465 | (TCPOPT_NOP << 16) | | ||
466 | (TCPOPT_MD5SIG << 8) | | ||
467 | TCPOLEN_MD5SIG); | ||
468 | } | ||
469 | options &= ~OPTION_COOKIE_EXTENSION; | ||
470 | /* overload cookie hash location */ | 417 | /* overload cookie hash location */ |
471 | opts->hash_location = (__u8 *)ptr; | 418 | opts->hash_location = (__u8 *)ptr; |
472 | ptr += 4; | 419 | ptr += 4; |
@@ -495,44 +442,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
495 | *ptr++ = htonl(opts->tsecr); | 442 | *ptr++ = htonl(opts->tsecr); |
496 | } | 443 | } |
497 | 444 | ||
498 | /* Specification requires after timestamp, so do it now. | ||
499 | * | ||
500 | * Consider the pessimal case without authentication. The options | ||
501 | * could look like: | ||
502 | * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 | ||
503 | */ | ||
504 | if (unlikely(OPTION_COOKIE_EXTENSION & options)) { | ||
505 | __u8 *cookie_copy = opts->hash_location; | ||
506 | u8 cookie_size = opts->hash_size; | ||
507 | |||
508 | /* 8-bit multiple handled in tcp_cookie_size_check() above, | ||
509 | * and elsewhere. | ||
510 | */ | ||
511 | if (0x2 & cookie_size) { | ||
512 | __u8 *p = (__u8 *)ptr; | ||
513 | |||
514 | /* 16-bit multiple */ | ||
515 | *p++ = TCPOPT_COOKIE; | ||
516 | *p++ = TCPOLEN_COOKIE_BASE + cookie_size; | ||
517 | *p++ = *cookie_copy++; | ||
518 | *p++ = *cookie_copy++; | ||
519 | ptr++; | ||
520 | cookie_size -= 2; | ||
521 | } else { | ||
522 | /* 32-bit multiple */ | ||
523 | *ptr++ = htonl(((TCPOPT_NOP << 24) | | ||
524 | (TCPOPT_NOP << 16) | | ||
525 | (TCPOPT_COOKIE << 8) | | ||
526 | TCPOLEN_COOKIE_BASE) + | ||
527 | cookie_size); | ||
528 | } | ||
529 | |||
530 | if (cookie_size > 0) { | ||
531 | memcpy(ptr, cookie_copy, cookie_size); | ||
532 | ptr += (cookie_size / 4); | ||
533 | } | ||
534 | } | ||
535 | |||
536 | if (unlikely(OPTION_SACK_ADVERTISE & options)) { | 445 | if (unlikely(OPTION_SACK_ADVERTISE & options)) { |
537 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 446 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
538 | (TCPOPT_NOP << 16) | | 447 | (TCPOPT_NOP << 16) | |
@@ -591,11 +500,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
591 | struct tcp_md5sig_key **md5) | 500 | struct tcp_md5sig_key **md5) |
592 | { | 501 | { |
593 | struct tcp_sock *tp = tcp_sk(sk); | 502 | struct tcp_sock *tp = tcp_sk(sk); |
594 | struct tcp_cookie_values *cvp = tp->cookie_values; | ||
595 | unsigned int remaining = MAX_TCP_OPTION_SPACE; | 503 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
596 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? | ||
597 | tcp_cookie_size_check(cvp->cookie_desired) : | ||
598 | 0; | ||
599 | struct tcp_fastopen_request *fastopen = tp->fastopen_req; | 504 | struct tcp_fastopen_request *fastopen = tp->fastopen_req; |
600 | 505 | ||
601 | #ifdef CONFIG_TCP_MD5SIG | 506 | #ifdef CONFIG_TCP_MD5SIG |
@@ -647,52 +552,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
647 | tp->syn_fastopen = 1; | 552 | tp->syn_fastopen = 1; |
648 | } | 553 | } |
649 | } | 554 | } |
650 | /* Note that timestamps are required by the specification. | ||
651 | * | ||
652 | * Odd numbers of bytes are prohibited by the specification, ensuring | ||
653 | * that the cookie is 16-bit aligned, and the resulting cookie pair is | ||
654 | * 32-bit aligned. | ||
655 | */ | ||
656 | if (*md5 == NULL && | ||
657 | (OPTION_TS & opts->options) && | ||
658 | cookie_size > 0) { | ||
659 | int need = TCPOLEN_COOKIE_BASE + cookie_size; | ||
660 | |||
661 | if (0x2 & need) { | ||
662 | /* 32-bit multiple */ | ||
663 | need += 2; /* NOPs */ | ||
664 | |||
665 | if (need > remaining) { | ||
666 | /* try shrinking cookie to fit */ | ||
667 | cookie_size -= 2; | ||
668 | need -= 4; | ||
669 | } | ||
670 | } | ||
671 | while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { | ||
672 | cookie_size -= 4; | ||
673 | need -= 4; | ||
674 | } | ||
675 | if (TCP_COOKIE_MIN <= cookie_size) { | ||
676 | opts->options |= OPTION_COOKIE_EXTENSION; | ||
677 | opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; | ||
678 | opts->hash_size = cookie_size; | ||
679 | |||
680 | /* Remember for future incarnations. */ | ||
681 | cvp->cookie_desired = cookie_size; | ||
682 | |||
683 | if (cvp->cookie_desired != cvp->cookie_pair_size) { | ||
684 | /* Currently use random bytes as a nonce, | ||
685 | * assuming these are completely unpredictable | ||
686 | * by hostile users of the same system. | ||
687 | */ | ||
688 | get_random_bytes(&cvp->cookie_pair[0], | ||
689 | cookie_size); | ||
690 | cvp->cookie_pair_size = cookie_size; | ||
691 | } | ||
692 | 555 | ||
693 | remaining -= need; | ||
694 | } | ||
695 | } | ||
696 | return MAX_TCP_OPTION_SPACE - remaining; | 556 | return MAX_TCP_OPTION_SPACE - remaining; |
697 | } | 557 | } |
698 | 558 | ||
@@ -702,14 +562,10 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
702 | unsigned int mss, struct sk_buff *skb, | 562 | unsigned int mss, struct sk_buff *skb, |
703 | struct tcp_out_options *opts, | 563 | struct tcp_out_options *opts, |
704 | struct tcp_md5sig_key **md5, | 564 | struct tcp_md5sig_key **md5, |
705 | struct tcp_extend_values *xvp, | ||
706 | struct tcp_fastopen_cookie *foc) | 565 | struct tcp_fastopen_cookie *foc) |
707 | { | 566 | { |
708 | struct inet_request_sock *ireq = inet_rsk(req); | 567 | struct inet_request_sock *ireq = inet_rsk(req); |
709 | unsigned int remaining = MAX_TCP_OPTION_SPACE; | 568 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
710 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? | ||
711 | xvp->cookie_plus : | ||
712 | 0; | ||
713 | 569 | ||
714 | #ifdef CONFIG_TCP_MD5SIG | 570 | #ifdef CONFIG_TCP_MD5SIG |
715 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); | 571 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); |
@@ -757,28 +613,7 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
757 | remaining -= need; | 613 | remaining -= need; |
758 | } | 614 | } |
759 | } | 615 | } |
760 | /* Similar rationale to tcp_syn_options() applies here, too. | 616 | |
761 | * If the <SYN> options fit, the same options should fit now! | ||
762 | */ | ||
763 | if (*md5 == NULL && | ||
764 | ireq->tstamp_ok && | ||
765 | cookie_plus > TCPOLEN_COOKIE_BASE) { | ||
766 | int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ | ||
767 | |||
768 | if (0x2 & need) { | ||
769 | /* 32-bit multiple */ | ||
770 | need += 2; /* NOPs */ | ||
771 | } | ||
772 | if (need <= remaining) { | ||
773 | opts->options |= OPTION_COOKIE_EXTENSION; | ||
774 | opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; | ||
775 | remaining -= need; | ||
776 | } else { | ||
777 | /* There's no error return, so flag it. */ | ||
778 | xvp->cookie_out_never = 1; /* true */ | ||
779 | opts->hash_size = 0; | ||
780 | } | ||
781 | } | ||
782 | return MAX_TCP_OPTION_SPACE - remaining; | 617 | return MAX_TCP_OPTION_SPACE - remaining; |
783 | } | 618 | } |
784 | 619 | ||
@@ -953,7 +788,7 @@ void __init tcp_tasklet_init(void) | |||
953 | * We can't xmit new skbs from this context, as we might already | 788 | * We can't xmit new skbs from this context, as we might already |
954 | * hold qdisc lock. | 789 | * hold qdisc lock. |
955 | */ | 790 | */ |
956 | static void tcp_wfree(struct sk_buff *skb) | 791 | void tcp_wfree(struct sk_buff *skb) |
957 | { | 792 | { |
958 | struct sock *sk = skb->sk; | 793 | struct sock *sk = skb->sk; |
959 | struct tcp_sock *tp = tcp_sk(sk); | 794 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -1012,6 +847,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
1012 | __net_timestamp(skb); | 847 | __net_timestamp(skb); |
1013 | 848 | ||
1014 | if (likely(clone_it)) { | 849 | if (likely(clone_it)) { |
850 | const struct sk_buff *fclone = skb + 1; | ||
851 | |||
852 | if (unlikely(skb->fclone == SKB_FCLONE_ORIG && | ||
853 | fclone->fclone == SKB_FCLONE_CLONE)) | ||
854 | NET_INC_STATS_BH(sock_net(sk), | ||
855 | LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); | ||
856 | |||
1015 | if (unlikely(skb_cloned(skb))) | 857 | if (unlikely(skb_cloned(skb))) |
1016 | skb = pskb_copy(skb, gfp_mask); | 858 | skb = pskb_copy(skb, gfp_mask); |
1017 | else | 859 | else |
@@ -1298,7 +1140,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) | |||
1298 | eat = min_t(int, len, skb_headlen(skb)); | 1140 | eat = min_t(int, len, skb_headlen(skb)); |
1299 | if (eat) { | 1141 | if (eat) { |
1300 | __skb_pull(skb, eat); | 1142 | __skb_pull(skb, eat); |
1301 | skb->avail_size -= eat; | ||
1302 | len -= eat; | 1143 | len -= eat; |
1303 | if (!len) | 1144 | if (!len) |
1304 | return; | 1145 | return; |
@@ -1633,11 +1474,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf | |||
1633 | if (nonagle & TCP_NAGLE_PUSH) | 1474 | if (nonagle & TCP_NAGLE_PUSH) |
1634 | return true; | 1475 | return true; |
1635 | 1476 | ||
1636 | /* Don't use the nagle rule for urgent data (or for the final FIN). | 1477 | /* Don't use the nagle rule for urgent data (or for the final FIN). */ |
1637 | * Nagle can be ignored during F-RTO too (see RFC4138). | 1478 | if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) |
1638 | */ | ||
1639 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || | ||
1640 | (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) | ||
1641 | return true; | 1479 | return true; |
1642 | 1480 | ||
1643 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) | 1481 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) |
@@ -1810,8 +1648,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1810 | goto send_now; | 1648 | goto send_now; |
1811 | } | 1649 | } |
1812 | 1650 | ||
1813 | /* Ok, it looks like it is advisable to defer. */ | 1651 | /* Ok, it looks like it is advisable to defer. |
1814 | tp->tso_deferred = 1 | (jiffies << 1); | 1652 | * Do not rearm the timer if already set to not break TCP ACK clocking. |
1653 | */ | ||
1654 | if (!tp->tso_deferred) | ||
1655 | tp->tso_deferred = 1 | (jiffies << 1); | ||
1815 | 1656 | ||
1816 | return true; | 1657 | return true; |
1817 | 1658 | ||
@@ -1959,6 +1800,9 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1959 | * snd_up-64k-mss .. snd_up cannot be large. However, taking into | 1800 | * snd_up-64k-mss .. snd_up cannot be large. However, taking into |
1960 | * account rare use of URG, this is not a big flaw. | 1801 | * account rare use of URG, this is not a big flaw. |
1961 | * | 1802 | * |
1803 | * Send at most one packet when push_one > 0. Temporarily ignore | ||
1804 | * cwnd limit to force at most one packet out when push_one == 2. | ||
1805 | * | ||
1962 | * Returns true, if no segments are in flight and we have queued segments, | 1806 | * Returns true, if no segments are in flight and we have queued segments, |
1963 | * but cannot send anything now because of SWS or another problem. | 1807 | * but cannot send anything now because of SWS or another problem. |
1964 | */ | 1808 | */ |
@@ -1994,8 +1838,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1994 | goto repair; /* Skip network transmission */ | 1838 | goto repair; /* Skip network transmission */ |
1995 | 1839 | ||
1996 | cwnd_quota = tcp_cwnd_test(tp, skb); | 1840 | cwnd_quota = tcp_cwnd_test(tp, skb); |
1997 | if (!cwnd_quota) | 1841 | if (!cwnd_quota) { |
1998 | break; | 1842 | if (push_one == 2) |
1843 | /* Force out a loss probe pkt. */ | ||
1844 | cwnd_quota = 1; | ||
1845 | else | ||
1846 | break; | ||
1847 | } | ||
1999 | 1848 | ||
2000 | if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) | 1849 | if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) |
2001 | break; | 1850 | break; |
@@ -2049,10 +1898,129 @@ repair: | |||
2049 | if (likely(sent_pkts)) { | 1898 | if (likely(sent_pkts)) { |
2050 | if (tcp_in_cwnd_reduction(sk)) | 1899 | if (tcp_in_cwnd_reduction(sk)) |
2051 | tp->prr_out += sent_pkts; | 1900 | tp->prr_out += sent_pkts; |
1901 | |||
1902 | /* Send one loss probe per tail loss episode. */ | ||
1903 | if (push_one != 2) | ||
1904 | tcp_schedule_loss_probe(sk); | ||
2052 | tcp_cwnd_validate(sk); | 1905 | tcp_cwnd_validate(sk); |
2053 | return false; | 1906 | return false; |
2054 | } | 1907 | } |
2055 | return !tp->packets_out && tcp_send_head(sk); | 1908 | return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); |
1909 | } | ||
1910 | |||
1911 | bool tcp_schedule_loss_probe(struct sock *sk) | ||
1912 | { | ||
1913 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1914 | struct tcp_sock *tp = tcp_sk(sk); | ||
1915 | u32 timeout, tlp_time_stamp, rto_time_stamp; | ||
1916 | u32 rtt = tp->srtt >> 3; | ||
1917 | |||
1918 | if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) | ||
1919 | return false; | ||
1920 | /* No consecutive loss probes. */ | ||
1921 | if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { | ||
1922 | tcp_rearm_rto(sk); | ||
1923 | return false; | ||
1924 | } | ||
1925 | /* Don't do any loss probe on a Fast Open connection before 3WHS | ||
1926 | * finishes. | ||
1927 | */ | ||
1928 | if (sk->sk_state == TCP_SYN_RECV) | ||
1929 | return false; | ||
1930 | |||
1931 | /* TLP is only scheduled when next timer event is RTO. */ | ||
1932 | if (icsk->icsk_pending != ICSK_TIME_RETRANS) | ||
1933 | return false; | ||
1934 | |||
1935 | /* Schedule a loss probe in 2*RTT for SACK capable connections | ||
1936 | * in Open state, that are either limited by cwnd or application. | ||
1937 | */ | ||
1938 | if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || | ||
1939 | !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) | ||
1940 | return false; | ||
1941 | |||
1942 | if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && | ||
1943 | tcp_send_head(sk)) | ||
1944 | return false; | ||
1945 | |||
1946 | /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account | ||
1947 | * for delayed ack when there's one outstanding packet. | ||
1948 | */ | ||
1949 | timeout = rtt << 1; | ||
1950 | if (tp->packets_out == 1) | ||
1951 | timeout = max_t(u32, timeout, | ||
1952 | (rtt + (rtt >> 1) + TCP_DELACK_MAX)); | ||
1953 | timeout = max_t(u32, timeout, msecs_to_jiffies(10)); | ||
1954 | |||
1955 | /* If RTO is shorter, just schedule TLP in its place. */ | ||
1956 | tlp_time_stamp = tcp_time_stamp + timeout; | ||
1957 | rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; | ||
1958 | if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { | ||
1959 | s32 delta = rto_time_stamp - tcp_time_stamp; | ||
1960 | if (delta > 0) | ||
1961 | timeout = delta; | ||
1962 | } | ||
1963 | |||
1964 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, | ||
1965 | TCP_RTO_MAX); | ||
1966 | return true; | ||
1967 | } | ||
1968 | |||
1969 | /* When probe timeout (PTO) fires, send a new segment if one exists, else | ||
1970 | * retransmit the last segment. | ||
1971 | */ | ||
1972 | void tcp_send_loss_probe(struct sock *sk) | ||
1973 | { | ||
1974 | struct tcp_sock *tp = tcp_sk(sk); | ||
1975 | struct sk_buff *skb; | ||
1976 | int pcount; | ||
1977 | int mss = tcp_current_mss(sk); | ||
1978 | int err = -1; | ||
1979 | |||
1980 | if (tcp_send_head(sk) != NULL) { | ||
1981 | err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); | ||
1982 | goto rearm_timer; | ||
1983 | } | ||
1984 | |||
1985 | /* At most one outstanding TLP retransmission. */ | ||
1986 | if (tp->tlp_high_seq) | ||
1987 | goto rearm_timer; | ||
1988 | |||
1989 | /* Retransmit last segment. */ | ||
1990 | skb = tcp_write_queue_tail(sk); | ||
1991 | if (WARN_ON(!skb)) | ||
1992 | goto rearm_timer; | ||
1993 | |||
1994 | pcount = tcp_skb_pcount(skb); | ||
1995 | if (WARN_ON(!pcount)) | ||
1996 | goto rearm_timer; | ||
1997 | |||
1998 | if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { | ||
1999 | if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) | ||
2000 | goto rearm_timer; | ||
2001 | skb = tcp_write_queue_tail(sk); | ||
2002 | } | ||
2003 | |||
2004 | if (WARN_ON(!skb || !tcp_skb_pcount(skb))) | ||
2005 | goto rearm_timer; | ||
2006 | |||
2007 | /* Probe with zero data doesn't trigger fast recovery. */ | ||
2008 | if (skb->len > 0) | ||
2009 | err = __tcp_retransmit_skb(sk, skb); | ||
2010 | |||
2011 | /* Record snd_nxt for loss detection. */ | ||
2012 | if (likely(!err)) | ||
2013 | tp->tlp_high_seq = tp->snd_nxt; | ||
2014 | |||
2015 | rearm_timer: | ||
2016 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
2017 | inet_csk(sk)->icsk_rto, | ||
2018 | TCP_RTO_MAX); | ||
2019 | |||
2020 | if (likely(!err)) | ||
2021 | NET_INC_STATS_BH(sock_net(sk), | ||
2022 | LINUX_MIB_TCPLOSSPROBES); | ||
2023 | return; | ||
2056 | } | 2024 | } |
2057 | 2025 | ||
2058 | /* Push out any pending frames which were held back due to | 2026 | /* Push out any pending frames which were held back due to |
@@ -2386,8 +2354,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2386 | */ | 2354 | */ |
2387 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2355 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2388 | 2356 | ||
2389 | /* make sure skb->data is aligned on arches that require it */ | 2357 | /* make sure skb->data is aligned on arches that require it |
2390 | if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { | 2358 | * and check if ack-trimming & collapsing extended the headroom |
2359 | * beyond what csum_start can cover. | ||
2360 | */ | ||
2361 | if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || | ||
2362 | skb_headroom(skb) >= 0xFFFF)) { | ||
2391 | struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, | 2363 | struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, |
2392 | GFP_ATOMIC); | 2364 | GFP_ATOMIC); |
2393 | return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : | 2365 | return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : |
@@ -2673,32 +2645,24 @@ int tcp_send_synack(struct sock *sk) | |||
2673 | * sk: listener socket | 2645 | * sk: listener socket |
2674 | * dst: dst entry attached to the SYNACK | 2646 | * dst: dst entry attached to the SYNACK |
2675 | * req: request_sock pointer | 2647 | * req: request_sock pointer |
2676 | * rvp: request_values pointer | ||
2677 | * | 2648 | * |
2678 | * Allocate one skb and build a SYNACK packet. | 2649 | * Allocate one skb and build a SYNACK packet. |
2679 | * @dst is consumed : Caller should not use it again. | 2650 | * @dst is consumed : Caller should not use it again. |
2680 | */ | 2651 | */ |
2681 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | 2652 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, |
2682 | struct request_sock *req, | 2653 | struct request_sock *req, |
2683 | struct request_values *rvp, | ||
2684 | struct tcp_fastopen_cookie *foc) | 2654 | struct tcp_fastopen_cookie *foc) |
2685 | { | 2655 | { |
2686 | struct tcp_out_options opts; | 2656 | struct tcp_out_options opts; |
2687 | struct tcp_extend_values *xvp = tcp_xv(rvp); | ||
2688 | struct inet_request_sock *ireq = inet_rsk(req); | 2657 | struct inet_request_sock *ireq = inet_rsk(req); |
2689 | struct tcp_sock *tp = tcp_sk(sk); | 2658 | struct tcp_sock *tp = tcp_sk(sk); |
2690 | const struct tcp_cookie_values *cvp = tp->cookie_values; | ||
2691 | struct tcphdr *th; | 2659 | struct tcphdr *th; |
2692 | struct sk_buff *skb; | 2660 | struct sk_buff *skb; |
2693 | struct tcp_md5sig_key *md5; | 2661 | struct tcp_md5sig_key *md5; |
2694 | int tcp_header_size; | 2662 | int tcp_header_size; |
2695 | int mss; | 2663 | int mss; |
2696 | int s_data_desired = 0; | ||
2697 | 2664 | ||
2698 | if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) | 2665 | skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); |
2699 | s_data_desired = cvp->s_data_desired; | ||
2700 | skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, | ||
2701 | sk_gfp_atomic(sk, GFP_ATOMIC)); | ||
2702 | if (unlikely(!skb)) { | 2666 | if (unlikely(!skb)) { |
2703 | dst_release(dst); | 2667 | dst_release(dst); |
2704 | return NULL; | 2668 | return NULL; |
@@ -2707,6 +2671,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2707 | skb_reserve(skb, MAX_TCP_HEADER); | 2671 | skb_reserve(skb, MAX_TCP_HEADER); |
2708 | 2672 | ||
2709 | skb_dst_set(skb, dst); | 2673 | skb_dst_set(skb, dst); |
2674 | security_skb_owned_by(skb, sk); | ||
2710 | 2675 | ||
2711 | mss = dst_metric_advmss(dst); | 2676 | mss = dst_metric_advmss(dst); |
2712 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) | 2677 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) |
@@ -2740,9 +2705,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2740 | else | 2705 | else |
2741 | #endif | 2706 | #endif |
2742 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2707 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2743 | tcp_header_size = tcp_synack_options(sk, req, mss, | 2708 | tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, |
2744 | skb, &opts, &md5, xvp, foc) | 2709 | foc) + sizeof(*th); |
2745 | + sizeof(*th); | ||
2746 | 2710 | ||
2747 | skb_push(skb, tcp_header_size); | 2711 | skb_push(skb, tcp_header_size); |
2748 | skb_reset_transport_header(skb); | 2712 | skb_reset_transport_header(skb); |
@@ -2760,40 +2724,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2760 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, | 2724 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, |
2761 | TCPHDR_SYN | TCPHDR_ACK); | 2725 | TCPHDR_SYN | TCPHDR_ACK); |
2762 | 2726 | ||
2763 | if (OPTION_COOKIE_EXTENSION & opts.options) { | ||
2764 | if (s_data_desired) { | ||
2765 | u8 *buf = skb_put(skb, s_data_desired); | ||
2766 | |||
2767 | /* copy data directly from the listening socket. */ | ||
2768 | memcpy(buf, cvp->s_data_payload, s_data_desired); | ||
2769 | TCP_SKB_CB(skb)->end_seq += s_data_desired; | ||
2770 | } | ||
2771 | |||
2772 | if (opts.hash_size > 0) { | ||
2773 | __u32 workspace[SHA_WORKSPACE_WORDS]; | ||
2774 | u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; | ||
2775 | u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; | ||
2776 | |||
2777 | /* Secret recipe depends on the Timestamp, (future) | ||
2778 | * Sequence and Acknowledgment Numbers, Initiator | ||
2779 | * Cookie, and others handled by IP variant caller. | ||
2780 | */ | ||
2781 | *tail-- ^= opts.tsval; | ||
2782 | *tail-- ^= tcp_rsk(req)->rcv_isn + 1; | ||
2783 | *tail-- ^= TCP_SKB_CB(skb)->seq + 1; | ||
2784 | |||
2785 | /* recommended */ | ||
2786 | *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); | ||
2787 | *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ | ||
2788 | |||
2789 | sha_transform((__u32 *)&xvp->cookie_bakery[0], | ||
2790 | (char *)mess, | ||
2791 | &workspace[0]); | ||
2792 | opts.hash_location = | ||
2793 | (__u8 *)&xvp->cookie_bakery[0]; | ||
2794 | } | ||
2795 | } | ||
2796 | |||
2797 | th->seq = htonl(TCP_SKB_CB(skb)->seq); | 2727 | th->seq = htonl(TCP_SKB_CB(skb)->seq); |
2798 | /* XXX data is queued and acked as is. No buffer/window check */ | 2728 | /* XXX data is queued and acked as is. No buffer/window check */ |
2799 | th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); | 2729 | th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); |