Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c | 377
1 file changed, 150 insertions(+), 227 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 509912a5ff98..536d40929ba6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,28 +65,24 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
-int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
-EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
-
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
 
 	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
-	/* Don't override Nagle indefinitely with F-RTO */
-	if (tp->frto_counter == 2)
-		tp->frto_counter = 3;
-
 	tp->packets_out += tcp_skb_pcount(skb);
-	if (!prior_packets || tp->early_retrans_delayed)
+	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		tcp_rearm_rto(sk);
+	}
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -384,7 +380,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_TS		(1 << 1)
 #define OPTION_MD5		(1 << 2)
 #define OPTION_WSCALE		(1 << 3)
-#define OPTION_COOKIE_EXTENSION	(1 << 4)
 #define OPTION_FAST_OPEN_COOKIE	(1 << 8)
 
 struct tcp_out_options {
@@ -398,36 +393,6 @@ struct tcp_out_options {
 	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
 };
 
-/* The sysctl int routines are generic, so check consistency here.
- */
-static u8 tcp_cookie_size_check(u8 desired)
-{
-	int cookie_size;
-
-	if (desired > 0)
-		/* previously specified */
-		return desired;
-
-	cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
-	if (cookie_size <= 0)
-		/* no default specified */
-		return 0;
-
-	if (cookie_size <= TCP_COOKIE_MIN)
-		/* value too small, specify minimum */
-		return TCP_COOKIE_MIN;
-
-	if (cookie_size >= TCP_COOKIE_MAX)
-		/* value too large, specify maximum */
-		return TCP_COOKIE_MAX;
-
-	if (cookie_size & 1)
-		/* 8-bit multiple, illegal, fix it */
-		cookie_size++;
-
-	return (u8)cookie_size;
-}
-
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -446,27 +411,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 {
 	u16 options = opts->options;	/* mungable copy */
 
-	/* Having both authentication and cookies for security is redundant,
-	 * and there's certainly not enough room. Instead, the cookie-less
-	 * extension variant is proposed.
-	 *
-	 * Consider the pessimal case with authentication. The options
-	 * could look like:
-	 *   COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
-	 */
 	if (unlikely(OPTION_MD5 & options)) {
-		if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
-			*ptr++ = htonl((TCPOPT_COOKIE << 24) |
-				       (TCPOLEN_COOKIE_BASE << 16) |
-				       (TCPOPT_MD5SIG << 8) |
-				       TCPOLEN_MD5SIG);
-		} else {
-			*ptr++ = htonl((TCPOPT_NOP << 24) |
-				       (TCPOPT_NOP << 16) |
-				       (TCPOPT_MD5SIG << 8) |
-				       TCPOLEN_MD5SIG);
-		}
-		options &= ~OPTION_COOKIE_EXTENSION;
+		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+			       (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
 		/* overload cookie hash location */
 		opts->hash_location = (__u8 *)ptr;
 		ptr += 4;
@@ -495,44 +442,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		*ptr++ = htonl(opts->tsecr);
 	}
 
-	/* Specification requires after timestamp, so do it now.
-	 *
-	 * Consider the pessimal case without authentication. The options
-	 * could look like:
-	 *   MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
-	 */
-	if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
-		__u8 *cookie_copy = opts->hash_location;
-		u8 cookie_size = opts->hash_size;
-
-		/* 8-bit multiple handled in tcp_cookie_size_check() above,
-		 * and elsewhere.
-		 */
-		if (0x2 & cookie_size) {
-			__u8 *p = (__u8 *)ptr;
-
-			/* 16-bit multiple */
-			*p++ = TCPOPT_COOKIE;
-			*p++ = TCPOLEN_COOKIE_BASE + cookie_size;
-			*p++ = *cookie_copy++;
-			*p++ = *cookie_copy++;
-			ptr++;
-			cookie_size -= 2;
-		} else {
-			/* 32-bit multiple */
-			*ptr++ = htonl(((TCPOPT_NOP << 24) |
-					(TCPOPT_NOP << 16) |
-					(TCPOPT_COOKIE << 8) |
-					TCPOLEN_COOKIE_BASE) +
-				       cookie_size);
-		}
-
-		if (cookie_size > 0) {
-			memcpy(ptr, cookie_copy, cookie_size);
-			ptr += (cookie_size / 4);
-		}
-	}
-
 	if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
@@ -591,11 +500,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_md5sig_key **md5)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_cookie_values *cvp = tp->cookie_values;
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
-	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
-			 tcp_cookie_size_check(cvp->cookie_desired) :
-			 0;
 	struct tcp_fastopen_request *fastopen = tp->fastopen_req;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -647,52 +552,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 			tp->syn_fastopen = 1;
 		}
 	}
-	/* Note that timestamps are required by the specification.
-	 *
-	 * Odd numbers of bytes are prohibited by the specification, ensuring
-	 * that the cookie is 16-bit aligned, and the resulting cookie pair is
-	 * 32-bit aligned.
-	 */
-	if (*md5 == NULL &&
-	    (OPTION_TS & opts->options) &&
-	    cookie_size > 0) {
-		int need = TCPOLEN_COOKIE_BASE + cookie_size;
-
-		if (0x2 & need) {
-			/* 32-bit multiple */
-			need += 2; /* NOPs */
-
-			if (need > remaining) {
-				/* try shrinking cookie to fit */
-				cookie_size -= 2;
-				need -= 4;
-			}
-		}
-		while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
-			cookie_size -= 4;
-			need -= 4;
-		}
-		if (TCP_COOKIE_MIN <= cookie_size) {
-			opts->options |= OPTION_COOKIE_EXTENSION;
-			opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
-			opts->hash_size = cookie_size;
-
-			/* Remember for future incarnations. */
-			cvp->cookie_desired = cookie_size;
-
-			if (cvp->cookie_desired != cvp->cookie_pair_size) {
-				/* Currently use random bytes as a nonce,
-				 * assuming these are completely unpredictable
-				 * by hostile users of the same system.
-				 */
-				get_random_bytes(&cvp->cookie_pair[0],
-						 cookie_size);
-				cvp->cookie_pair_size = cookie_size;
-			}
 
-			remaining -= need;
-		}
-	}
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -702,14 +562,10 @@ static unsigned int tcp_synack_options(struct sock *sk,
 				   unsigned int mss, struct sk_buff *skb,
 				   struct tcp_out_options *opts,
 				   struct tcp_md5sig_key **md5,
-				   struct tcp_extend_values *xvp,
 				   struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
-	u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
-			 xvp->cookie_plus :
-			 0;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -757,28 +613,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
 			remaining -= need;
 		}
 	}
-	/* Similar rationale to tcp_syn_options() applies here, too.
-	 * If the <SYN> options fit, the same options should fit now!
-	 */
-	if (*md5 == NULL &&
-	    ireq->tstamp_ok &&
-	    cookie_plus > TCPOLEN_COOKIE_BASE) {
-		int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
-
-		if (0x2 & need) {
-			/* 32-bit multiple */
-			need += 2; /* NOPs */
-		}
-		if (need <= remaining) {
-			opts->options |= OPTION_COOKIE_EXTENSION;
-			opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
-			remaining -= need;
-		} else {
-			/* There's no error return, so flag it. */
-			xvp->cookie_out_never = 1; /* true */
-			opts->hash_size = 0;
-		}
-	}
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -953,7 +788,7 @@ void __init tcp_tasklet_init(void)
  * We cant xmit new skbs from this context, as we might already
  * hold qdisc lock.
  */
-static void tcp_wfree(struct sk_buff *skb)
+void tcp_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1012,6 +847,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		__net_timestamp(skb);
 
 	if (likely(clone_it)) {
+		const struct sk_buff *fclone = skb + 1;
+
+		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+			     fclone->fclone == SKB_FCLONE_CLONE))
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPSPURIOUSRTXHOSTQUEUES);
+
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
 		else
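Editor's note: the new check in this hunk leans on a layout detail of fast clones. A minimal sketch of that assumption (illustrative kernel-style C, not part of the patch; `size` is a placeholder):

	/* Fast-clone skbs are carved out of skbuff_fclone_cache in adjacent
	 * pairs, so the companion clone of an SKB_FCLONE_ORIG skb sits at
	 * skb + 1. If that companion is still marked SKB_FCLONE_CLONE, the
	 * previous transmit of this skb has not yet left the host's
	 * qdisc/device queues, so the retransmission is counted as spurious.
	 */
	struct sk_buff *orig  = alloc_skb_fclone(size, GFP_KERNEL);
	struct sk_buff *clone = skb_clone(orig, GFP_ATOMIC); /* == orig + 1 */

	/* While both halves are live:
	 *   orig->fclone  == SKB_FCLONE_ORIG
	 *   clone->fclone == SKB_FCLONE_CLONE
	 * Once the clone is freed by the driver/qdisc, the test above stops
	 * matching and a retransmit is presumed genuine. */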
@@ -1632,11 +1474,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
 	if (nonagle & TCP_NAGLE_PUSH)
 		return true;
 
-	/* Don't use the nagle rule for urgent data (or for the final FIN).
-	 * Nagle can be ignored during F-RTO too (see RFC4138).
-	 */
-	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
-	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
+	/* Don't use the nagle rule for urgent data (or for the final FIN). */
+	if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
 		return true;
 
 	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1961,6 +1800,9 @@ static int tcp_mtu_probe(struct sock *sk)
  * snd_up-64k-mss .. snd_up cannot be large. However, taking into
  * account rare use of URG, this is not a big flaw.
  *
+ * Send at most one packet when push_one > 0. Temporarily ignore
+ * cwnd limit to force at most one packet out when push_one == 2.
+ *
  * Returns true, if no segments are in flight and we have queued segments,
  * but cannot send anything now because of SWS or another problem.
  */
@@ -1996,8 +1838,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			goto repair; /* Skip network transmission */
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
-		if (!cwnd_quota)
-			break;
+		if (!cwnd_quota) {
+			if (push_one == 2)
+				/* Force out a loss probe pkt. */
+				cwnd_quota = 1;
+			else
+				break;
+		}
 
 		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
 			break;
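Editor's note: this hunk implements the comment added above: push_one == 2 lets a loss probe temporarily ignore an exhausted congestion window for exactly one segment. A standalone sketch of the quota decision (plain userspace C; names are hypothetical, not kernel API):

#include <stdio.h>

/* Sketch of the quota decision: a loss probe (push_one == 2) may
 * override an exhausted congestion window for exactly one segment. */
static unsigned int cwnd_quota(unsigned int cwnd, unsigned int in_flight,
			       int push_one)
{
	unsigned int quota = cwnd > in_flight ? cwnd - in_flight : 0;

	if (!quota && push_one == 2)
		quota = 1;	/* force out a single loss-probe packet */
	return quota;
}

int main(void)
{
	printf("%u\n", cwnd_quota(10, 10, 0));	/* 0: normal send must stop */
	printf("%u\n", cwnd_quota(10, 10, 2));	/* 1: loss probe still goes out */
	return 0;
}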
@@ -2051,10 +1898,129 @@ repair:
 	if (likely(sent_pkts)) {
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += sent_pkts;
+
+		/* Send one loss probe per tail loss episode. */
+		if (push_one != 2)
+			tcp_schedule_loss_probe(sk);
 		tcp_cwnd_validate(sk);
 		return false;
 	}
-	return !tp->packets_out && tcp_send_head(sk);
+	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
+}
+
+bool tcp_schedule_loss_probe(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 timeout, tlp_time_stamp, rto_time_stamp;
+	u32 rtt = tp->srtt >> 3;
+
+	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
+		return false;
+	/* No consecutive loss probes. */
+	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
+		tcp_rearm_rto(sk);
+		return false;
+	}
+	/* Don't do any loss probe on a Fast Open connection before 3WHS
+	 * finishes.
+	 */
+	if (sk->sk_state == TCP_SYN_RECV)
+		return false;
+
+	/* TLP is only scheduled when next timer event is RTO. */
+	if (icsk->icsk_pending != ICSK_TIME_RETRANS)
+		return false;
+
+	/* Schedule a loss probe in 2*RTT for SACK capable connections
+	 * in Open state, that are either limited by cwnd or application.
+	 */
+	if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+		return false;
+
+	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
+	     tcp_send_head(sk))
+		return false;
+
+	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+	 * for delayed ack when there's one outstanding packet.
+	 */
+	timeout = rtt << 1;
+	if (tp->packets_out == 1)
+		timeout = max_t(u32, timeout,
+				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
+	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+
+	/* If RTO is shorter, just schedule TLP in its place. */
+	tlp_time_stamp = tcp_time_stamp + timeout;
+	rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
+	if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
+		s32 delta = rto_time_stamp - tcp_time_stamp;
+		if (delta > 0)
+			timeout = delta;
+	}
+
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
+				  TCP_RTO_MAX);
+	return true;
+}
+
+/* When probe timeout (PTO) fires, send a new segment if one exists, else
+ * retransmit the last segment.
+ */
+void tcp_send_loss_probe(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int pcount;
+	int mss = tcp_current_mss(sk);
+	int err = -1;
+
+	if (tcp_send_head(sk) != NULL) {
+		err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+		goto rearm_timer;
+	}
+
+	/* At most one outstanding TLP retransmission. */
+	if (tp->tlp_high_seq)
+		goto rearm_timer;
+
+	/* Retransmit last segment. */
+	skb = tcp_write_queue_tail(sk);
+	if (WARN_ON(!skb))
+		goto rearm_timer;
+
+	pcount = tcp_skb_pcount(skb);
+	if (WARN_ON(!pcount))
+		goto rearm_timer;
+
+	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
+		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+			goto rearm_timer;
+		skb = tcp_write_queue_tail(sk);
+	}
+
+	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
+		goto rearm_timer;
+
+	/* Probe with zero data doesn't trigger fast recovery. */
+	if (skb->len > 0)
+		err = __tcp_retransmit_skb(sk, skb);
+
+	/* Record snd_nxt for loss detection. */
+	if (likely(!err))
+		tp->tlp_high_seq = tp->snd_nxt;
+
+rearm_timer:
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+				  inet_csk(sk)->icsk_rto,
+				  TCP_RTO_MAX);
+
+	if (likely(!err))
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPLOSSPROBES);
+	return;
 }
 
 /* Push out any pending frames which were held back due to
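Editor's note: the timeout math in tcp_schedule_loss_probe() above is easiest to see with numbers: PTO = max(2*RTT, 1.5*RTT + a delayed-ACK allowance when exactly one packet is in flight, 10 ms), then clamped so it never fires later than the pending RTO. A standalone sketch, assuming HZ = 1000 (1 jiffy = 1 ms) and TCP_DELACK_MAX = 200 ms, which are common defaults rather than values taken from this diff:

#include <stdio.h>

#define TCP_DELACK_MAX_MS 200	/* assumed: HZ/5 with HZ = 1000 */

/* tp->srtt stores 8 * smoothed RTT; mirror the kernel's >> 3. */
static unsigned int tlp_timeout_ms(unsigned int srtt_x8,
				   unsigned int packets_out)
{
	unsigned int rtt = srtt_x8 >> 3;
	unsigned int timeout = rtt << 1;	/* base PTO = 2 * RTT */

	if (packets_out == 1) {			/* allow for a delayed ACK */
		unsigned int with_delack = rtt + (rtt >> 1) + TCP_DELACK_MAX_MS;
		if (with_delack > timeout)
			timeout = with_delack;
	}
	if (timeout < 10)			/* 10 ms floor */
		timeout = 10;
	return timeout;				/* caller clamps to the RTO */
}

int main(void)
{
	printf("%u\n", tlp_timeout_ms(50 << 3, 1));	/* 275: 1.5*50 + 200 */
	printf("%u\n", tlp_timeout_ms(50 << 3, 4));	/* 100: plain 2*RTT */
	return 0;
}

With a 50 ms RTT and a single outstanding packet this yields a 275 ms probe timeout; with several packets in flight it is simply 100 ms.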
@@ -2679,32 +2645,24 @@ int tcp_send_synack(struct sock *sk)
  * sk: listener socket
  * dst: dst entry attached to the SYNACK
  * req: request_sock pointer
- * rvp: request_values pointer
  *
  * Allocate one skb and build a SYNACK packet.
  * @dst is consumed : Caller should not use it again.
  */
 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
-				struct request_values *rvp,
 				struct tcp_fastopen_cookie *foc)
 {
 	struct tcp_out_options opts;
-	struct tcp_extend_values *xvp = tcp_xv(rvp);
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct tcp_cookie_values *cvp = tp->cookie_values;
 	struct tcphdr *th;
 	struct sk_buff *skb;
 	struct tcp_md5sig_key *md5;
 	int tcp_header_size;
 	int mss;
-	int s_data_desired = 0;
 
-	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
-		s_data_desired = cvp->s_data_desired;
-	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
-			sk_gfp_atomic(sk, GFP_ATOMIC));
+	skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
 	if (unlikely(!skb)) {
 		dst_release(dst);
 		return NULL;
@@ -2747,9 +2705,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	else
 #endif
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_header_size = tcp_synack_options(sk, req, mss,
-					     skb, &opts, &md5, xvp, foc)
-			+ sizeof(*th);
+	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
+					     foc) + sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -2767,40 +2724,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
 			     TCPHDR_SYN | TCPHDR_ACK);
 
-	if (OPTION_COOKIE_EXTENSION & opts.options) {
-		if (s_data_desired) {
-			u8 *buf = skb_put(skb, s_data_desired);
-
-			/* copy data directly from the listening socket. */
-			memcpy(buf, cvp->s_data_payload, s_data_desired);
-			TCP_SKB_CB(skb)->end_seq += s_data_desired;
-		}
-
-		if (opts.hash_size > 0) {
-			__u32 workspace[SHA_WORKSPACE_WORDS];
-			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
-			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
-
-			/* Secret recipe depends on the Timestamp, (future)
-			 * Sequence and Acknowledgment Numbers, Initiator
-			 * Cookie, and others handled by IP variant caller.
-			 */
-			*tail-- ^= opts.tsval;
-			*tail-- ^= tcp_rsk(req)->rcv_isn + 1;
-			*tail-- ^= TCP_SKB_CB(skb)->seq + 1;
-
-			/* recommended */
-			*tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
-			*tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
-
-			sha_transform((__u32 *)&xvp->cookie_bakery[0],
-				      (char *)mess,
-				      &workspace[0]);
-			opts.hash_location =
-				(__u8 *)&xvp->cookie_bakery[0];
-		}
-	}
-
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	/* XXX data is queued and acked as is. No buffer/window check */
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
