Diffstat (limited to 'net/ipv4/tcp_output.c')
 net/ipv4/tcp_output.c | 394 ++++++++++++++++++++-----------------------------
 1 file changed, 162 insertions(+), 232 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e2b4461074da..536d40929ba6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,28 +65,24 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
-int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
-EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
-
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                            int push_one, gfp_t gfp);
 
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 {
+        struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         unsigned int prior_packets = tp->packets_out;
 
         tcp_advance_send_head(sk, skb);
         tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
-        /* Don't override Nagle indefinitely with F-RTO */
-        if (tp->frto_counter == 2)
-                tp->frto_counter = 3;
-
         tp->packets_out += tcp_skb_pcount(skb);
-        if (!prior_packets || tp->early_retrans_delayed)
+        if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                 tcp_rearm_rto(sk);
+        }
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -384,7 +380,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_TS               (1 << 1)
 #define OPTION_MD5              (1 << 2)
 #define OPTION_WSCALE           (1 << 3)
-#define OPTION_COOKIE_EXTENSION (1 << 4)
 #define OPTION_FAST_OPEN_COOKIE (1 << 8)
 
 struct tcp_out_options {
@@ -398,36 +393,6 @@ struct tcp_out_options {
         struct tcp_fastopen_cookie *fastopen_cookie;    /* Fast open cookie */
 };
 
-/* The sysctl int routines are generic, so check consistency here.
- */
-static u8 tcp_cookie_size_check(u8 desired)
-{
-        int cookie_size;
-
-        if (desired > 0)
-                /* previously specified */
-                return desired;
-
-        cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
-        if (cookie_size <= 0)
-                /* no default specified */
-                return 0;
-
-        if (cookie_size <= TCP_COOKIE_MIN)
-                /* value too small, specify minimum */
-                return TCP_COOKIE_MIN;
-
-        if (cookie_size >= TCP_COOKIE_MAX)
-                /* value too large, specify maximum */
-                return TCP_COOKIE_MAX;
-
-        if (cookie_size & 1)
-                /* 8-bit multiple, illegal, fix it */
-                cookie_size++;
-
-        return (u8)cookie_size;
-}
-
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -446,27 +411,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 {
         u16 options = opts->options;    /* mungable copy */
 
-        /* Having both authentication and cookies for security is redundant,
-         * and there's certainly not enough room. Instead, the cookie-less
-         * extension variant is proposed.
-         *
-         * Consider the pessimal case with authentication. The options
-         * could look like:
-         *   COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
-         */
         if (unlikely(OPTION_MD5 & options)) {
-                if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
-                        *ptr++ = htonl((TCPOPT_COOKIE << 24) |
-                                       (TCPOLEN_COOKIE_BASE << 16) |
-                                       (TCPOPT_MD5SIG << 8) |
-                                       TCPOLEN_MD5SIG);
-                } else {
-                        *ptr++ = htonl((TCPOPT_NOP << 24) |
-                                       (TCPOPT_NOP << 16) |
-                                       (TCPOPT_MD5SIG << 8) |
-                                       TCPOLEN_MD5SIG);
-                }
-                options &= ~OPTION_COOKIE_EXTENSION;
+                *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+                               (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                 /* overload cookie hash location */
                 opts->hash_location = (__u8 *)ptr;
                 ptr += 4;
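
Note: the single write kept above packs four option bytes, two NOPs followed by the MD5 option kind and length, into one 32-bit word in network byte order; the 16-byte digest itself is written later at opts->hash_location. A standalone user-space sketch of that packing (not kernel code), assuming the standard option values TCPOPT_NOP = 1, TCPOPT_MD5SIG = 19, TCPOLEN_MD5SIG = 18:

/* Illustrative only: shows the byte layout produced by the htonl() packing
 * above. Values assumed: TCPOPT_NOP = 1, TCPOPT_MD5SIG = 19, TCPOLEN_MD5SIG = 18.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
        enum { TCPOPT_NOP = 1, TCPOPT_MD5SIG = 19, TCPOLEN_MD5SIG = 18 };
        uint32_t word = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                              (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
        const uint8_t *p = (const uint8_t *)&word;

        /* Prints 01 01 13 12: two padding NOPs, then kind 19 and length 18,
         * followed on the wire by the 16-byte MD5 digest.
         */
        printf("%02x %02x %02x %02x\n", p[0], p[1], p[2], p[3]);
        return 0;
}
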
@@ -495,44 +442,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
                 *ptr++ = htonl(opts->tsecr);
         }
 
-        /* Specification requires after timestamp, so do it now.
-         *
-         * Consider the pessimal case without authentication. The options
-         * could look like:
-         *   MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
-         */
-        if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
-                __u8 *cookie_copy = opts->hash_location;
-                u8 cookie_size = opts->hash_size;
-
-                /* 8-bit multiple handled in tcp_cookie_size_check() above,
-                 * and elsewhere.
-                 */
-                if (0x2 & cookie_size) {
-                        __u8 *p = (__u8 *)ptr;
-
-                        /* 16-bit multiple */
-                        *p++ = TCPOPT_COOKIE;
-                        *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
-                        *p++ = *cookie_copy++;
-                        *p++ = *cookie_copy++;
-                        ptr++;
-                        cookie_size -= 2;
-                } else {
-                        /* 32-bit multiple */
-                        *ptr++ = htonl(((TCPOPT_NOP << 24) |
-                                        (TCPOPT_NOP << 16) |
-                                        (TCPOPT_COOKIE << 8) |
-                                        TCPOLEN_COOKIE_BASE) +
-                                       cookie_size);
-                }
-
-                if (cookie_size > 0) {
-                        memcpy(ptr, cookie_copy, cookie_size);
-                        ptr += (cookie_size / 4);
-                }
-        }
-
         if (unlikely(OPTION_SACK_ADVERTISE & options)) {
                 *ptr++ = htonl((TCPOPT_NOP << 24) |
                                (TCPOPT_NOP << 16) |
@@ -591,11 +500,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
                                 struct tcp_md5sig_key **md5)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        struct tcp_cookie_values *cvp = tp->cookie_values;
         unsigned int remaining = MAX_TCP_OPTION_SPACE;
-        u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
-                         tcp_cookie_size_check(cvp->cookie_desired) :
-                         0;
         struct tcp_fastopen_request *fastopen = tp->fastopen_req;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -647,52 +552,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
                         tp->syn_fastopen = 1;
                 }
         }
-        /* Note that timestamps are required by the specification.
-         *
-         * Odd numbers of bytes are prohibited by the specification, ensuring
-         * that the cookie is 16-bit aligned, and the resulting cookie pair is
-         * 32-bit aligned.
-         */
-        if (*md5 == NULL &&
-            (OPTION_TS & opts->options) &&
-            cookie_size > 0) {
-                int need = TCPOLEN_COOKIE_BASE + cookie_size;
-
-                if (0x2 & need) {
-                        /* 32-bit multiple */
-                        need += 2; /* NOPs */
-
-                        if (need > remaining) {
-                                /* try shrinking cookie to fit */
-                                cookie_size -= 2;
-                                need -= 4;
-                        }
-                }
-                while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
-                        cookie_size -= 4;
-                        need -= 4;
-                }
-                if (TCP_COOKIE_MIN <= cookie_size) {
-                        opts->options |= OPTION_COOKIE_EXTENSION;
-                        opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
-                        opts->hash_size = cookie_size;
-
-                        /* Remember for future incarnations. */
-                        cvp->cookie_desired = cookie_size;
-
-                        if (cvp->cookie_desired != cvp->cookie_pair_size) {
-                                /* Currently use random bytes as a nonce,
-                                 * assuming these are completely unpredictable
-                                 * by hostile users of the same system.
-                                 */
-                                get_random_bytes(&cvp->cookie_pair[0],
-                                                 cookie_size);
-                                cvp->cookie_pair_size = cookie_size;
-                        }
 
-                        remaining -= need;
-                }
-        }
         return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -702,14 +562,10 @@ static unsigned int tcp_synack_options(struct sock *sk,
                                    unsigned int mss, struct sk_buff *skb,
                                    struct tcp_out_options *opts,
                                    struct tcp_md5sig_key **md5,
-                                   struct tcp_extend_values *xvp,
                                    struct tcp_fastopen_cookie *foc)
 {
         struct inet_request_sock *ireq = inet_rsk(req);
         unsigned int remaining = MAX_TCP_OPTION_SPACE;
-        u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
-                         xvp->cookie_plus :
-                         0;
 
 #ifdef CONFIG_TCP_MD5SIG
         *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -757,28 +613,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
                         remaining -= need;
                 }
         }
-        /* Similar rationale to tcp_syn_options() applies here, too.
-         * If the <SYN> options fit, the same options should fit now!
-         */
-        if (*md5 == NULL &&
-            ireq->tstamp_ok &&
-            cookie_plus > TCPOLEN_COOKIE_BASE) {
-                int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
-
-                if (0x2 & need) {
-                        /* 32-bit multiple */
-                        need += 2; /* NOPs */
-                }
-                if (need <= remaining) {
-                        opts->options |= OPTION_COOKIE_EXTENSION;
-                        opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
-                        remaining -= need;
-                } else {
-                        /* There's no error return, so flag it. */
-                        xvp->cookie_out_never = 1; /* true */
-                        opts->hash_size = 0;
-                }
-        }
+
         return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -953,7 +788,7 @@ void __init tcp_tasklet_init(void)
  * We cant xmit new skbs from this context, as we might already
  * hold qdisc lock.
  */
-static void tcp_wfree(struct sk_buff *skb)
+void tcp_wfree(struct sk_buff *skb)
 {
         struct sock *sk = skb->sk;
         struct tcp_sock *tp = tcp_sk(sk);
@@ -1012,6 +847,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                 __net_timestamp(skb);
 
         if (likely(clone_it)) {
+                const struct sk_buff *fclone = skb + 1;
+
+                if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+                             fclone->fclone == SKB_FCLONE_CLONE))
+                        NET_INC_STATS_BH(sock_net(sk),
+                                         LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+
                 if (unlikely(skb_cloned(skb)))
                         skb = pskb_copy(skb, gfp_mask);
                 else
@@ -1298,7 +1140,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
         eat = min_t(int, len, skb_headlen(skb));
         if (eat) {
                 __skb_pull(skb, eat);
-                skb->avail_size -= eat;
                 len -= eat;
                 if (!len)
                         return;
@@ -1633,11 +1474,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
         if (nonagle & TCP_NAGLE_PUSH)
                 return true;
 
-        /* Don't use the nagle rule for urgent data (or for the final FIN).
-         * Nagle can be ignored during F-RTO too (see RFC4138).
-         */
-        if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
-            (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
+        /* Don't use the nagle rule for urgent data (or for the final FIN). */
+        if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
                 return true;
 
         if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1810,8 +1648,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
                 goto send_now;
         }
 
-        /* Ok, it looks like it is advisable to defer. */
-        tp->tso_deferred = 1 | (jiffies << 1);
+        /* Ok, it looks like it is advisable to defer.
+         * Do not rearm the timer if already set to not break TCP ACK clocking.
+         */
+        if (!tp->tso_deferred)
+                tp->tso_deferred = 1 | (jiffies << 1);
 
         return true;
 
@@ -1959,6 +1800,9 @@ static int tcp_mtu_probe(struct sock *sk)
  * snd_up-64k-mss .. snd_up cannot be large. However, taking into
  * account rare use of URG, this is not a big flaw.
  *
+ * Send at most one packet when push_one > 0. Temporarily ignore
+ * cwnd limit to force at most one packet out when push_one == 2.
+
  * Returns true, if no segments are in flight and we have queued segments,
  * but cannot send anything now because of SWS or another problem.
  */
@@ -1994,8 +1838,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                         goto repair; /* Skip network transmission */
 
                 cwnd_quota = tcp_cwnd_test(tp, skb);
-                if (!cwnd_quota)
-                        break;
+                if (!cwnd_quota) {
+                        if (push_one == 2)
+                                /* Force out a loss probe pkt. */
+                                cwnd_quota = 1;
+                        else
+                                break;
+                }
 
                 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
                         break;
@@ -2049,10 +1898,129 @@ repair:
         if (likely(sent_pkts)) {
                 if (tcp_in_cwnd_reduction(sk))
                         tp->prr_out += sent_pkts;
+
+                /* Send one loss probe per tail loss episode. */
+                if (push_one != 2)
+                        tcp_schedule_loss_probe(sk);
                 tcp_cwnd_validate(sk);
                 return false;
         }
-        return !tp->packets_out && tcp_send_head(sk);
+        return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
+}
+
+bool tcp_schedule_loss_probe(struct sock *sk)
+{
+        struct inet_connection_sock *icsk = inet_csk(sk);
+        struct tcp_sock *tp = tcp_sk(sk);
+        u32 timeout, tlp_time_stamp, rto_time_stamp;
+        u32 rtt = tp->srtt >> 3;
+
+        if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
+                return false;
+        /* No consecutive loss probes. */
+        if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
+                tcp_rearm_rto(sk);
+                return false;
+        }
+        /* Don't do any loss probe on a Fast Open connection before 3WHS
+         * finishes.
+         */
+        if (sk->sk_state == TCP_SYN_RECV)
+                return false;
+
+        /* TLP is only scheduled when next timer event is RTO. */
+        if (icsk->icsk_pending != ICSK_TIME_RETRANS)
+                return false;
+
+        /* Schedule a loss probe in 2*RTT for SACK capable connections
+         * in Open state, that are either limited by cwnd or application.
+         */
+        if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+            !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+                return false;
+
+        if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
+             tcp_send_head(sk))
+                return false;
+
+        /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+         * for delayed ack when there's one outstanding packet.
+         */
+        timeout = rtt << 1;
+        if (tp->packets_out == 1)
+                timeout = max_t(u32, timeout,
+                                (rtt + (rtt >> 1) + TCP_DELACK_MAX));
+        timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+
+        /* If RTO is shorter, just schedule TLP in its place. */
+        tlp_time_stamp = tcp_time_stamp + timeout;
+        rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
+        if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
+                s32 delta = rto_time_stamp - tcp_time_stamp;
+                if (delta > 0)
+                        timeout = delta;
+        }
+
+        inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
+                                  TCP_RTO_MAX);
+        return true;
+}
+
+/* When probe timeout (PTO) fires, send a new segment if one exists, else
+ * retransmit the last segment.
+ */
+void tcp_send_loss_probe(struct sock *sk)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+        struct sk_buff *skb;
+        int pcount;
+        int mss = tcp_current_mss(sk);
+        int err = -1;
+
+        if (tcp_send_head(sk) != NULL) {
+                err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+                goto rearm_timer;
+        }
+
+        /* At most one outstanding TLP retransmission. */
+        if (tp->tlp_high_seq)
+                goto rearm_timer;
+
+        /* Retransmit last segment. */
+        skb = tcp_write_queue_tail(sk);
+        if (WARN_ON(!skb))
+                goto rearm_timer;
+
+        pcount = tcp_skb_pcount(skb);
+        if (WARN_ON(!pcount))
+                goto rearm_timer;
+
+        if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
+                if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+                        goto rearm_timer;
+                skb = tcp_write_queue_tail(sk);
+        }
+
+        if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
+                goto rearm_timer;
+
+        /* Probe with zero data doesn't trigger fast recovery. */
+        if (skb->len > 0)
+                err = __tcp_retransmit_skb(sk, skb);
+
+        /* Record snd_nxt for loss detection. */
+        if (likely(!err))
+                tp->tlp_high_seq = tp->snd_nxt;
+
+rearm_timer:
+        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                  inet_csk(sk)->icsk_rto,
+                                  TCP_RTO_MAX);
+
+        if (likely(!err))
+                NET_INC_STATS_BH(sock_net(sk),
+                                 LINUX_MIB_TCPLOSSPROBES);
+        return;
 }
 
 /* Push out any pending frames which were held back due to
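
Note: the probe timeout (PTO) chosen in tcp_schedule_loss_probe() above is 2*RTT, raised to 1.5*RTT + TCP_DELACK_MAX when only one packet is outstanding (to wait out a potential delayed ACK) and floored at 10 ms, before being clamped against the pending RTO. A minimal user-space sketch of that arithmetic, working in milliseconds rather than jiffies and assuming TCP_DELACK_MAX of 200 ms; the RTO clamp is omitted:

/* Standalone sketch (not kernel code): mirrors the PTO calculation above. */
#include <stdio.h>

static unsigned int tlp_timeout_ms(unsigned int srtt_ms, unsigned int packets_out)
{
        const unsigned int delack_max_ms = 200; /* assumed TCP_DELACK_MAX */
        unsigned int timeout = 2 * srtt_ms;     /* base PTO: 2*RTT */

        if (packets_out == 1) {
                /* one packet in flight: wait out a potential delayed ACK */
                unsigned int with_delack = srtt_ms + srtt_ms / 2 + delack_max_ms;
                if (with_delack > timeout)
                        timeout = with_delack;
        }
        if (timeout < 10)       /* floor of 10 ms, as above */
                timeout = 10;
        return timeout;
}

int main(void)
{
        /* e.g. srtt = 40 ms: PTO is 80 ms with several packets out,
         * but 40 + 20 + 200 = 260 ms when only one packet is outstanding.
         */
        printf("PTO (many pkts): %u ms\n", tlp_timeout_ms(40, 4));
        printf("PTO (one pkt):   %u ms\n", tlp_timeout_ms(40, 1));
        return 0;
}
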
@@ -2386,8 +2354,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
          */
         TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-        /* make sure skb->data is aligned on arches that require it */
-        if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
+        /* make sure skb->data is aligned on arches that require it
+         * and check if ack-trimming & collapsing extended the headroom
+         * beyond what csum_start can cover.
+         */
+        if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
+                     skb_headroom(skb) >= 0xFFFF)) {
                 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
                                                    GFP_ATOMIC);
                 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
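
Note: the headroom test added above exists because skb->csum_start is a 16-bit offset from skb->head; once ack-trimming and collapsing have grown the headroom to 0xFFFF or more, that offset can no longer be encoded and the skb must be copied into a fresh buffer. A toy illustration of the constraint, using a made-up struct that only mimics the relevant sk_buff fields:

/* Illustrative only: why headroom must stay below 0xFFFF. The field names
 * mimic, not reproduce, struct sk_buff.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_skb {
        unsigned char *head;    /* start of allocated buffer */
        unsigned char *data;    /* start of packet data */
        uint16_t csum_start;    /* offset from head, 16 bits only */
};

static int set_csum_start(struct toy_skb *skb)
{
        size_t headroom = (size_t)(skb->data - skb->head);

        if (headroom >= 0xFFFF)         /* would overflow the u16 offset */
                return -1;              /* caller must copy to a fresh skb */
        skb->csum_start = (uint16_t)headroom;
        return 0;
}

int main(void)
{
        static unsigned char buf[0x10010];
        struct toy_skb skb = { .head = buf, .data = buf + 0xFFFF };

        printf("%s\n", set_csum_start(&skb) ? "copy needed" : "ok");
        return 0;
}
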
@@ -2673,32 +2645,24 @@ int tcp_send_synack(struct sock *sk)
  * sk: listener socket
  * dst: dst entry attached to the SYNACK
  * req: request_sock pointer
- * rvp: request_values pointer
  *
  * Allocate one skb and build a SYNACK packet.
  * @dst is consumed : Caller should not use it again.
  */
 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
                                 struct request_sock *req,
-                                struct request_values *rvp,
                                 struct tcp_fastopen_cookie *foc)
 {
         struct tcp_out_options opts;
-        struct tcp_extend_values *xvp = tcp_xv(rvp);
         struct inet_request_sock *ireq = inet_rsk(req);
         struct tcp_sock *tp = tcp_sk(sk);
-        const struct tcp_cookie_values *cvp = tp->cookie_values;
         struct tcphdr *th;
         struct sk_buff *skb;
         struct tcp_md5sig_key *md5;
         int tcp_header_size;
         int mss;
-        int s_data_desired = 0;
 
-        if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
-                s_data_desired = cvp->s_data_desired;
-        skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
-                        sk_gfp_atomic(sk, GFP_ATOMIC));
+        skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
         if (unlikely(!skb)) {
                 dst_release(dst);
                 return NULL;
@@ -2707,6 +2671,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
         skb_reserve(skb, MAX_TCP_HEADER);
 
         skb_dst_set(skb, dst);
+        security_skb_owned_by(skb, sk);
 
         mss = dst_metric_advmss(dst);
         if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
@@ -2740,9 +2705,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
         else
 #endif
         TCP_SKB_CB(skb)->when = tcp_time_stamp;
-        tcp_header_size = tcp_synack_options(sk, req, mss,
-                                             skb, &opts, &md5, xvp, foc)
-                        + sizeof(*th);
+        tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
+                                             foc) + sizeof(*th);
 
         skb_push(skb, tcp_header_size);
         skb_reset_transport_header(skb);
@@ -2760,40 +2724,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
         tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
                              TCPHDR_SYN | TCPHDR_ACK);
 
-        if (OPTION_COOKIE_EXTENSION & opts.options) {
-                if (s_data_desired) {
-                        u8 *buf = skb_put(skb, s_data_desired);
-
-                        /* copy data directly from the listening socket. */
-                        memcpy(buf, cvp->s_data_payload, s_data_desired);
-                        TCP_SKB_CB(skb)->end_seq += s_data_desired;
-                }
-
-                if (opts.hash_size > 0) {
-                        __u32 workspace[SHA_WORKSPACE_WORDS];
-                        u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
-                        u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
-
-                        /* Secret recipe depends on the Timestamp, (future)
-                         * Sequence and Acknowledgment Numbers, Initiator
-                         * Cookie, and others handled by IP variant caller.
-                         */
-                        *tail-- ^= opts.tsval;
-                        *tail-- ^= tcp_rsk(req)->rcv_isn + 1;
-                        *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
-
-                        /* recommended */
-                        *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
-                        *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
-
-                        sha_transform((__u32 *)&xvp->cookie_bakery[0],
-                                      (char *)mess,
-                                      &workspace[0]);
-                        opts.hash_location =
-                                (__u8 *)&xvp->cookie_bakery[0];
-                }
-        }
-
         th->seq = htonl(TCP_SKB_CB(skb)->seq);
         /* XXX data is queued and acked as is. No buffer/window check */
         th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);