Diffstat (limited to 'net/ipv4/tcp_output.c')
 -rw-r--r--  net/ipv4/tcp_output.c  241
 1 file changed, 165 insertions(+), 76 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c5536b..a369e8a70b2c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -518,17 +518,26 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 
 	if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
 		struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
+		u8 *p = (u8 *)ptr;
+		u32 len; /* Fast Open option length */
+
+		if (foc->exp) {
+			len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+			*ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
+				     TCPOPT_FASTOPEN_MAGIC);
+			p += TCPOLEN_EXP_FASTOPEN_BASE;
+		} else {
+			len = TCPOLEN_FASTOPEN_BASE + foc->len;
+			*p++ = TCPOPT_FASTOPEN;
+			*p++ = len;
+		}
 
-		*ptr++ = htonl((TCPOPT_EXP << 24) |
-			       ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
-			       TCPOPT_FASTOPEN_MAGIC);
-
-		memcpy(ptr, foc->val, foc->len);
-		if ((foc->len & 3) == 2) {
-			u8 *align = ((u8 *)ptr) + foc->len;
-			align[0] = align[1] = TCPOPT_NOP;
+		memcpy(p, foc->val, foc->len);
+		if ((len & 3) == 2) {
+			p[foc->len] = TCPOPT_NOP;
+			p[foc->len + 1] = TCPOPT_NOP;
 		}
-		ptr += (foc->len + 3) >> 2;
+		ptr += (len + 3) >> 2;
 	}
 }
 
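The rewritten tcp_options_write() above now emits the Fast Open cookie either in the pre-RFC experimental option (kind 254 followed by the 0xF989 magic) or in the RFC 7413 option (kind 34), and pads the result to a 32-bit boundary with NOPs. Below is a minimal user-space sketch of that wire encoding, for illustration only: the constants mirror the kernel's, while write_fastopen_opt() and the sample cookie are made up.

/* Illustrative user-space sketch (not kernel code): encode a TCP Fast Open
 * cookie either in the experimental option form or in the RFC 7413 form,
 * padding to a 32-bit boundary with NOPs as tcp_options_write() does.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TCPOPT_NOP            1
#define TCPOPT_FASTOPEN       34      /* RFC 7413 option kind */
#define TCPOPT_EXP            254     /* experimental option kind */
#define TCPOPT_FASTOPEN_MAGIC 0xF989  /* magic used by the experimental form */

static size_t write_fastopen_opt(uint8_t *p, const uint8_t *cookie,
				 uint8_t cookie_len, int experimental)
{
	size_t len;

	if (experimental) {
		len = 4 + cookie_len;          /* kind + len + 2-byte magic */
		p[0] = TCPOPT_EXP;
		p[1] = (uint8_t)len;
		p[2] = TCPOPT_FASTOPEN_MAGIC >> 8;
		p[3] = TCPOPT_FASTOPEN_MAGIC & 0xff;
		memcpy(p + 4, cookie, cookie_len);
	} else {
		len = 2 + cookie_len;          /* kind + len */
		p[0] = TCPOPT_FASTOPEN;
		p[1] = (uint8_t)len;
		memcpy(p + 2, cookie, cookie_len);
	}
	/* Pad to a multiple of 4 bytes; for even cookie lengths this is the
	 * same two-NOP padding the kernel applies when (len & 3) == 2.
	 */
	while (len & 3)
		p[len++] = TCPOPT_NOP;
	return len;
}

int main(void)
{
	uint8_t cookie[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint8_t buf[40];

	printf("RFC 7413 form uses %zu bytes, experimental form uses %zu bytes\n",
	       write_fastopen_opt(buf, cookie, sizeof(cookie), 0),
	       write_fastopen_opt(buf, cookie, sizeof(cookie), 1));
	return 0;
}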
@@ -565,7 +574,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	opts->mss = tcp_advertise_mss(sk);
 	remaining -= TCPOLEN_MSS_ALIGNED;
 
-	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
+	if (likely(sysctl_tcp_timestamps && !*md5)) {
 		opts->options |= OPTION_TS;
 		opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
 		opts->tsecr = tp->rx_opt.ts_recent;
@@ -583,13 +592,17 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	}
 
 	if (fastopen && fastopen->cookie.len >= 0) {
-		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
+		u32 need = fastopen->cookie.len;
+
+		need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
+					       TCPOLEN_FASTOPEN_BASE;
 		need = (need + 3) & ~3U;  /* Align to 32 bits */
 		if (remaining >= need) {
 			opts->options |= OPTION_FAST_OPEN_COOKIE;
 			opts->fastopen_cookie = &fastopen->cookie;
 			remaining -= need;
 			tp->syn_fastopen = 1;
+			tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
 		}
 	}
 
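For a sense of the space accounting in this hunk: on a typical SYN that already carries MSS, timestamps (with SACK-perm folded into the same aligned block) and window scaling, 20 of the 40 option bytes remain, which is exactly enough for a 16-byte cookie in either encoding. The arithmetic below is a stand-alone illustration using the kernel's aligned option sizes, not kernel code.

/* Worked example: remaining TCP option space on a SYN and the aligned
 * "need" for a Fast Open cookie in both encodings. Assumed sizes match
 * MAX_TCP_OPTION_SPACE and the TCPOLEN_*_ALIGNED constants.
 */
#include <stdio.h>

int main(void)
{
	unsigned int remaining = 40;   /* MAX_TCP_OPTION_SPACE */
	unsigned int cookie_len = 16;  /* largest Fast Open cookie */
	unsigned int need_exp, need_std;

	remaining -= 4;                /* TCPOLEN_MSS_ALIGNED */
	remaining -= 12;               /* TCPOLEN_TSTAMP_ALIGNED (TS + SACK-perm) */
	remaining -= 4;                /* TCPOLEN_WSCALE_ALIGNED */

	need_exp = (4 + cookie_len + 3) & ~3U;  /* experimental: 4-byte base */
	need_std = (2 + cookie_len + 3) & ~3U;  /* RFC 7413: 2-byte base */

	printf("remaining=%u need_exp=%u need_std=%u\n",
	       remaining, need_exp, need_std);  /* 20 20 20: both fit */
	return 0;
}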
@@ -601,15 +614,14 @@ static unsigned int tcp_synack_options(struct sock *sk,
 				   struct request_sock *req,
 				   unsigned int mss, struct sk_buff *skb,
 				   struct tcp_out_options *opts,
-				   struct tcp_md5sig_key **md5,
+				   const struct tcp_md5sig_key *md5,
 				   struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
 
 #ifdef CONFIG_TCP_MD5SIG
-	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
-	if (*md5) {
+	if (md5) {
 		opts->options |= OPTION_MD5;
 		remaining -= TCPOLEN_MD5SIG_ALIGNED;
 
@@ -620,8 +632,6 @@ static unsigned int tcp_synack_options(struct sock *sk,
 		 */
 		ireq->tstamp_ok &= !ireq->sack_ok;
 	}
-#else
-	*md5 = NULL;
 #endif
 
 	/* We always send an MSS option. */
@@ -645,7 +655,10 @@ static unsigned int tcp_synack_options(struct sock *sk,
 		remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
 	if (foc != NULL && foc->len >= 0) {
-		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+		u32 need = foc->len;
+
+		need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
+				   TCPOLEN_FASTOPEN_BASE;
 		need = (need + 3) & ~3U;  /* Align to 32 bits */
 		if (remaining >= need) {
 			opts->options |= OPTION_FAST_OPEN_COOKIE;
@@ -989,7 +1002,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (md5) {
 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		tp->af_specific->calc_md5_hash(opts.hash_location,
-					       md5, sk, NULL, skb);
+					       md5, sk, skb);
 	}
 #endif
 
@@ -1151,7 +1164,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	/* Get a new skb... force flag on. */
 	buff = sk_stream_alloc_skb(sk, nsize, gfp);
-	if (buff == NULL)
+	if (!buff)
 		return -ENOMEM; /* We'll just try again later. */
 
 	sk->sk_wmem_queued += buff->truesize;
@@ -1354,6 +1367,8 @@ void tcp_mtup_init(struct sock *sk)
 			       icsk->icsk_af_ops->net_header_len;
 	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
 	icsk->icsk_mtup.probe_size = 0;
+	if (icsk->icsk_mtup.enabled)
+		icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
 
@@ -1708,7 +1723,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 		return tcp_fragment(sk, skb, len, mss_now, gfp);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp);
-	if (unlikely(buff == NULL))
+	if (unlikely(!buff))
 		return -ENOMEM;
 
 	sk->sk_wmem_queued += buff->truesize;
@@ -1752,20 +1767,23 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 				 bool *is_cwnd_limited, u32 max_segs)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 send_win, cong_win, limit, in_flight;
+	u32 age, send_win, cong_win, limit, in_flight;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct skb_mstamp now;
+	struct sk_buff *head;
 	int win_divisor;
 
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 		goto send_now;
 
-	if (icsk->icsk_ca_state != TCP_CA_Open)
+	if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR)))
 		goto send_now;
 
-	/* Defer for less than two clock ticks. */
-	if (tp->tso_deferred &&
-	    (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
+	/* Avoid bursty behavior by allowing defer
+	 * only if the last write was recent.
+	 */
+	if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1807,11 +1825,14 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 		goto send_now;
 	}
 
-	/* Ok, it looks like it is advisable to defer.
-	 * Do not rearm the timer if already set to not break TCP ACK clocking.
-	 */
-	if (!tp->tso_deferred)
-		tp->tso_deferred = 1 | (jiffies << 1);
+	head = tcp_write_queue_head(sk);
+	skb_mstamp_get(&now);
+	age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
+	/* If next ACK is likely to come too late (half srtt), do not defer */
+	if (age < (tp->srtt_us >> 4))
+		goto send_now;
+
+	/* Ok, it looks like it is advisable to defer. */
 
 	if (cong_win < send_win && cong_win < skb->len)
 		*is_cwnd_limited = true;
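The new deferral test compares the age of the oldest unacked skb against half the smoothed RTT, using the kernel's fixed-point srtt_us (8 times the RTT in microseconds). A stand-alone illustration of that arithmetic with assumed values:

/* Worked example of the "half srtt" test above: tp->srtt_us stores the
 * smoothed RTT left-shifted by 3 (8 * srtt, in microseconds), so
 * tp->srtt_us >> 4 is srtt / 2. User-space arithmetic only, not kernel code.
 */
#include <stdio.h>

int main(void)
{
	unsigned int srtt_usec = 20000;          /* assume a 20 ms smoothed RTT */
	unsigned int srtt_us = srtt_usec << 3;   /* kernel fixed-point: 8 * srtt */
	unsigned int half_srtt = srtt_us >> 4;   /* 10000 us */
	unsigned int head_age = 4000;            /* oldest unacked skb sent 4 ms ago */

	/* The oldest in-flight skb was sent only 4 ms ago, so its ACK is still
	 * roughly 16 ms away: waiting for it would stall too long, send now.
	 */
	if (head_age < half_srtt)
		printf("send now, do not defer\n");
	else
		printf("an ACK should arrive soon, deferring is acceptable\n");
	return 0;
}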
@@ -1819,10 +1840,34 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	return true;
 
 send_now:
-	tp->tso_deferred = 0;
 	return false;
 }
 
+static inline void tcp_mtu_check_reprobe(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct net *net = sock_net(sk);
+	u32 interval;
+	s32 delta;
+
+	interval = net->ipv4.sysctl_tcp_probe_interval;
+	delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+	if (unlikely(delta >= interval * HZ)) {
+		int mss = tcp_current_mss(sk);
+
+		/* Update current search range */
+		icsk->icsk_mtup.probe_size = 0;
+		icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
+			sizeof(struct tcphdr) +
+			icsk->icsk_af_ops->net_header_len;
+		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+
+		/* Update probe time stamp */
+		icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+	}
+}
+
 /* Create a new MTU probe if we are ready.
  * MTU probe is regularly attempting to increase the path MTU by
  * deliberately sending larger packets. This discovers routing
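tcp_mtu_check_reprobe() compares jiffies-based timestamps against a sysctl expressed in seconds, hence the interval * HZ scaling before the search range is reset. A stand-alone illustration with assumed values (HZ of 1000 and a 600-second probe interval):

/* Sketch of the reprobe-interval check: reset the MTU search range only
 * once sysctl_tcp_probe_interval seconds worth of jiffies have elapsed
 * since the last probe. User-space arithmetic with made-up numbers.
 */
#include <stdio.h>

int main(void)
{
	unsigned int hz = 1000;             /* assume CONFIG_HZ=1000 */
	unsigned int interval_sec = 600;    /* assumed tcp_probe_interval */
	unsigned long now = 5000000;        /* pretend current jiffies */
	unsigned long probe_timestamp = 4300000;
	long delta = (long)(now - probe_timestamp);

	if (delta >= (long)(interval_sec * hz))
		printf("interval expired: reset search range and reprobe\n");
	else
		printf("still backing off for %ld more jiffies\n",
		       (long)(interval_sec * hz) - delta);
	return 0;
}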
@@ -1837,11 +1882,13 @@ static int tcp_mtu_probe(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb, *nskb, *next;
+	struct net *net = sock_net(sk);
 	int len;
 	int probe_size;
 	int size_needed;
 	int copy;
 	int mss_now;
+	int interval;
 
 	/* Not currently probing/verifying,
 	 * not in recovery,
@@ -1854,12 +1901,25 @@ static int tcp_mtu_probe(struct sock *sk)
 	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
 		return -1;
 
-	/* Very simple search strategy: just double the MSS. */
+	/* Use binary search for probe_size between tcp_mss_base,
+	 * and current mss_clamp. if (search_high - search_low)
+	 * smaller than a threshold, backoff from probing.
+	 */
 	mss_now = tcp_current_mss(sk);
-	probe_size = 2 * tp->mss_cache;
+	probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
+				    icsk->icsk_mtup.search_low) >> 1);
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
-	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
-		/* TODO: set timer for probe_converge_event */
+	interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
+	/* When misfortune happens, we are reprobing actively,
+	 * and then reprobe timer has expired. We stick with current
+	 * probing process by not resetting search range to its orignal.
+	 */
+	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
+	    interval < net->ipv4.sysctl_tcp_probe_threshold) {
+		/* Check whether enough time has elaplased for
+		 * another round of probing.
+		 */
+		tcp_mtu_check_reprobe(sk);
 		return -1;
 	}
 
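The probe size is now taken from the midpoint of the MTU search range, and probing backs off once the range is narrower than sysctl_tcp_probe_threshold. The sketch below condenses that binary search into one loop; in the kernel the range is narrowed gradually by probe successes and losses, and mtu_to_mss() is a simplified stand-in for tcp_mtu_to_mss().

/* Stand-alone illustration of the binary-search probe sizing; all numbers
 * are assumptions, and the success/failure feedback that the kernel gets
 * from ACKs and losses is faked by the probe_mtu <= 1500 test.
 */
#include <stdio.h>

/* Hypothetical stand-in for tcp_mtu_to_mss(): strip IPv4 + TCP headers. */
static int mtu_to_mss(int mtu)
{
	return mtu - 20 - 20;
}

int main(void)
{
	int search_low = 1024;    /* MTU known to work */
	int search_high = 9000;   /* MTU we would like to reach */
	int threshold = 8;        /* assumed tcp_probe_threshold */

	while (search_high - search_low >= threshold) {
		int probe_mtu = (search_high + search_low) >> 1;

		printf("probing with mss %d (mtu %d)\n",
		       mtu_to_mss(probe_mtu), probe_mtu);
		if (probe_mtu <= 1500)          /* pretend this probe got through */
			search_low = probe_mtu;
		else                            /* pretend this probe was lost */
			search_high = probe_mtu;
	}
	printf("converged: path MTU is about %d\n", search_low);
	return 0;
}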
@@ -1881,7 +1941,8 @@ static int tcp_mtu_probe(struct sock *sk)
 	}
 
 	/* We're allowed to probe. Build it now. */
-	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
+	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC);
+	if (!nskb)
 		return -1;
 	sk->sk_wmem_queued += nskb->truesize;
 	sk_mem_charge(sk, nskb->truesize);
@@ -2179,7 +2240,7 @@ void tcp_send_loss_probe(struct sock *sk)
 	int mss = tcp_current_mss(sk);
 	int err = -1;
 
-	if (tcp_send_head(sk) != NULL) {
+	if (tcp_send_head(sk)) {
 		err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
 		goto rearm_timer;
 	}
@@ -2689,7 +2750,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		if (skb == tcp_send_head(sk))
 			break;
 		/* we could do better than to assign each time */
-		if (hole == NULL)
+		if (!hole)
 			tp->retransmit_skb_hint = skb;
 
 		/* Assume this retransmit will generate
@@ -2713,7 +2774,7 @@ begin_fwd:
 			if (!tcp_can_forward_retransmit(sk))
 				break;
 			/* Backtrack if necessary to non-L'ed skb */
-			if (hole != NULL) {
+			if (hole) {
 				skb = hole;
 				hole = NULL;
 			}
@@ -2721,7 +2782,7 @@ begin_fwd:
 			goto begin_fwd;
 
 		} else if (!(sacked & TCPCB_LOST)) {
-			if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
+			if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
 				hole = skb;
 			continue;
 
@@ -2751,43 +2812,65 @@ begin_fwd:
 		}
 	}
 }
 
-/* Send a fin. The caller locks the socket for us. This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
+/* We allow to exceed memory limits for FIN packets to expedite
+ * connection tear down and (memory) recovery.
+ * Otherwise tcp_send_fin() could be tempted to either delay FIN
+ * or even be forced to close flow without any FIN.
+ */
+static void sk_forced_wmem_schedule(struct sock *sk, int size)
+{
+	int amt, status;
+
+	if (size <= sk->sk_forward_alloc)
+		return;
+	amt = sk_mem_pages(size);
+	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+	sk_memory_allocated_add(sk, amt, &status);
+}
+
+/* Send a FIN. The caller locks the socket for us.
+ * We should try to send a FIN packet really hard, but eventually give up.
  */
 void tcp_send_fin(struct sock *sk)
 {
+	struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_write_queue_tail(sk);
-	int mss_now;
 
-	/* Optimization, tack on the FIN if we have a queue of
-	 * unsent frames. But be careful about outgoing SACKS
-	 * and IP options.
+	/* Optimization, tack on the FIN if we have one skb in write queue and
+	 * this skb was not yet sent, or we are under memory pressure.
+	 * Note: in the latter case, FIN packet will be sent after a timeout,
+	 * as TCP stack thinks it has already been transmitted.
 	 */
-	mss_now = tcp_current_mss(sk);
-
-	if (tcp_send_head(sk) != NULL) {
-		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
-		TCP_SKB_CB(skb)->end_seq++;
+	if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+coalesce:
+		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
+		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
+		if (!tcp_send_head(sk)) {
+			/* This means tskb was already sent.
+			 * Pretend we included the FIN on previous transmit.
+			 * We need to set tp->snd_nxt to the value it would have
+			 * if FIN had been sent. This is because retransmit path
+			 * does not change tp->snd_nxt.
+			 */
+			tp->snd_nxt++;
+			return;
+		}
 	} else {
-		/* Socket is locked, keep trying until memory is available. */
-		for (;;) {
-			skb = alloc_skb_fclone(MAX_TCP_HEADER,
-					       sk->sk_allocation);
-			if (skb)
-				break;
-			yield();
+		skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+		if (unlikely(!skb)) {
+			if (tskb)
+				goto coalesce;
+			return;
 		}
-
-		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
+		sk_forced_wmem_schedule(sk, skb->truesize);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
 				     TCPHDR_ACK | TCPHDR_FIN);
 		tcp_queue_skb(sk, skb);
 	}
-	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
 }
 
 /* We get here when a process closes a file descriptor (either due to
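sk_forced_wmem_schedule() charges the FIN skb to the socket even when regular accounting would refuse, rounding the charge up to whole SK_MEM_QUANTUM (page-sized) units. A stand-alone sketch of that rounding with assumed numbers:

/* Illustration only: round an skb's truesize up to whole quanta and credit
 * it to the socket's forward allocation, as the forced schedule above does.
 */
#include <stdio.h>

int main(void)
{
	int quantum = 4096;        /* SK_MEM_QUANTUM on a 4K-page system */
	int truesize = 2304;       /* plausible truesize of a small FIN skb */
	int forward_alloc = 512;   /* what the socket had pre-charged */

	if (truesize > forward_alloc) {
		int pages = (truesize + quantum - 1) / quantum;  /* sk_mem_pages() */

		forward_alloc += pages * quantum;
	}
	printf("forward_alloc after forced schedule: %d\n", forward_alloc);
	return 0;
}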
@@ -2828,14 +2911,14 @@ int tcp_send_synack(struct sock *sk)
 	struct sk_buff *skb;
 
 	skb = tcp_write_queue_head(sk);
-	if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+	if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
 		pr_debug("%s: wrong queue state\n", __func__);
 		return -EFAULT;
 	}
 	if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
 		if (skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
-			if (nskb == NULL)
+			if (!nskb)
 				return -ENOMEM;
 			tcp_unlink_write_queue(skb, sk);
 			__skb_header_release(nskb);
@@ -2870,7 +2953,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
 	struct sk_buff *skb;
-	struct tcp_md5sig_key *md5;
+	struct tcp_md5sig_key *md5 = NULL;
 	int tcp_header_size;
 	int mss;
 
@@ -2883,7 +2966,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	skb_reserve(skb, MAX_TCP_HEADER);
 
 	skb_dst_set(skb, dst);
-	security_skb_owned_by(skb, sk);
 
 	mss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
@@ -2896,7 +2978,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	else
 #endif
 	skb_mstamp_get(&skb->skb_mstamp);
-	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
+
+#ifdef CONFIG_TCP_MD5SIG
+	rcu_read_lock();
+	md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
+#endif
+	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
 					     foc) + sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
@@ -2927,12 +3014,14 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */
-	if (md5) {
+	if (md5)
 		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
-					       md5, NULL, req, skb);
-	}
+					       md5, req_to_sk(req), skb);
+	rcu_read_unlock();
 #endif
 
+	/* Do not fool tcpdump (if any), clean our debris */
+	skb->tstamp.tv64 = 0;
 	return skb;
 }
 EXPORT_SYMBOL(tcp_make_synack);
@@ -2970,7 +3059,7 @@ static void tcp_connect_init(struct sock *sk)
 		       (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
 
 #ifdef CONFIG_TCP_MD5SIG
-	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
+	if (tp->af_specific->md5_lookup(sk, sk))
 		tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
@@ -3256,7 +3345,7 @@ void tcp_send_ack(struct sock *sk)
 	 * sock.
 	 */
 	buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
-	if (buff == NULL) {
+	if (!buff) {
 		inet_csk_schedule_ack(sk);
 		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
@@ -3300,7 +3389,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 
 	/* We don't queue it, tcp_transmit_skb() sets ownership. */
 	skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
-	if (skb == NULL)
+	if (!skb)
 		return -1;
 
 	/* Reserve space for headers and set control bits. */
@@ -3331,8 +3420,8 @@ int tcp_write_wakeup(struct sock *sk)
 	if (sk->sk_state == TCP_CLOSE)
 		return -1;
 
-	if ((skb = tcp_send_head(sk)) != NULL &&
-	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
+	skb = tcp_send_head(sk);
+	if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
 		int err;
 		unsigned int mss = tcp_current_mss(sk);
 		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;