Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 339
1 file changed, 179 insertions, 160 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 896e9dfbdb5c..c3c082ed3879 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -76,16 +76,15 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
 	tp->packets_out += tcp_skb_pcount(skb);
-	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
-	}
 
 	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
 		      tcp_skb_pcount(skb));
 }
 
-/* SND.NXT, if window was not shrunk.
+/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
+ * window scaling factor due to loss of precision.
  * If window has been shrunk, what should we make? It is not clear at all.
  * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
  * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
@@ -95,7 +94,9 @@ static inline __u32 tcp_acceptable_seq(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!before(tcp_wnd_end(tp), tp->snd_nxt))
+	if (!before(tcp_wnd_end(tp), tp->snd_nxt) ||
+	    (tp->rx_opt.wscale_ok &&
+	     ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale))))
 		return tp->snd_nxt;
 	else
 		return tcp_wnd_end(tp);
@@ -640,7 +641,7 @@ static unsigned int tcp_synack_options(struct request_sock *req,
 	}
 	if (likely(ireq->tstamp_ok)) {
 		opts->options |= OPTION_TS;
-		opts->tsval = tcp_skb_timestamp(skb);
+		opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
 		opts->tsecr = req->ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
@@ -769,25 +770,27 @@ static void tcp_tasklet_func(unsigned long data)
 		list_del(&tp->tsq_node);
 
 		sk = (struct sock *)tp;
-		bh_lock_sock(sk);
-
-		if (!sock_owned_by_user(sk)) {
-			tcp_tsq_handler(sk);
-		} else {
-			/* defer the work to tcp_release_cb() */
-			set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+		smp_mb__before_atomic();
+		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
+
+		if (!sk->sk_lock.owned &&
+		    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
+			bh_lock_sock(sk);
+			if (!sock_owned_by_user(sk)) {
+				clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
+				tcp_tsq_handler(sk);
+			}
+			bh_unlock_sock(sk);
 		}
-		bh_unlock_sock(sk);
 
-		clear_bit(TSQ_QUEUED, &tp->tsq_flags);
 		sk_free(sk);
 	}
 }
 
-#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |		\
-			  (1UL << TCP_WRITE_TIMER_DEFERRED) |	\
-			  (1UL << TCP_DELACK_TIMER_DEFERRED) |	\
-			  (1UL << TCP_MTU_REDUCED_DEFERRED))
+#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
+			  TCPF_WRITE_TIMER_DEFERRED |	\
+			  TCPF_DELACK_TIMER_DEFERRED |	\
+			  TCPF_MTU_REDUCED_DEFERRED)
 /**
  * tcp_release_cb - tcp release_sock() callback
  * @sk: socket
@@ -797,18 +800,17 @@ static void tcp_tasklet_func(unsigned long data)
  */
 void tcp_release_cb(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned long flags, nflags;
 
 	/* perform an atomic operation only if at least one flag is set */
 	do {
-		flags = tp->tsq_flags;
+		flags = sk->sk_tsq_flags;
 		if (!(flags & TCP_DEFERRED_ALL))
 			return;
 		nflags = flags & ~TCP_DEFERRED_ALL;
-	} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
+	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 
-	if (flags & (1UL << TCP_TSQ_DEFERRED))
+	if (flags & TCPF_TSQ_DEFERRED)
 		tcp_tsq_handler(sk);
 
 	/* Here begins the tricky part :
@@ -822,15 +824,15 @@ void tcp_release_cb(struct sock *sk)
 	 */
 	sock_release_ownership(sk);
 
-	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
+	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
 		tcp_write_timer_handler(sk);
 		__sock_put(sk);
 	}
-	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
+	if (flags & TCPF_DELACK_TIMER_DEFERRED) {
 		tcp_delack_timer_handler(sk);
 		__sock_put(sk);
 	}
-	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
+	if (flags & TCPF_MTU_REDUCED_DEFERRED) {
 		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
 		__sock_put(sk);
 	}
@@ -860,6 +862,7 @@ void tcp_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned long flags, nval, oval;
 	int wmem;
 
 	/* Keep one reference on sk_wmem_alloc.
@@ -877,16 +880,25 @@ void tcp_wfree(struct sk_buff *skb)
 	if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
 		goto out;
 
-	if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
-	    !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
-		unsigned long flags;
+	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
 		struct tsq_tasklet *tsq;
+		bool empty;
+
+		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
+			goto out;
+
+		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+		if (nval != oval)
+			continue;
 
 		/* queue this socket to tasklet queue */
 		local_irq_save(flags);
 		tsq = this_cpu_ptr(&tsq_tasklet);
+		empty = list_empty(&tsq->head);
 		list_add(&tp->tsq_node, &tsq->head);
-		tasklet_schedule(&tsq->tasklet);
+		if (empty)
+			tasklet_schedule(&tsq->tasklet);
 		local_irq_restore(flags);
 		return;
 	}
@@ -955,6 +967,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	 */
 	skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
 
+	/* If we had to use memory reserve to allocate this skb,
+	 * this might cause drops if packet is looped back :
+	 * Other socket might not have SOCK_MEMALLOC.
+	 * Packets not looped back do not care about pfmemalloc.
+	 */
+	skb->pfmemalloc = 0;
+
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
 
@@ -964,6 +983,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	skb_set_hash_from_sk(skb, sk);
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
+	skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
+
 	/* Build TCP header and checksum it. */
 	th = (struct tcphdr *)skb->data;
 	th->source = inet->inet_sport;
@@ -1027,7 +1048,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
 
 	/* Our usage of tstamp should remain private */
-	skb->tstamp.tv64 = 0;
+	skb->tstamp = 0;
 
 	/* Cleanup our debris for IP stacks */
 	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
@@ -1514,6 +1535,18 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 		if (sysctl_tcp_slow_start_after_idle &&
 		    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
 			tcp_cwnd_application_limited(sk);
+
+		/* The following conditions together indicate the starvation
+		 * is caused by insufficient sender buffer:
+		 * 1) just sent some data (see tcp_write_xmit)
+		 * 2) not cwnd limited (this else condition)
+		 * 3) no more data to send (null tcp_send_head )
+		 * 4) application is hitting buffer limit (SOCK_NOSPACE)
+		 */
+		if (!tcp_send_head(sk) && sk->sk_socket &&
+		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
+		    (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+			tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
 	}
 }
 
@@ -1910,26 +1943,26 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
  */
 static int tcp_mtu_probe(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb, *nskb, *next;
 	struct net *net = sock_net(sk);
-	int len;
 	int probe_size;
 	int size_needed;
-	int copy;
+	int copy, len;
 	int mss_now;
 	int interval;
 
 	/* Not currently probing/verifying,
 	 * not in recovery,
 	 * have enough cwnd, and
-	 * not SACKing (the variable headers throw things off) */
-	if (!icsk->icsk_mtup.enabled ||
-	    icsk->icsk_mtup.probe_size ||
-	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
-	    tp->snd_cwnd < 11 ||
-	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
+	 * not SACKing (the variable headers throw things off)
+	 */
+	if (likely(!icsk->icsk_mtup.enabled ||
+		   icsk->icsk_mtup.probe_size ||
+		   inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
+		   tp->snd_cwnd < 11 ||
+		   tp->rx_opt.num_sacks || tp->rx_opt.dsack))
 		return -1;
 
 	/* Use binary search for probe_size between tcp_mss_base,
@@ -2069,7 +2102,16 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	limit <<= factor;
 
 	if (atomic_read(&sk->sk_wmem_alloc) > limit) {
-		set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+		/* Always send the 1st or 2nd skb in write queue.
+		 * No need to wait for TX completion to call us back,
+		 * after softirq/tasklet schedule.
+		 * This helps when TX completions are delayed too much.
+		 */
+		if (skb == sk->sk_write_queue.next ||
+		    skb->prev == sk->sk_write_queue.next)
+			return false;
+
+		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED, so we must
 		 * test again the condition.
@@ -2081,6 +2123,47 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	return false;
 }
 
+static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
+{
+	const u32 now = tcp_time_stamp;
+
+	if (tp->chrono_type > TCP_CHRONO_UNSPEC)
+		tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+	tp->chrono_start = now;
+	tp->chrono_type = new;
+}
+
+void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* If there are multiple conditions worthy of tracking in a
+	 * chronograph then the highest priority enum takes precedence
+	 * over the other conditions. So that if something "more interesting"
+	 * starts happening, stop the previous chrono and start a new one.
+	 */
+	if (type > tp->chrono_type)
+		tcp_chrono_set(tp, type);
+}
+
+void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+
+	/* There are multiple conditions worthy of tracking in a
+	 * chronograph, so that the highest priority enum takes
+	 * precedence over the other conditions (see tcp_chrono_start).
+	 * If a condition stops, we only stop chrono tracking if
+	 * it's the "most interesting" or current chrono we are
+	 * tracking and starts busy chrono if we have pending data.
+	 */
+	if (tcp_write_queue_empty(sk))
+		tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
+	else if (type == tp->chrono_type)
+		tcp_chrono_set(tp, TCP_CHRONO_BUSY);
+}
+
 /* This routine writes packets to the network. It advances the
  * send_head. This happens as incoming acks open up the remote
  * window for us.
@@ -2103,7 +2186,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	unsigned int tso_segs, sent_pkts;
 	int cwnd_quota;
 	int result;
-	bool is_cwnd_limited = false;
+	bool is_cwnd_limited = false, is_rwnd_limited = false;
 	u32 max_segs;
 
 	sent_pkts = 0;
@@ -2140,8 +2223,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			break;
 		}
 
-		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
+			is_rwnd_limited = true;
 			break;
+		}
 
 		if (tso_segs == 1) {
 			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
@@ -2167,6 +2252,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
 
+		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+			clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 		if (tcp_small_queue_check(sk, skb, 0))
 			break;
 
@@ -2186,6 +2273,11 @@ repair:
 			break;
 	}
 
+	if (is_rwnd_limited)
+		tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
+	else
+		tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
+
 	if (likely(sent_pkts)) {
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += sent_pkts;
@@ -2207,8 +2299,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
 	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
-	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
-		return false;
 	/* No consecutive loss probes. */
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
 		tcp_rearm_rto(sk);
@@ -2227,8 +2317,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
-	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+	if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+	    !tp->packets_out || !tcp_is_sack(tp) ||
+	    icsk->icsk_ca_state != TCP_CA_Open)
 		return false;
 
 	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
@@ -2436,9 +2527,11 @@ u32 __tcp_select_window(struct sock *sk)
 	int full_space = min_t(int, tp->window_clamp, allowed_space);
 	int window;
 
-	if (mss > full_space)
+	if (unlikely(mss > full_space)) {
 		mss = full_space;
-
+		if (mss <= 0)
+			return 0;
+	}
 	if (free_space < (full_space >> 1)) {
 		icsk->icsk_ack.quick = 0;
 
@@ -2514,7 +2607,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 }
 
 /* Collapses two adjacent SKB's during retransmission. */
-static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
+static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
@@ -2525,13 +2618,17 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 
 	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
+	if (next_skb_size) {
+		if (next_skb_size <= skb_availroom(skb))
+			skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
+				      next_skb_size);
+		else if (!skb_shift(skb, next_skb, next_skb_size))
+			return false;
+	}
 	tcp_highest_sack_combine(sk, next_skb, skb);
 
 	tcp_unlink_write_queue(next_skb, sk);
 
-	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
-				  next_skb_size);
-
 	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->ip_summed = CHECKSUM_PARTIAL;
 
@@ -2560,6 +2657,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	tcp_skb_collapse_tstamp(skb, next_skb);
 
 	sk_wmem_free_skb(sk, next_skb);
+	return true;
 }
 
 /* Check if coalescing SKBs is legal. */
@@ -2567,14 +2665,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
 {
 	if (tcp_skb_pcount(skb) > 1)
 		return false;
-	/* TODO: SACK collapsing could be used to remove this condition */
-	if (skb_shinfo(skb)->nr_frags != 0)
-		return false;
 	if (skb_cloned(skb))
 		return false;
 	if (skb == tcp_send_head(sk))
 		return false;
-	/* Some heurestics for collapsing over SACK'd could be invented */
+	/* Some heuristics for collapsing over SACK'd could be invented */
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 		return false;
 
@@ -2612,16 +2707,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 
 		if (space < 0)
 			break;
-		/* Punt if not enough space exists in the first SKB for
-		 * the data in the second
-		 */
-		if (skb->len > skb_availroom(to))
-			break;
 
 		if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
 			break;
 
-		tcp_collapse_retrans(sk, to);
+		if (!tcp_collapse_retrans(sk, to))
+			break;
 	}
 }
 
@@ -2694,6 +2785,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
 		tcp_ecn_clear_syn(sk, skb);
 
+	/* Update global and local TCP statistics. */
+	segs = tcp_skb_pcount(skb);
+	TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
+	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+	tp->total_retrans += segs;
+
 	/* make sure skb->data is aligned on arches that require it
 	 * and check if ack-trimming & collapsing extended the headroom
 	 * beyond what csum_start can cover.
@@ -2711,14 +2809,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	}
 
 	if (likely(!err)) {
-		segs = tcp_skb_pcount(skb);
-
 		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
-		/* Update global TCP statistics. */
-		TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
-		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
-			__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
-		tp->total_retrans += segs;
+	} else if (err != -EBUSY) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
 	return err;
 }
@@ -2741,8 +2834,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		if (!tp->retrans_stamp)
 			tp->retrans_stamp = tcp_skb_timestamp(skb);
 
-	} else if (err != -EBUSY) {
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
 
 	if (tp->undo_retrans < 0)
@@ -2751,36 +2842,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	return err;
 }
 
-/* Check if we forward retransmits are possible in the current
- * window/congestion state.
- */
-static bool tcp_can_forward_retransmit(struct sock *sk)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	const struct tcp_sock *tp = tcp_sk(sk);
-
-	/* Forward retransmissions are possible only during Recovery. */
-	if (icsk->icsk_ca_state != TCP_CA_Recovery)
-		return false;
-
-	/* No forward retransmissions in Reno are possible. */
-	if (tcp_is_reno(tp))
-		return false;
-
-	/* Yeah, we have to make difficult choice between forward transmission
-	 * and retransmission... Both ways have their merits...
-	 *
-	 * For now we do not retransmit anything, while we have some new
-	 * segments to send. In the other cases, follow rule 3 for
-	 * NextSeg() specified in RFC3517.
-	 */
-
-	if (tcp_may_send_now(sk))
-		return false;
-
-	return true;
-}
-
 /* This gets called after a retransmit timeout, and the initially
  * retransmitted data is acknowledged. It tries to continue
  * resending the rest of the retransmit queue, until either
@@ -2795,24 +2856,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	struct sk_buff *hole = NULL;
-	u32 max_segs, last_lost;
+	u32 max_segs;
 	int mib_idx;
-	int fwd_rexmitting = 0;
 
 	if (!tp->packets_out)
 		return;
 
-	if (!tp->lost_out)
-		tp->retransmit_high = tp->snd_una;
-
 	if (tp->retransmit_skb_hint) {
 		skb = tp->retransmit_skb_hint;
-		last_lost = TCP_SKB_CB(skb)->end_seq;
-		if (after(last_lost, tp->retransmit_high))
-			last_lost = tp->retransmit_high;
 	} else {
 		skb = tcp_write_queue_head(sk);
-		last_lost = tp->snd_una;
 	}
 
 	max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
@@ -2835,31 +2888,14 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		 */
 		segs = min_t(int, segs, max_segs);
 
-		if (fwd_rexmitting) {
-begin_fwd:
-			if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-				break;
-			mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
-
-		} else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
-			tp->retransmit_high = last_lost;
-			if (!tcp_can_forward_retransmit(sk))
-				break;
-			/* Backtrack if necessary to non-L'ed skb */
-			if (hole) {
-				skb = hole;
-				hole = NULL;
-			}
-			fwd_rexmitting = 1;
-			goto begin_fwd;
-
+		if (tp->retrans_out >= tp->lost_out) {
+			break;
 		} else if (!(sacked & TCPCB_LOST)) {
 			if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
 				hole = skb;
 			continue;
 
 		} else {
-			last_lost = TCP_SKB_CB(skb)->end_seq;
 			if (icsk->icsk_ca_state != TCP_CA_Loss)
 				mib_idx = LINUX_MIB_TCPFASTRETRANS;
 			else
@@ -2880,7 +2916,8 @@ begin_fwd:
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
-		if (skb == tcp_write_queue_head(sk))
+		if (skb == tcp_write_queue_head(sk) &&
+		    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
 						  TCP_RTO_MAX);
@@ -2962,6 +2999,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
 	struct sk_buff *skb;
 
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
+
 	/* NOTE: No TCP options attached and we never retransmit this. */
 	skb = alloc_skb(MAX_TCP_HEADER, priority);
 	if (!skb) {
@@ -2977,8 +3016,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* Send it off. */
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
-
-	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
 }
 
 /* Send a crossed SYN-ACK during socket establishment.
@@ -3037,7 +3074,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	struct sk_buff *skb;
 	int tcp_header_size;
 	struct tcphdr *th;
-	u16 user_mss;
 	int mss;
 
 	skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
@@ -3067,10 +3103,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	}
 	skb_dst_set(skb, dst);
 
-	mss = dst_metric_advmss(dst);
-	user_mss = READ_ONCE(tp->rx_opt.user_mss);
-	if (user_mss && user_mss < mss)
-		mss = user_mss;
+	mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
 
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
@@ -3123,7 +3156,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 #endif
 
 	/* Do not fool tcpdump (if any), clean our debris */
-	skb->tstamp.tv64 = 0;
+	skb->tstamp = 0;
 	return skb;
 }
 EXPORT_SYMBOL(tcp_make_synack);
@@ -3176,9 +3209,7 @@ static void tcp_connect_init(struct sock *sk)
 
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
-	tp->advmss = dst_metric_advmss(dst);
-	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
-		tp->advmss = tp->rx_opt.user_mss;
+	tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
 
 	tcp_initialize_rcv_mss(sk);
 
@@ -3244,31 +3275,19 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int syn_loss = 0, space, err = 0;
-	unsigned long last_syn_loss = 0;
+	int space, err = 0;
 	struct sk_buff *syn_data;
 
 	tp->rx_opt.mss_clamp = tp->advmss;  /* If MSS is not cached */
-	tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
-			       &syn_loss, &last_syn_loss);
-	/* Recurring FO SYN losses: revert to regular handshake temporarily */
-	if (syn_loss > 1 &&
-	    time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
-		fo->cookie.len = -1;
-		goto fallback;
-	}
-
-	if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
-		fo->cookie.len = -1;
-	else if (fo->cookie.len <= 0)
+	if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
 		goto fallback;
 
 	/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
 	 * user-MSS. Reserve maximum option space for middleboxes that add
 	 * private TCP options. The cost is reduced data space in SYN :(
 	 */
-	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
-		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
+	tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
+
 	space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
 		MAX_TCP_OPTION_SPACE;
 
@@ -3300,6 +3319,8 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	fo->copied = space;
 
 	tcp_connect_queue_skb(sk, syn_data);
+	if (syn_data->len)
+		tcp_chrono_start(sk, TCP_CHRONO_BUSY);
 
 	err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
 
@@ -3464,8 +3485,6 @@ void tcp_send_ack(struct sock *sk)
 	/* We do not want pure acks influencing TCP Small Queues or fq/pacing
 	 * too much.
 	 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
-	 * We also avoid tcp_wfree() overhead (cache line miss accessing
-	 * tp->tsq_flags) by using regular sock_wfree()
 	 */
 	skb_set_tcp_pure_ack(buff);
 