Diffstat (limited to 'net/ipv4/tcp_output.c')
 net/ipv4/tcp_output.c | 137 +++++++++++++++++++++++++++++----------------------
 1 file changed, 74 insertions(+), 63 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8162e2880178..696b0a168f16 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -66,15 +66,17 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
 /* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
 
-	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
+	__skb_unlink(skb, &sk->sk_write_queue);
+	tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
 	tp->packets_out += tcp_skb_pcount(skb);
 	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
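
Note: the rb-tree insert used above keeps the new retransmit queue ordered by start sequence number, so the leftmost node is always the oldest unacked skb. A minimal sketch of such a helper (assuming it sits alongside this code and links skbs through sk_buff's rbnode member; the exact in-tree version may differ):

void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct sk_buff *skb1;

	while (*p) {
		parent = *p;
		skb1 = rb_entry(parent, struct sk_buff, rbnode);
		/* Order by start sequence so the earliest data sits leftmost. */
		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&skb->rbnode, parent, p);
	rb_insert_color(&skb->rbnode, root);
}
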
@@ -1249,12 +1251,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
 	TCP_SKB_CB(skb)->eor = 0;
 }
 
+/* Insert buff after skb on the write or rtx queue of sk. */
+static void tcp_insert_write_queue_after(struct sk_buff *skb,
+					 struct sk_buff *buff,
+					 struct sock *sk,
+					 enum tcp_queue tcp_queue)
+{
+	if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
+		__skb_queue_after(&sk->sk_write_queue, skb, buff);
+	else
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+}
+
 /* Function to create two new TCP segments. Shrinks the given segment
  * to the specified size and appends a new segment with the rest of the
  * packet to the list. This won't be called frequently, I hope.
  * Remember, these are still headerless SKBs at this point.
  */
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+		 struct sk_buff *skb, u32 len,
 		 unsigned int mss_now, gfp_t gfp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
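
Note: tcp_fragment() and tso_fragment() now take an enum tcp_queue so the caller can say which queue the skb being split currently sits on. The enum itself is not part of this file; it is presumably declared in include/net/tcp.h along these lines:

enum tcp_queue {
	TCP_FRAG_IN_WRITE_QUEUE,	/* skb still holds unsent data */
	TCP_FRAG_IN_RTX_QUEUE,		/* skb already moved to the rtx rb-tree */
};
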
@@ -1337,7 +1352,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
-	tcp_insert_write_queue_after(skb, buff, sk);
+	tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
 	list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
 
 	return 0;
@@ -1625,10 +1640,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 	 * is caused by insufficient sender buffer:
 	 * 1) just sent some data (see tcp_write_xmit)
 	 * 2) not cwnd limited (this else condition)
-	 * 3) no more data to send (null tcp_send_head )
+	 * 3) no more data to send (tcp_write_queue_empty())
 	 * 4) application is hitting buffer limit (SOCK_NOSPACE)
 	 */
-	if (!tcp_send_head(sk) && sk->sk_socket &&
+	if (tcp_write_queue_empty(sk) && sk->sk_socket &&
 	    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
 	    (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
 		tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1824,7 +1839,8 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
  * know that all the data is in scatter-gather pages, and that the
  * packet has never been sent out before (and thus is not cloned).
  */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+			struct sk_buff *skb, unsigned int len,
 			unsigned int mss_now, gfp_t gfp)
 {
 	struct sk_buff *buff;
@@ -1833,7 +1849,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* All of a TSO frame must be composed of paged data. */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, skb, len, mss_now, gfp);
+		return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
@@ -1869,7 +1885,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
-	tcp_insert_write_queue_after(skb, buff, sk);
+	tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
 
 	return 0;
 }
@@ -1939,8 +1955,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 		goto send_now;
 	}
 
-	head = tcp_write_queue_head(sk);
-
+	/* TODO : use tsorted_sent_queue ? */
+	head = tcp_rtx_queue_head(sk);
+	if (!head)
+		goto send_now;
 	age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
 	/* If next ACK is likely to come too late (half srtt), do not defer */
 	if (age < (tp->srtt_us >> 4))
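
Note: tcp_rtx_queue_head() returns the earliest skb in the rtx rb-tree and NULL when the tree is empty, hence the new !head check above. A plausible sketch, assuming the accessor is added to include/net/tcp.h:

static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
{
	/* Leftmost node = lowest sequence number = oldest in-flight skb.
	 * The tail analogue would use rb_last() instead of rb_first().
	 */
	return rb_entry_safe(rb_first(&sk->tcp_rtx_queue),
			     struct sk_buff, rbnode);
}
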
@@ -2158,13 +2176,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	limit <<= factor;
 
 	if (refcount_read(&sk->sk_wmem_alloc) > limit) {
-		/* Always send the 1st or 2nd skb in write queue.
+		/* Always send skb if rtx queue is empty.
 		 * No need to wait for TX completion to call us back,
 		 * after softirq/tasklet schedule.
 		 * This helps when TX completions are delayed too much.
 		 */
-		if (skb == sk->sk_write_queue.next ||
-		    skb->prev == sk->sk_write_queue.next)
+		if (tcp_rtx_queue_empty(sk))
 			return false;
 
 		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2215,7 +2232,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
 	 * it's the "most interesting" or current chrono we are
 	 * tracking and starts busy chrono if we have pending data.
 	 */
-	if (tcp_write_queue_empty(sk))
+	if (tcp_rtx_and_write_queues_empty(sk))
 		tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
 	else if (type == tp->chrono_type)
 		tcp_chrono_set(tp, TCP_CHRONO_BUSY);
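
Note: with unsent data on sk_write_queue and in-flight data in the rb-tree, the emptiness tests split into per-queue helpers. A minimal sketch of the accessors these hunks rely on (names taken from the call sites; assumed to live in include/net/tcp.h):

static inline bool tcp_write_queue_empty(const struct sock *sk)
{
	/* Unsent data still lives on the linear write queue. */
	return skb_queue_empty(&sk->sk_write_queue);
}

static inline bool tcp_rtx_queue_empty(const struct sock *sk)
{
	/* Sent-but-unacked data lives in the rb-tree. */
	return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
}

static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
{
	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
}
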
@@ -2310,7 +2327,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						    nonagle);
 
 		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+		    unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+					  skb, limit, mss_now, gfp)))
 			break;
 
 		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2350,7 +2368,7 @@ repair:
 		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
-	return !tp->packets_out && tcp_send_head(sk);
+	return !tp->packets_out && !tcp_write_queue_empty(sk);
 }
 
 bool tcp_schedule_loss_probe(struct sock *sk)
@@ -2374,7 +2392,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 		return false;
 
 	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
-	     tcp_send_head(sk))
+	     !tcp_write_queue_empty(sk))
 		return false;
 
 	/* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2427,18 +2445,14 @@ void tcp_send_loss_probe(struct sock *sk)
 	int mss = tcp_current_mss(sk);
 
 	skb = tcp_send_head(sk);
-	if (skb) {
-		if (tcp_snd_wnd_test(tp, skb, mss)) {
-			pcount = tp->packets_out;
-			tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
-			if (tp->packets_out > pcount)
-				goto probe_sent;
-			goto rearm_timer;
-		}
-		skb = tcp_write_queue_prev(sk, skb);
-	} else {
-		skb = tcp_write_queue_tail(sk);
+	if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+		pcount = tp->packets_out;
+		tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+		if (tp->packets_out > pcount)
+			goto probe_sent;
+		goto rearm_timer;
 	}
+	skb = skb_rb_last(&sk->tcp_rtx_queue);
 
 	/* At most one outstanding TLP retransmission. */
 	if (tp->tlp_high_seq)
@@ -2456,10 +2470,11 @@ void tcp_send_loss_probe(struct sock *sk)
 		goto rearm_timer;
 
 	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
-		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+		if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+					  (pcount - 1) * mss, mss,
 					  GFP_ATOMIC)))
 			goto rearm_timer;
-		skb = tcp_write_queue_next(sk, skb);
+		skb = skb_rb_next(skb);
 	}
 
 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2659,7 +2674,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+	struct sk_buff *next_skb = skb_rb_next(skb);
 	int skb_size, next_skb_size;
 
 	skb_size = skb->len;
@@ -2676,8 +2691,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_highest_sack_combine(sk, next_skb, skb);
 
-	tcp_unlink_write_queue(next_skb, sk);
-
 	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->ip_summed = CHECKSUM_PARTIAL;
 
@@ -2705,7 +2718,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 
 	tcp_skb_collapse_tstamp(skb, next_skb);
 
-	sk_wmem_free_skb(sk, next_skb);
+	tcp_rtx_queue_unlink_and_free(next_skb, sk);
 	return true;
 }
 
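
Note: a collapsed skb must now be unlinked from the rb-tree (and from its tsorted anchor) instead of the linear write queue before being freed. A rough sketch of what tcp_rtx_queue_unlink_and_free() is expected to do; the tsorted-anchor cleanup detail is an assumption here:

static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb,
						 struct sock *sk)
{
	/* Drop the skb from the tsorted list and the retransmit rb-tree,
	 * then release the memory charged to the socket.
	 */
	list_del(&skb->tcp_tsorted_anchor);
	rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
	sk_wmem_free_skb(sk, skb);
}
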
@@ -2716,8 +2729,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
 		return false;
 	if (skb_cloned(skb))
 		return false;
-	if (skb == tcp_send_head(sk))
-		return false;
 	/* Some heuristics for collapsing over SACK'd could be invented */
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 		return false;
@@ -2740,7 +2751,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 		return;
 
-	tcp_for_write_queue_from_safe(skb, tmp, sk) {
+	skb_rbtree_walk_from_safe(skb, tmp) {
 		if (!tcp_can_collapse(sk, skb))
 			break;
 
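
Note: because the rb-tree only ever holds packets already handed to the network, the old "stop at tcp_send_head()" guards disappear and iteration switches to rb-tree walkers. The helpers used here and in tcp_xmit_retransmit_queue() below are presumably shaped like this (names taken from the call sites; definitions assumed to land in include/linux/skbuff.h):

/* Next in-flight skb in sequence order, or NULL at the end of the tree. */
static inline struct sk_buff *skb_rb_next(struct sk_buff *skb)
{
	return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
}

#define skb_rbtree_walk_from(skb) \
	for (; (skb) != NULL; (skb) = skb_rb_next(skb))

/* Safe variant: fetch the successor before the body may unlink skb. */
#define skb_rbtree_walk_from_safe(skb, tmp)				  \
	for (; ((tmp) = (skb) ? skb_rb_next(skb) : NULL, (skb) != NULL); \
	     (skb) = (tmp))
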
@@ -2815,7 +2826,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 
 	len = cur_mss * segs;
 	if (skb->len > len) {
-		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+		if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+				 cur_mss, GFP_ATOMIC))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
 		if (skb_unclone(skb, GFP_ATOMIC))
@@ -2906,29 +2918,24 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *skb, *rtx_head = NULL, *hole = NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	struct sk_buff *hole = NULL;
 	u32 max_segs;
 	int mib_idx;
 
 	if (!tp->packets_out)
 		return;
 
-	if (tp->retransmit_skb_hint) {
-		skb = tp->retransmit_skb_hint;
-	} else {
-		skb = tcp_write_queue_head(sk);
+	skb = tp->retransmit_skb_hint;
+	if (!skb) {
+		rtx_head = tcp_rtx_queue_head(sk);
+		skb = rtx_head;
 	}
-
 	max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
-	tcp_for_write_queue_from(skb, sk) {
+	skb_rbtree_walk_from(skb) {
 		__u8 sacked;
 		int segs;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
 		if (tcp_pacing_check(sk))
 			break;
 
@@ -2973,7 +2980,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
-		if (skb == tcp_write_queue_head(sk) &&
+		if (skb == rtx_head &&
 		    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
@@ -3015,12 +3022,15 @@ void tcp_send_fin(struct sock *sk)
 	 * Note: in the latter case, FIN packet will be sent after a timeout,
 	 * as TCP stack thinks it has already been transmitted.
 	 */
-	if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+	if (!tskb && tcp_under_memory_pressure(sk))
+		tskb = skb_rb_last(&sk->tcp_rtx_queue);
+
+	if (tskb) {
 coalesce:
 		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
 		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
-		if (!tcp_send_head(sk)) {
+		if (tcp_write_queue_empty(sk)) {
 			/* This means tskb was already sent.
 			 * Pretend we included the FIN on previous transmit.
 			 * We need to set tp->snd_nxt to the value it would have
@@ -3086,9 +3096,9 @@ int tcp_send_synack(struct sock *sk)
 {
 	struct sk_buff *skb;
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
-		pr_debug("%s: wrong queue state\n", __func__);
+		pr_err("%s: wrong queue state\n", __func__);
 		return -EFAULT;
 	}
 	if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
@@ -3101,10 +3111,9 @@ int tcp_send_synack(struct sock *sk)
 		if (!nskb)
 			return -ENOMEM;
 		INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
-		tcp_unlink_write_queue(skb, sk);
+		tcp_rtx_queue_unlink_and_free(skb, sk);
 		__skb_header_release(nskb);
-		__tcp_add_write_queue_head(sk, nskb);
-		sk_wmem_free_skb(sk, skb);
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
 		sk->sk_wmem_queued += nskb->truesize;
 		sk_mem_charge(sk, nskb->truesize);
 		skb = nskb;
@@ -3327,7 +3336,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
 
 	tcb->end_seq += skb->len;
 	__skb_header_release(skb);
-	__tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
 	tp->write_seq = tcb->end_seq;
@@ -3405,12 +3413,13 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
 	if (!err) {
 		tp->syn_data = (fo->copied > 0);
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
 		goto done;
 	}
 
-	/* data was not sent, this is our new send_head */
-	sk->sk_send_head = syn_data;
+	/* data was not sent, put it in write_queue */
+	__skb_queue_tail(&sk->sk_write_queue, syn_data);
 	tp->packets_out -= tcp_skb_pcount(syn_data);
 
 fallback:
@@ -3453,6 +3462,7 @@ int tcp_connect(struct sock *sk)
 	tp->retrans_stamp = tcp_time_stamp(tp);
 	tcp_connect_queue_skb(sk, buff);
 	tcp_ecn_send_syn(sk, buff);
+	tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
 
 	/* Send off SYN; include data in Fast Open. */
 	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3647,7 +3657,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+			if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+					 skb, seg_size, mss, GFP_ATOMIC))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(skb, mss);
@@ -3677,7 +3688,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
 
-	if (tp->packets_out || !tcp_send_head(sk)) {
+	if (tp->packets_out || tcp_write_queue_empty(sk)) {
 		/* Cancel probe timer, if it is not required. */
 		icsk->icsk_probes_out = 0;
 		icsk->icsk_backoff = 0;