Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--   net/ipv4/tcp_output.c | 137
1 file changed, 74 insertions, 63 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8162e2880178..696b0a168f16 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -66,15 +66,17 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
 /* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
 
-	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
+	__skb_unlink(skb, &sk->sk_write_queue);
+	tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
 	tp->packets_out += tcp_skb_pcount(skb);
 	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
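tcp_event_new_data_sent() now unlinks a just-sent skb from the linear sk_write_queue and parks it on the new rtx rb-tree. For reference, a minimal sketch of what tcp_rbtree_insert() could look like, assuming the tree is ordered by TCP_SKB_CB(skb)->seq and that rb_to_skb() maps an rb_node back to its sk_buff (neither is shown in this hunk):

	/* Sketch only: insert skb into an rb-tree ordered by start sequence. */
	void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
	{
		struct rb_node **p = &root->rb_node;
		struct rb_node *parent = NULL;
		struct sk_buff *skb1;

		while (*p) {
			parent = *p;
			skb1 = rb_to_skb(parent);
			if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
				p = &parent->rb_left;
			else
				p = &parent->rb_right;
		}
		rb_link_node(&skb->rbnode, parent, p);
		rb_insert_color(&skb->rbnode, root);
	}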
@@ -1249,12 +1251,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
 	TCP_SKB_CB(skb)->eor = 0;
 }
 
+/* Insert buff after skb on the write or rtx queue of sk. */
+static void tcp_insert_write_queue_after(struct sk_buff *skb,
+					 struct sk_buff *buff,
+					 struct sock *sk,
+					 enum tcp_queue tcp_queue)
+{
+	if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
+		__skb_queue_after(&sk->sk_write_queue, skb, buff);
+	else
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+}
+
 /* Function to create two new TCP segments. Shrinks the given segment
  * to the specified size and appends a new segment with the rest of the
  * packet to the list. This won't be called frequently, I hope.
  * Remember, these are still headerless SKBs at this point.
  */
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+		 struct sk_buff *skb, u32 len,
 		 unsigned int mss_now, gfp_t gfp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
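The extra tcp_queue argument lets one tcp_fragment()/tso_fragment() implementation serve skbs that live either on the write queue or on the rtx rb-tree. The enum is presumably declared in include/net/tcp.h along these lines (an assumption; the declaration is outside this file):

	enum tcp_queue {
		TCP_FRAG_IN_WRITE_QUEUE,
		TCP_FRAG_IN_RTX_QUEUE,
	};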
@@ -1337,7 +1352,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
-	tcp_insert_write_queue_after(skb, buff, sk);
+	tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
 	list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
 
 	return 0;
@@ -1625,10 +1640,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 		 * is caused by insufficient sender buffer:
 		 * 1) just sent some data (see tcp_write_xmit)
 		 * 2) not cwnd limited (this else condition)
-		 * 3) no more data to send (null tcp_send_head )
+		 * 3) no more data to send (tcp_write_queue_empty())
 		 * 4) application is hitting buffer limit (SOCK_NOSPACE)
 		 */
-		if (!tcp_send_head(sk) && sk->sk_socket &&
+		if (tcp_write_queue_empty(sk) && sk->sk_socket &&
 		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
 		    (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
 			tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1824,7 +1839,8 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
  * know that all the data is in scatter-gather pages, and that the
  * packet has never been sent out before (and thus is not cloned).
  */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+			struct sk_buff *skb, unsigned int len,
 			unsigned int mss_now, gfp_t gfp)
 {
 	struct sk_buff *buff;
@@ -1833,7 +1849,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* All of a TSO frame must be composed of paged data. */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, skb, len, mss_now, gfp);
+		return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
@@ -1869,7 +1885,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
-	tcp_insert_write_queue_after(skb, buff, sk);
+	tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
 
 	return 0;
 }
@@ -1939,8 +1955,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 		goto send_now;
 	}
 
-	head = tcp_write_queue_head(sk);
-
+	/* TODO : use tsorted_sent_queue ? */
+	head = tcp_rtx_queue_head(sk);
+	if (!head)
+		goto send_now;
 	age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
 	/* If next ACK is likely to come too late (half srtt), do not defer */
 	if (age < (tp->srtt_us >> 4))
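With sent-but-unacked skbs now held in the rb-tree, the deferral heuristic samples the timestamp of the oldest in-flight skb via tcp_rtx_queue_head(). A plausible definition, assuming an skb_rb_first() accessor over the rtx root (both helpers live outside this file):

	static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
	{
		return skb_rb_first(&sk->tcp_rtx_queue);
	}

The new NULL check is needed because the rtx tree can be empty here when nothing is currently in flight, unlike the old write-queue head.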
@@ -2158,13 +2176,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	limit <<= factor;
 
 	if (refcount_read(&sk->sk_wmem_alloc) > limit) {
-		/* Always send the 1st or 2nd skb in write queue.
+		/* Always send skb if rtx queue is empty.
 		 * No need to wait for TX completion to call us back,
 		 * after softirq/tasklet schedule.
 		 * This helps when TX completions are delayed too much.
 		 */
-		if (skb == sk->sk_write_queue.next ||
-		    skb->prev == sk->sk_write_queue.next)
+		if (tcp_rtx_queue_empty(sk))
 			return false;
 
 		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
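Once every sent-but-unacked skb sits on the rtx rb-tree, the old "1st or 2nd skb in write queue" test collapses to "rtx queue empty". The helper is presumably along these lines (assumption; declared outside this file):

	static inline bool tcp_rtx_queue_empty(const struct sock *sk)
	{
		return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
	}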
@@ -2215,7 +2232,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
 	 * it's the "most interesting" or current chrono we are
 	 * tracking and starts busy chrono if we have pending data.
 	 */
-	if (tcp_write_queue_empty(sk))
+	if (tcp_rtx_and_write_queues_empty(sk))
 		tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
 	else if (type == tp->chrono_type)
 		tcp_chrono_set(tp, TCP_CHRONO_BUSY);
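"Pending data" now means either unsent data on the write queue or in-flight data on the rtx tree, so the chrono code checks both. Presumed helpers, sketched under the same assumption that they are defined in include/net/tcp.h:

	static inline bool tcp_write_queue_empty(const struct sock *sk)
	{
		return skb_queue_empty(&sk->sk_write_queue);
	}

	static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
	{
		return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
	}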
@@ -2310,7 +2327,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						    nonagle);
 
 		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+		    unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+					  skb, limit, mss_now, gfp)))
 			break;
 
 		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2350,7 +2368,7 @@ repair:
 		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
-	return !tp->packets_out && tcp_send_head(sk);
+	return !tp->packets_out && !tcp_write_queue_empty(sk);
 }
 
 bool tcp_schedule_loss_probe(struct sock *sk)
@@ -2374,7 +2392,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 		return false;
 
 	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
-	     tcp_send_head(sk))
+	    !tcp_write_queue_empty(sk))
 		return false;
 
 	/* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2427,18 +2445,14 @@ void tcp_send_loss_probe(struct sock *sk)
 	int mss = tcp_current_mss(sk);
 
 	skb = tcp_send_head(sk);
-	if (skb) {
-		if (tcp_snd_wnd_test(tp, skb, mss)) {
-			pcount = tp->packets_out;
-			tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
-			if (tp->packets_out > pcount)
-				goto probe_sent;
-			goto rearm_timer;
-		}
-		skb = tcp_write_queue_prev(sk, skb);
-	} else {
-		skb = tcp_write_queue_tail(sk);
+	if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+		pcount = tp->packets_out;
+		tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+		if (tp->packets_out > pcount)
+			goto probe_sent;
+		goto rearm_timer;
 	}
+	skb = skb_rb_last(&sk->tcp_rtx_queue);
 
 	/* At most one outstanding TLP retransmission. */
 	if (tp->tlp_high_seq)
@@ -2456,10 +2470,11 @@ void tcp_send_loss_probe(struct sock *sk)
 		goto rearm_timer;
 
 	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
-		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+		if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+					  (pcount - 1) * mss, mss,
 					  GFP_ATOMIC)))
 			goto rearm_timer;
-		skb = tcp_write_queue_next(sk, skb);
+		skb = skb_rb_next(skb);
 	}
 
 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2659,7 +2674,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+	struct sk_buff *next_skb = skb_rb_next(skb);
 	int skb_size, next_skb_size;
 
 	skb_size = skb->len;
@@ -2676,8 +2691,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_highest_sack_combine(sk, next_skb, skb);
 
-	tcp_unlink_write_queue(next_skb, sk);
-
 	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->ip_summed = CHECKSUM_PARTIAL;
 
@@ -2705,7 +2718,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 
 	tcp_skb_collapse_tstamp(skb, next_skb);
 
-	sk_wmem_free_skb(sk, next_skb);
+	tcp_rtx_queue_unlink_and_free(next_skb, sk);
 	return true;
 }
 
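Collapsing a retransmit now frees the merged skb straight out of the rb-tree rather than out of the write queue. tcp_rtx_queue_unlink_and_free() is presumably a small wrapper like the sketch below (assumed; defined outside this file), dropping the skb from both the rtx tree and the tsorted list before freeing it:

	static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb,
							 struct sock *sk)
	{
		list_del(&skb->tcp_tsorted_anchor);
		rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
		sk_wmem_free_skb(sk, skb);
	}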
@@ -2716,8 +2729,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
 		return false;
 	if (skb_cloned(skb))
 		return false;
-	if (skb == tcp_send_head(sk))
-		return false;
 	/* Some heuristics for collapsing over SACK'd could be invented */
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 		return false;
@@ -2740,7 +2751,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 		return;
 
-	tcp_for_write_queue_from_safe(skb, tmp, sk) {
+	skb_rbtree_walk_from_safe(skb, tmp) {
 		if (!tcp_can_collapse(sk, skb))
 			break;
 
@@ -2815,7 +2826,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 
 	len = cur_mss * segs;
 	if (skb->len > len) {
-		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+		if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+				 cur_mss, GFP_ATOMIC))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
 		if (skb_unclone(skb, GFP_ATOMIC))
@@ -2906,29 +2918,24 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *skb, *rtx_head = NULL, *hole = NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	struct sk_buff *hole = NULL;
 	u32 max_segs;
 	int mib_idx;
 
 	if (!tp->packets_out)
 		return;
 
-	if (tp->retransmit_skb_hint) {
-		skb = tp->retransmit_skb_hint;
-	} else {
-		skb = tcp_write_queue_head(sk);
+	skb = tp->retransmit_skb_hint;
+	if (!skb) {
+		rtx_head = tcp_rtx_queue_head(sk);
+		skb = rtx_head;
 	}
-
 	max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
-	tcp_for_write_queue_from(skb, sk) {
+	skb_rbtree_walk_from(skb) {
 		__u8 sacked;
 		int segs;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
 		if (tcp_pacing_check(sk))
 			break;
 
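The retransmit loop walks the rtx rb-tree in sequence order instead of walking the write queue, so the explicit "stop at tcp_send_head()" test disappears. skb_rbtree_walk_from() is expected to be a simple in-order iterator, roughly (assumed helper, paired with an skb_rb_next() accessor defined elsewhere in the series):

	#define skb_rbtree_walk_from(skb) \
		for (; (skb) != NULL; (skb) = skb_rb_next(skb))

rtx_head is remembered only to decide, further down, whether the retransmit timer must be re-armed.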
@@ -2973,7 +2980,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
-		if (skb == tcp_write_queue_head(sk) &&
+		if (skb == rtx_head &&
 		    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
@@ -3015,12 +3022,15 @@ void tcp_send_fin(struct sock *sk)
 	 * Note: in the latter case, FIN packet will be sent after a timeout,
 	 * as TCP stack thinks it has already been transmitted.
 	 */
-	if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+	if (!tskb && tcp_under_memory_pressure(sk))
+		tskb = skb_rb_last(&sk->tcp_rtx_queue);
+
+	if (tskb) {
 coalesce:
 		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
 		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
-		if (!tcp_send_head(sk)) {
+		if (tcp_write_queue_empty(sk)) {
 			/* This means tskb was already sent.
 			 * Pretend we included the FIN on previous transmit.
 			 * We need to set tp->snd_nxt to the value it would have
@@ -3086,9 +3096,9 @@ int tcp_send_synack(struct sock *sk)
 {
 	struct sk_buff *skb;
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
-		pr_debug("%s: wrong queue state\n", __func__);
+		pr_err("%s: wrong queue state\n", __func__);
 		return -EFAULT;
 	}
 	if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
@@ -3101,10 +3111,9 @@ int tcp_send_synack(struct sock *sk)
 		if (!nskb)
 			return -ENOMEM;
 		INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
-		tcp_unlink_write_queue(skb, sk);
+		tcp_rtx_queue_unlink_and_free(skb, sk);
 		__skb_header_release(nskb);
-		__tcp_add_write_queue_head(sk, nskb);
-		sk_wmem_free_skb(sk, skb);
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
 		sk->sk_wmem_queued += nskb->truesize;
 		sk_mem_charge(sk, nskb->truesize);
 		skb = nskb;
@@ -3327,7 +3336,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
 
 	tcb->end_seq += skb->len;
 	__skb_header_release(skb);
-	__tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
 	tp->write_seq = tcb->end_seq;
@@ -3405,12 +3413,13 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
 	if (!err) {
 		tp->syn_data = (fo->copied > 0);
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
 		goto done;
 	}
 
-	/* data was not sent, this is our new send_head */
-	sk->sk_send_head = syn_data;
+	/* data was not sent, put it in write_queue */
+	__skb_queue_tail(&sk->sk_write_queue, syn_data);
 	tp->packets_out -= tcp_skb_pcount(syn_data);
 
 fallback:
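After this change the SYN (and any Fast Open payload actually transmitted) is inserted directly into the rtx rb-tree, while data that could not be sent is queued on sk_write_queue instead of being tracked through sk->sk_send_head. The send head then becomes simply the first skb of the write queue; a sketch of the presumed accessor (defined in include/net/tcp.h, not in this file):

	static inline struct sk_buff *tcp_send_head(const struct sock *sk)
	{
		return skb_peek(&sk->sk_write_queue);
	}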
@@ -3453,6 +3462,7 @@ int tcp_connect(struct sock *sk)
 	tp->retrans_stamp = tcp_time_stamp(tp);
 	tcp_connect_queue_skb(sk, buff);
 	tcp_ecn_send_syn(sk, buff);
+	tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
 
 	/* Send off SYN; include data in Fast Open. */
 	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3647,7 +3657,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+			if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+					 skb, seg_size, mss, GFP_ATOMIC))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(skb, mss);
@@ -3677,7 +3688,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
 
-	if (tp->packets_out || !tcp_send_head(sk)) {
+	if (tp->packets_out || tcp_write_queue_empty(sk)) {
 		/* Cancel probe timer, if it is not required. */
 		icsk->icsk_probes_out = 0;
 		icsk->icsk_backoff = 0;