Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--   net/ipv4/tcp_input.c   204
1 file changed, 121 insertions, 83 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ca2139a130b..25a89eaa669d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -688,6 +688,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
         }
 }
 
+/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
+ * Note: TCP stack does not yet implement pacing.
+ * FQ packet scheduler can be used to implement cheap but effective
+ * TCP pacing, to smooth the burst on large writes when packets
+ * in flight is significantly lower than cwnd (or rwin)
+ */
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+        const struct tcp_sock *tp = tcp_sk(sk);
+        u64 rate;
+
+        /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+        rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+
+        rate *= max(tp->snd_cwnd, tp->packets_out);
+
+        /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
+         * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+         * We probably need usec resolution in the future.
+         * Note: This also takes care of possible srtt=0 case,
+         * when tcp_rtt_estimator() was not yet called.
+         */
+        if (tp->srtt > 8 + 2)
+                do_div(rate, tp->srtt);
+
+        sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+}
+
 /* Calculate rto without backoff. This is the second half of Van Jacobson's
  * routine referred to above.
  */
@@ -1048,6 +1076,7 @@ struct tcp_sacktag_state {
         int reord;
         int fack_count;
         int flag;
+        s32 rtt; /* RTT measured by SACKing never-retransmitted data */
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1108,7 +1137,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 static u8 tcp_sacktag_one(struct sock *sk,
                           struct tcp_sacktag_state *state, u8 sacked,
                           u32 start_seq, u32 end_seq,
-                          bool dup_sack, int pcount)
+                          int dup_sack, int pcount, u32 xmit_time)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         int fack_count = state->fack_count;
@@ -1148,6 +1177,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
                                                    state->reord);
                 if (!after(end_seq, tp->high_seq))
                         state->flag |= FLAG_ORIG_SACK_ACKED;
+                /* Pick the earliest sequence sacked for RTT */
+                if (state->rtt < 0)
+                        state->rtt = tcp_time_stamp - xmit_time;
         }
 
         if (sacked & TCPCB_LOST) {
@@ -1205,7 +1237,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
          * tcp_highest_sack_seq() when skb is highest_sack.
          */
         tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-                        start_seq, end_seq, dup_sack, pcount);
+                        start_seq, end_seq, dup_sack, pcount,
+                        TCP_SKB_CB(skb)->when);
 
         if (skb == tp->lost_skb_hint)
                 tp->lost_cnt_hint += pcount;
@@ -1479,7 +1512,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
                                                 TCP_SKB_CB(skb)->seq,
                                                 TCP_SKB_CB(skb)->end_seq,
                                                 dup_sack,
-                                                tcp_skb_pcount(skb));
+                                                tcp_skb_pcount(skb),
+                                                TCP_SKB_CB(skb)->when);
 
                         if (!before(TCP_SKB_CB(skb)->seq,
                                     tcp_highest_sack_seq(tp)))
@@ -1536,7 +1570,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-                        u32 prior_snd_una)
+                        u32 prior_snd_una, s32 *sack_rtt)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1554,6 +1588,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
         state.flag = 0;
         state.reord = tp->packets_out;
+        state.rtt = -1;
 
         if (!tp->sacked_out) {
                 if (WARN_ON(tp->fackets_out))
@@ -1737,6 +1772,7 @@ out:
         WARN_ON((int)tp->retrans_out < 0);
         WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
+        *sack_rtt = state.rtt;
         return state.flag;
 }
 
@@ -1869,8 +1905,13 @@ void tcp_enter_loss(struct sock *sk, int how)
         }
         tcp_verify_left_out(tp);
 
-        tp->reordering = min_t(unsigned int, tp->reordering,
-                               sysctl_tcp_reordering);
+        /* Timeout in disordered state after receiving substantial DUPACKs
+         * suggests that the degree of reordering is over-estimated.
+         */
+        if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
+            tp->sacked_out >= sysctl_tcp_reordering)
+                tp->reordering = min_t(unsigned int, tp->reordering,
+                                       sysctl_tcp_reordering);
         tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->snd_nxt;
         TCP_ECN_queue_cwr(tp);
@@ -2472,8 +2513,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
 
         if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
                 tcp_try_keep_open(sk);
-                if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
-                        tcp_moderate_cwnd(tp);
         } else {
                 tcp_cwnd_reduction(sk, prior_unsacked, 0);
         }
@@ -2792,65 +2831,51 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
         tcp_xmit_retransmit_queue(sk);
 }
 
-void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
+static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
+                                      s32 seq_rtt, s32 sack_rtt)
 {
-        tcp_rtt_estimator(sk, seq_rtt);
-        tcp_set_rto(sk);
-        inet_csk(sk)->icsk_backoff = 0;
-}
-EXPORT_SYMBOL(tcp_valid_rtt_meas);
+        const struct tcp_sock *tp = tcp_sk(sk);
+
+        /* Prefer RTT measured from ACK's timing to TS-ECR. This is because
+         * broken middle-boxes or peers may corrupt TS-ECR fields. But
+         * Karn's algorithm forbids taking RTT if some retransmitted data
+         * is acked (RFC6298).
+         */
+        if (flag & FLAG_RETRANS_DATA_ACKED)
+                seq_rtt = -1;
+
+        if (seq_rtt < 0)
+                seq_rtt = sack_rtt;
 
-/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Supersedes RFC1323)
- */
-static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
-{
         /* RTTM Rule: A TSecr value received in a segment is used to
          * update the averaged RTT measurement only if the segment
          * acknowledges some new data, i.e., only if it advances the
          * left edge of the send window.
-         *
          * See draft-ietf-tcplw-high-performance-00, section 3.3.
-         * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
-         *
-         * Changed: reset backoff as soon as we see the first valid sample.
-         * If we do not, we get strongly overestimated rto. With timestamps
-         * samples are accepted even from very old segments: f.e., when rtt=1
-         * increases to 8, we retransmit 5 times and after 8 seconds delayed
-         * answer arrives rto becomes 120 seconds! If at least one of segments
-         * in window is lost... Voila. --ANK (010210)
          */
-        struct tcp_sock *tp = tcp_sk(sk);
-
-        tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-}
+        if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+                seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
-{
-        /* We don't have a timestamp. Can only use
-         * packets that are not retransmitted to determine
-         * rtt estimates. Also, we must not reset the
-         * backoff for rto until we get a non-retransmitted
-         * packet. This allows us to deal with a situation
-         * where the network delay has increased suddenly.
-         * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
-         */
+        if (seq_rtt < 0)
+                return false;
 
-        if (flag & FLAG_RETRANS_DATA_ACKED)
-                return;
+        tcp_rtt_estimator(sk, seq_rtt);
+        tcp_set_rto(sk);
 
-        tcp_valid_rtt_meas(sk, seq_rtt);
+        /* RFC6298: only reset backoff on valid RTT measurement. */
+        inet_csk(sk)->icsk_backoff = 0;
+        return true;
 }
 
-static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-                                      const s32 seq_rtt)
+/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
+static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
 {
-        const struct tcp_sock *tp = tcp_sk(sk);
-        /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
-        if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-                tcp_ack_saw_tstamp(sk, flag);
-        else if (seq_rtt >= 0)
-                tcp_ack_no_tstamp(sk, seq_rtt, flag);
+        struct tcp_sock *tp = tcp_sk(sk);
+        s32 seq_rtt = -1;
+
+        if (tp->lsndtime && !tp->total_retrans)
+                seq_rtt = tcp_time_stamp - tp->lsndtime;
+        tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
 }
 
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
@@ -2939,7 +2964,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-                               u32 prior_snd_una)
+                               u32 prior_snd_una, s32 sack_rtt)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2978,8 +3003,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                         if (sacked & TCPCB_SACKED_RETRANS)
                                 tp->retrans_out -= acked_pcount;
                         flag |= FLAG_RETRANS_DATA_ACKED;
-                        ca_seq_rtt = -1;
-                        seq_rtt = -1;
                 } else {
                         ca_seq_rtt = now - scb->when;
                         last_ackt = skb->tstamp;
@@ -3031,6 +3054,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
         if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
                 flag |= FLAG_SACK_RENEGING;
 
+        if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
+            (flag & FLAG_ACKED))
+                tcp_rearm_rto(sk);
+
         if (flag & FLAG_ACKED) {
                 const struct tcp_congestion_ops *ca_ops
                         = inet_csk(sk)->icsk_ca_ops;
@@ -3040,9 +3067,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                         tcp_mtup_probe_success(sk);
                 }
 
-                tcp_ack_update_rtt(sk, flag, seq_rtt);
-                tcp_rearm_rto(sk);
-
                 if (tcp_is_reno(tp)) {
                         tcp_remove_reno_sacks(sk, pkts_acked);
                 } else {
@@ -3130,11 +3154,22 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
                 inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
 }
 
+/* Decide wheather to run the increase function of congestion control. */
 static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
-        const struct tcp_sock *tp = tcp_sk(sk);
-        return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-                !tcp_in_cwnd_reduction(sk);
+        if (tcp_in_cwnd_reduction(sk))
+                return false;
+
+        /* If reordering is high then always grow cwnd whenever data is
+         * delivered regardless of its ordering. Otherwise stay conservative
+         * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
+         * new SACK or ECE mark may first advance cwnd here and later reduce
+         * cwnd in tcp_fastretrans_alert() based on more states.
+         */
+        if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+                return flag & FLAG_FORWARD_PROGRESS;
+
+        return flag & FLAG_DATA_ACKED;
 }
 
 /* Check that window update is acceptable.
@@ -3269,11 +3304,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         u32 ack_seq = TCP_SKB_CB(skb)->seq;
         u32 ack = TCP_SKB_CB(skb)->ack_seq;
         bool is_dupack = false;
-        u32 prior_in_flight;
+        u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
         u32 prior_fackets;
         int prior_packets = tp->packets_out;
         const int prior_unsacked = tp->packets_out - tp->sacked_out;
         int acked = 0; /* Number of packets newly acked */
+        s32 sack_rtt = -1;
 
         /* If the ack is older than previous acks
          * then we can probably ignore it.
@@ -3330,7 +3366,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
                 if (TCP_SKB_CB(skb)->sacked)
-                        flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+                        flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+                                                        &sack_rtt);
 
                 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
                         flag |= FLAG_ECE;
@@ -3349,21 +3386,18 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
         /* See if we can take anything off of the retransmit queue. */
         acked = tp->packets_out;
-        flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+        flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
         acked -= tp->packets_out;
 
+        /* Advance cwnd if state allows */
+        if (tcp_may_raise_cwnd(sk, flag))
+                tcp_cong_avoid(sk, ack, prior_in_flight);
+
         if (tcp_ack_is_dubious(sk, flag)) {
-                /* Advance CWND, if state allows this. */
-                if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
-                        tcp_cong_avoid(sk, ack, prior_in_flight);
                 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
                 tcp_fastretrans_alert(sk, acked, prior_unsacked,
                                       is_dupack, flag);
-        } else {
-                if (flag & FLAG_DATA_ACKED)
-                        tcp_cong_avoid(sk, ack, prior_in_flight);
         }
-
         if (tp->tlp_high_seq)
                 tcp_process_tlp_ack(sk, ack, flag);
 
@@ -3375,6 +3409,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
         if (icsk->icsk_pending == ICSK_TIME_RETRANS)
                 tcp_schedule_loss_probe(sk);
+        if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
+                tcp_update_pacing_rate(sk);
         return 1;
 
 no_queue:
@@ -3402,7 +3438,8 @@ old_ack:
          * If data was DSACKed, see if we can undo a cwnd reduction.
          */
         if (TCP_SKB_CB(skb)->sacked) {
-                flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+                flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+                                                &sack_rtt);
                 tcp_fastretrans_alert(sk, acked, prior_unsacked,
                                       is_dupack, flag);
         }
@@ -4102,6 +4139,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
                 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
                         __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
                 } else {
+                        tcp_grow_window(sk, skb);
                         kfree_skb_partial(skb, fragstolen);
                         skb = NULL;
                 }
@@ -4177,8 +4215,10 @@ add_sack:
         if (tcp_is_sack(tp))
                 tcp_sack_new_ofo_skb(sk, seq, end_seq);
 end:
-        if (skb)
+        if (skb) {
+                tcp_grow_window(sk, skb);
                 skb_set_owner_r(skb, sk);
+        }
 }
 
 static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
@@ -5013,8 +5053,8 @@ discard:
  * the rest is checked inline. Fast processing is turned on in
  * tcp_data_queue when everything is OK.
  */
-int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                         const struct tcphdr *th, unsigned int len)
 {
         struct tcp_sock *tp = tcp_sk(sk);
 
@@ -5091,7 +5131,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                         tcp_ack(sk, skb, 0);
                         __kfree_skb(skb);
                         tcp_data_snd_check(sk);
-                        return 0;
+                        return;
                 } else { /* Header too small */
                         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
                         goto discard;
@@ -5184,7 +5224,7 @@ no_ack:
                         if (eaten)
                                 kfree_skb_partial(skb, fragstolen);
                         sk->sk_data_ready(sk, 0);
-                        return 0;
+                        return;
                 }
         }
 
@@ -5200,7 +5240,7 @@ slow_path:
          */
 
         if (!tcp_validate_incoming(sk, skb, th, 1))
-                return 0;
+                return;
 
 step5:
         if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
@@ -5216,7 +5256,7 @@ step5:
 
         tcp_data_snd_check(sk);
         tcp_ack_snd_check(sk);
-        return 0;
+        return;
 
 csum_error:
         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
@@ -5224,7 +5264,6 @@ csum_error:
 
 discard:
         __kfree_skb(skb);
-        return 0;
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
@@ -5627,9 +5666,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                  * so release it.
                  */
                 if (req) {
-                        tcp_synack_rtt_meas(sk, req);
                         tp->total_retrans = req->num_retrans;
-
                         reqsk_fastopen_remove(sk, req, false);
                 } else {
                         /* Make sure socket is routed, for correct metrics. */
@@ -5654,6 +5691,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
                 tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
                 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+                tcp_synack_rtt_meas(sk, req);
 
                 if (tp->rx_opt.tstamp_ok)
                         tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;