Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	204
1 file changed, 121 insertions(+), 83 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ca2139a130b..25a89eaa669d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -688,6 +688,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	}
 }
 
+/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
+ * Note: TCP stack does not yet implement pacing.
+ * FQ packet scheduler can be used to implement cheap but effective
+ * TCP pacing, to smooth the burst on large writes when packets
+ * in flight is significantly lower than cwnd (or rwin)
+ */
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	u64 rate;
+
+	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+	rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+
+	rate *= max(tp->snd_cwnd, tp->packets_out);
+
+	/* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
+	 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+	 * We probably need usec resolution in the future.
+	 * Note: This also takes care of possible srtt=0 case,
+	 * when tcp_rtt_estimator() was not yet called.
+	 */
+	if (tp->srtt > 8 + 2)
+		do_div(rate, tp->srtt);
+
+	sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+}
+
 /* Calculate rto without backoff. This is the second half of Van Jacobson's
  * routine referred to above.
  */
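To sanity-check the arithmetic in tcp_update_pacing_rate(), here is a worked example with illustrative numbers that are not part of the patch (HZ = 1000, mss_cache = 1448 bytes, snd_cwnd = 10 with packets_out <= 10, srtt = 800, i.e. a 100 ms RTT stored left-shifted by 3 in jiffies):

	rate  = 1448 * 2 * (1000 << 3)	/*  23,168,000 */
	rate *= 10			/* 231,680,000 */
	rate /= 800			/*     289,600 bytes/sec */

That is exactly twice cwnd * mss per RTT (10 * 1448 bytes / 100 ms = 144,800 bytes/sec, doubled), so the (HZ << 3) factor in the numerator cancels both the jiffy units and the << 3 scaling of srtt, leaving sk_pacing_rate in bytes per second.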
@@ -1048,6 +1076,7 @@ struct tcp_sacktag_state {
 	int reord;
 	int fack_count;
 	int flag;
+	s32 rtt; /* RTT measured by SACKing never-retransmitted data */
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1108,7 +1137,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
-			  bool dup_sack, int pcount)
+			  int dup_sack, int pcount, u32 xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1148,6 +1177,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
 						   state->reord);
 			if (!after(end_seq, tp->high_seq))
 				state->flag |= FLAG_ORIG_SACK_ACKED;
+			/* Pick the earliest sequence sacked for RTT */
+			if (state->rtt < 0)
+				state->rtt = tcp_time_stamp - xmit_time;
 		}
 
 	if (sacked & TCPCB_LOST) {
@@ -1205,7 +1237,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 * tcp_highest_sack_seq() when skb is highest_sack.
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-			start_seq, end_seq, dup_sack, pcount);
+			start_seq, end_seq, dup_sack, pcount,
+			TCP_SKB_CB(skb)->when);
 
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
@@ -1479,7 +1512,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->seq,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
-						tcp_skb_pcount(skb));
+						tcp_skb_pcount(skb),
+						TCP_SKB_CB(skb)->when);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1536,7 +1570,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
			u32 prior_snd_una)
+			u32 prior_snd_una, s32 *sack_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1554,6 +1588,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 	state.flag = 0;
 	state.reord = tp->packets_out;
+	state.rtt = -1;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1737,6 +1772,7 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
+	*sack_rtt = state.rtt;
 	return state.flag;
 }
 
@@ -1869,8 +1905,13 @@ void tcp_enter_loss(struct sock *sk, int how)
 	}
 	tcp_verify_left_out(tp);
 
-	tp->reordering = min_t(unsigned int, tp->reordering,
-			       sysctl_tcp_reordering);
+	/* Timeout in disordered state after receiving substantial DUPACKs
+	 * suggests that the degree of reordering is over-estimated.
+	 */
+	if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
+	    tp->sacked_out >= sysctl_tcp_reordering)
+		tp->reordering = min_t(unsigned int, tp->reordering,
+				       sysctl_tcp_reordering);
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
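As a concrete illustration (not part of the patch): with the default sysctl_tcp_reordering of 3, an RTO that fires in Open or Disorder state after at least three segments were SACKed is taken as evidence that a large learned tp->reordering was an over-estimate, and the estimate is clamped back to the sysctl value; an RTO seen with fewer DUPACKs, or one taken from Recovery or CWR, now leaves the learned reordering degree intact.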
@@ -2472,8 +2513,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
 		tcp_try_keep_open(sk);
-		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
-			tcp_moderate_cwnd(tp);
 	} else {
 		tcp_cwnd_reduction(sk, prior_unsacked, 0);
 	}
@@ -2792,65 +2831,51 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 		tcp_xmit_retransmit_queue(sk);
 }
 
-void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
+static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
+				      s32 seq_rtt, s32 sack_rtt)
 {
-	tcp_rtt_estimator(sk, seq_rtt);
-	tcp_set_rto(sk);
-	inet_csk(sk)->icsk_backoff = 0;
-}
-EXPORT_SYMBOL(tcp_valid_rtt_meas);
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Prefer RTT measured from ACK's timing to TS-ECR. This is because
+	 * broken middle-boxes or peers may corrupt TS-ECR fields. But
+	 * Karn's algorithm forbids taking RTT if some retransmitted data
+	 * is acked (RFC6298).
+	 */
+	if (flag & FLAG_RETRANS_DATA_ACKED)
+		seq_rtt = -1;
+
+	if (seq_rtt < 0)
+		seq_rtt = sack_rtt;
 
-/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Supersedes RFC1323)
- */
-static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
-{
 	/* RTTM Rule: A TSecr value received in a segment is used to
 	 * update the averaged RTT measurement only if the segment
 	 * acknowledges some new data, i.e., only if it advances the
 	 * left edge of the send window.
-	 *
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
-	 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
-	 *
-	 * Changed: reset backoff as soon as we see the first valid sample.
-	 * If we do not, we get strongly overestimated rto. With timestamps
-	 * samples are accepted even from very old segments: f.e., when rtt=1
-	 * increases to 8, we retransmit 5 times and after 8 seconds delayed
-	 * answer arrives rto becomes 120 seconds! If at least one of segments
-	 * in window is lost... Voila.			--ANK (010210)
 	 */
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-}
+	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
-{
-	/* We don't have a timestamp. Can only use
-	 * packets that are not retransmitted to determine
-	 * rtt estimates. Also, we must not reset the
-	 * backoff for rto until we get a non-retransmitted
-	 * packet. This allows us to deal with a situation
-	 * where the network delay has increased suddenly.
-	 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
-	 */
+	if (seq_rtt < 0)
+		return false;
 
-	if (flag & FLAG_RETRANS_DATA_ACKED)
-		return;
+	tcp_rtt_estimator(sk, seq_rtt);
+	tcp_set_rto(sk);
 
-	tcp_valid_rtt_meas(sk, seq_rtt);
+	/* RFC6298: only reset backoff on valid RTT measurement. */
+	inet_csk(sk)->icsk_backoff = 0;
+	return true;
 }
 
-static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      const s32 seq_rtt)
+/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
+static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
-	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-		tcp_ack_saw_tstamp(sk, flag);
-	else if (seq_rtt >= 0)
-		tcp_ack_no_tstamp(sk, seq_rtt, flag);
+	struct tcp_sock *tp = tcp_sk(sk);
+	s32 seq_rtt = -1;
+
+	if (tp->lsndtime && !tp->total_retrans)
+		seq_rtt = tcp_time_stamp - tp->lsndtime;
+	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
 }
 
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
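All RTT sampling now funnels through this single helper with a fixed preference order. The sketch below is illustrative only — the helper name and the tsecr_rtt parameter are invented for the example; the real tcp_ack_update_rtt() takes the flag word and reads the timestamp option from the socket — but it follows the same precedence: RTT timed from the ACK of never-retransmitted data first, then RTT timed from a newly SACKed, never-retransmitted skb, and the echoed timestamp (TS-ECR) only as a last resort:

	/* Illustrative sketch, not part of the patch. */
	static s32 pick_rtt_sample(bool retrans_acked, s32 seq_rtt,
				   s32 sack_rtt, s32 tsecr_rtt)
	{
		if (retrans_acked)	/* Karn's rule: never time retransmits */
			seq_rtt = -1;
		if (seq_rtt < 0)	/* fall back to SACK-based timing */
			seq_rtt = sack_rtt;
		if (seq_rtt < 0)	/* last resort: echoed timestamp */
			seq_rtt = tsecr_rtt;
		return seq_rtt;		/* -1: no usable sample on this ACK */
	}

A negative result means srtt, rttvar and the RTO backoff are left untouched, which is why tcp_ack_update_rtt() now returns bool.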
@@ -2939,7 +2964,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una)
+			       u32 prior_snd_una, s32 sack_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2978,8 +3003,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			if (sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
-			ca_seq_rtt = -1;
-			seq_rtt = -1;
 		} else {
 			ca_seq_rtt = now - scb->when;
 			last_ackt = skb->tstamp;
@@ -3031,6 +3054,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
+	if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
+	    (flag & FLAG_ACKED))
+		tcp_rearm_rto(sk);
+
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
@@ -3040,9 +3067,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			tcp_mtup_probe_success(sk);
 		}
 
-		tcp_ack_update_rtt(sk, flag, seq_rtt);
-		tcp_rearm_rto(sk);
-
 		if (tcp_is_reno(tp)) {
 			tcp_remove_reno_sacks(sk, pkts_acked);
 		} else {
@@ -3130,11 +3154,22 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
 		inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
 }
 
+/* Decide whether to run the increase function of congestion control. */
 static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-		!tcp_in_cwnd_reduction(sk);
+	if (tcp_in_cwnd_reduction(sk))
+		return false;
+
+	/* If reordering is high then always grow cwnd whenever data is
+	 * delivered regardless of its ordering. Otherwise stay conservative
+	 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
+	 * new SACK or ECE mark may first advance cwnd here and later reduce
+	 * cwnd in tcp_fastretrans_alert() based on more states.
+	 */
+	if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+		return flag & FLAG_FORWARD_PROGRESS;
+
+	return flag & FLAG_DATA_ACKED;
 }
 
 /* Check that window update is acceptable.
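Concretely, using the flag definitions near the top of tcp_input.c (where FLAG_FORWARD_PROGRESS is FLAG_ACKED | FLAG_DATA_SACKED): once the reordering estimate exceeds sysctl_tcp_reordering, an ACK that only SACKs new data, without advancing snd_una, is now enough to let congestion control grow cwnd; below that threshold only a cumulative ACK of new data (FLAG_DATA_ACKED) qualifies, as RFC 5681 prescribes, and any ACK received while cwnd reduction is in progress never does.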
@@ -3269,11 +3304,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight;
+	u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
+	s32 sack_rtt = -1;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3330,7 +3366,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
 		if (TCP_SKB_CB(skb)->sacked)
-			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+							&sack_rtt);
 
 		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 			flag |= FLAG_ECE;
@@ -3349,21 +3386,18 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* See if we can take anything off of the retransmit queue. */
 	acked = tp->packets_out;
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
 	acked -= tp->packets_out;
 
+	/* Advance cwnd if state allows */
+	if (tcp_may_raise_cwnd(sk, flag))
+		tcp_cong_avoid(sk, ack, prior_in_flight);
+
 	if (tcp_ack_is_dubious(sk, flag)) {
-		/* Advance CWND, if state allows this. */
-		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
-			tcp_cong_avoid(sk, ack, prior_in_flight);
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
-	} else {
-		if (flag & FLAG_DATA_ACKED)
-			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
-
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
 
@@ -3375,6 +3409,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
 		tcp_schedule_loss_probe(sk);
+	if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
+		tcp_update_pacing_rate(sk);
 	return 1;
 
 no_queue:
@@ -3402,7 +3438,8 @@ old_ack:
 	 * If data was DSACKed, see if we can undo a cwnd reduction.
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
-		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_rtt);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
 	}
@@ -4102,6 +4139,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
 			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
 		} else {
+			tcp_grow_window(sk, skb);
 			kfree_skb_partial(skb, fragstolen);
 			skb = NULL;
 		}
@@ -4177,8 +4215,10 @@ add_sack:
 	if (tcp_is_sack(tp))
 		tcp_sack_new_ofo_skb(sk, seq, end_seq);
 end:
-	if (skb)
+	if (skb) {
+		tcp_grow_window(sk, skb);
 		skb_set_owner_r(skb, sk);
+	}
 }
 
 static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
@@ -5013,8 +5053,8 @@ discard:
  * the rest is checked inline. Fast processing is turned on in
  * tcp_data_queue when everything is OK.
  */
-int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			const struct tcphdr *th, unsigned int len)
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+			 const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -5091,7 +5131,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
 				tcp_data_snd_check(sk);
-				return 0;
+				return;
 			} else { /* Header too small */
 				TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 				goto discard;
@@ -5184,7 +5224,7 @@ no_ack:
 			if (eaten)
 				kfree_skb_partial(skb, fragstolen);
 			sk->sk_data_ready(sk, 0);
-			return 0;
+			return;
 		}
 	}
 
@@ -5200,7 +5240,7 @@ slow_path:
 	 */
 
 	if (!tcp_validate_incoming(sk, skb, th, 1))
-		return 0;
+		return;
 
 step5:
 	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
@@ -5216,7 +5256,7 @@ step5:
 
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
-	return 0;
+	return;
 
 csum_error:
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
@@ -5224,7 +5264,6 @@ csum_error:
 
 discard:
 	__kfree_skb(skb);
-	return 0;
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
@@ -5627,9 +5666,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * so release it.
 		 */
 		if (req) {
-			tcp_synack_rtt_meas(sk, req);
 			tp->total_retrans = req->num_retrans;
-
 			reqsk_fastopen_remove(sk, req, false);
 		} else {
 			/* Make sure socket is routed, for correct metrics. */
@@ -5654,6 +5691,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+		tcp_synack_rtt_meas(sk, req);
 
 		if (tp->rx_opt.tstamp_ok)
 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;