diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 78 |
1 files changed, 73 insertions, 5 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 62ee71efd1ce..eedb25db3947 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -95,6 +95,7 @@ int sysctl_tcp_stdurg __read_mostly; | |||
95 | int sysctl_tcp_rfc1337 __read_mostly; | 95 | int sysctl_tcp_rfc1337 __read_mostly; |
96 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; | 96 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; |
97 | int sysctl_tcp_frto __read_mostly = 2; | 97 | int sysctl_tcp_frto __read_mostly = 2; |
98 | int sysctl_tcp_min_rtt_wlen __read_mostly = 300; | ||
98 | 99 | ||
99 | int sysctl_tcp_thin_dupack __read_mostly; | 100 | int sysctl_tcp_thin_dupack __read_mostly; |
100 | 101 | ||
@@ -2915,8 +2916,69 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, | |||
2915 | tcp_xmit_retransmit_queue(sk); | 2916 | tcp_xmit_retransmit_queue(sk); |
2916 | } | 2917 | } |
2917 | 2918 | ||
2919 | /* Kathleen Nichols' algorithm for tracking the minimum value of | ||
2920 | * a data stream over some fixed time interval. (E.g., the minimum | ||
2921 | * RTT over the past five minutes.) It uses constant space and constant | ||
2922 | * time per update yet almost always delivers the same minimum as an | ||
2923 | * implementation that has to keep all the data in the window. | ||
2924 | * | ||
2925 | * The algorithm keeps track of the best, 2nd best & 3rd best min | ||
2926 | * values, maintaining an invariant that the measurement time of the | ||
2927 | * n'th best >= n-1'th best. It also makes sure that the three values | ||
2928 | * are widely separated in the time window since that bounds the worse | ||
2929 | * case error when that data is monotonically increasing over the window. | ||
2930 | * | ||
2931 | * Upon getting a new min, we can forget everything earlier because it | ||
2932 | * has no value - the new min is <= everything else in the window by | ||
2933 | * definition and it's the most recent. So we restart fresh on every new min | ||
2934 | * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd | ||
2935 | * best. | ||
2936 | */ | ||
2937 | static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) | ||
2938 | { | ||
2939 | const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; | ||
2940 | struct rtt_meas *m = tcp_sk(sk)->rtt_min; | ||
2941 | struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now }; | ||
2942 | u32 elapsed; | ||
2943 | |||
2944 | /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ | ||
2945 | if (unlikely(rttm.rtt <= m[0].rtt)) | ||
2946 | m[0] = m[1] = m[2] = rttm; | ||
2947 | else if (rttm.rtt <= m[1].rtt) | ||
2948 | m[1] = m[2] = rttm; | ||
2949 | else if (rttm.rtt <= m[2].rtt) | ||
2950 | m[2] = rttm; | ||
2951 | |||
2952 | elapsed = now - m[0].ts; | ||
2953 | if (unlikely(elapsed > wlen)) { | ||
2954 | /* Passed entire window without a new min so make 2nd choice | ||
2955 | * the new min & 3rd choice the new 2nd. So forth and so on. | ||
2956 | */ | ||
2957 | m[0] = m[1]; | ||
2958 | m[1] = m[2]; | ||
2959 | m[2] = rttm; | ||
2960 | if (now - m[0].ts > wlen) { | ||
2961 | m[0] = m[1]; | ||
2962 | m[1] = rttm; | ||
2963 | if (now - m[0].ts > wlen) | ||
2964 | m[0] = rttm; | ||
2965 | } | ||
2966 | } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) { | ||
2967 | /* Passed a quarter of the window without a new min so | ||
2968 | * take 2nd choice from the 2nd quarter of the window. | ||
2969 | */ | ||
2970 | m[2] = m[1] = rttm; | ||
2971 | } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) { | ||
2972 | /* Passed half the window without a new min so take the 3rd | ||
2973 | * choice from the last half of the window. | ||
2974 | */ | ||
2975 | m[2] = rttm; | ||
2976 | } | ||
2977 | } | ||
2978 | |||
2918 | static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, | 2979 | static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, |
2919 | long seq_rtt_us, long sack_rtt_us) | 2980 | long seq_rtt_us, long sack_rtt_us, |
2981 | long ca_rtt_us) | ||
2920 | { | 2982 | { |
2921 | const struct tcp_sock *tp = tcp_sk(sk); | 2983 | const struct tcp_sock *tp = tcp_sk(sk); |
2922 | 2984 | ||
@@ -2936,11 +2998,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, | |||
2936 | */ | 2998 | */ |
2937 | if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && | 2999 | if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && |
2938 | flag & FLAG_ACKED) | 3000 | flag & FLAG_ACKED) |
2939 | seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr); | 3001 | seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp - |
2940 | 3002 | tp->rx_opt.rcv_tsecr); | |
2941 | if (seq_rtt_us < 0) | 3003 | if (seq_rtt_us < 0) |
2942 | return false; | 3004 | return false; |
2943 | 3005 | ||
3006 | /* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is | ||
3007 | * always taken together with ACK, SACK, or TS-opts. Any negative | ||
3008 | * values will be skipped with the seq_rtt_us < 0 check above. | ||
3009 | */ | ||
3010 | tcp_update_rtt_min(sk, ca_rtt_us); | ||
2944 | tcp_rtt_estimator(sk, seq_rtt_us); | 3011 | tcp_rtt_estimator(sk, seq_rtt_us); |
2945 | tcp_set_rto(sk); | 3012 | tcp_set_rto(sk); |
2946 | 3013 | ||
@@ -2961,7 +3028,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) | |||
2961 | rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack); | 3028 | rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack); |
2962 | } | 3029 | } |
2963 | 3030 | ||
2964 | tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L); | 3031 | tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us); |
2965 | } | 3032 | } |
2966 | 3033 | ||
2967 | 3034 | ||
@@ -3175,7 +3242,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3175 | ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); | 3242 | ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); |
3176 | } | 3243 | } |
3177 | 3244 | ||
3178 | rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); | 3245 | rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, |
3246 | ca_rtt_us); | ||
3179 | 3247 | ||
3180 | if (flag & FLAG_ACKED) { | 3248 | if (flag & FLAG_ACKED) { |
3181 | tcp_rearm_rto(sk); | 3249 | tcp_rearm_rto(sk); |