diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 90 |
1 files changed, 52 insertions, 38 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075ab4d5af5e..fb4cf8b8e121 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly; | |||
100 | 100 | ||
101 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 101 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
102 | int sysctl_tcp_early_retrans __read_mostly = 3; | 102 | int sysctl_tcp_early_retrans __read_mostly = 3; |
103 | int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; | ||
103 | 104 | ||
104 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 105 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
105 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 106 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -3183,8 +3184,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3183 | 3184 | ||
3184 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); | 3185 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); |
3185 | 3186 | ||
3186 | if (ca_ops->pkts_acked) | 3187 | if (ca_ops->pkts_acked) { |
3187 | ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); | 3188 | long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); |
3189 | ca_ops->pkts_acked(sk, pkts_acked, rtt_us); | ||
3190 | } | ||
3188 | 3191 | ||
3189 | } else if (skb && rtt_update && sack_rtt_us >= 0 && | 3192 | } else if (skb && rtt_update && sack_rtt_us >= 0 && |
3190 | sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { | 3193 | sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { |
@@ -3319,13 +3322,22 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 | |||
3319 | } | 3322 | } |
3320 | 3323 | ||
3321 | /* RFC 5961 7 [ACK Throttling] */ | 3324 | /* RFC 5961 7 [ACK Throttling] */ |
3322 | static void tcp_send_challenge_ack(struct sock *sk) | 3325 | static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) |
3323 | { | 3326 | { |
3324 | /* unprotected vars, we dont care of overwrites */ | 3327 | /* unprotected vars, we dont care of overwrites */ |
3325 | static u32 challenge_timestamp; | 3328 | static u32 challenge_timestamp; |
3326 | static unsigned int challenge_count; | 3329 | static unsigned int challenge_count; |
3327 | u32 now = jiffies / HZ; | 3330 | struct tcp_sock *tp = tcp_sk(sk); |
3331 | u32 now; | ||
3332 | |||
3333 | /* First check our per-socket dupack rate limit. */ | ||
3334 | if (tcp_oow_rate_limited(sock_net(sk), skb, | ||
3335 | LINUX_MIB_TCPACKSKIPPEDCHALLENGE, | ||
3336 | &tp->last_oow_ack_time)) | ||
3337 | return; | ||
3328 | 3338 | ||
3339 | /* Then check the host-wide RFC 5961 rate limit. */ | ||
3340 | now = jiffies / HZ; | ||
3329 | if (now != challenge_timestamp) { | 3341 | if (now != challenge_timestamp) { |
3330 | challenge_timestamp = now; | 3342 | challenge_timestamp = now; |
3331 | challenge_count = 0; | 3343 | challenge_count = 0; |
@@ -3358,34 +3370,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | |||
3358 | } | 3370 | } |
3359 | 3371 | ||
3360 | /* This routine deals with acks during a TLP episode. | 3372 | /* This routine deals with acks during a TLP episode. |
3373 | * We mark the end of a TLP episode on receiving TLP dupack or when | ||
3374 | * ack is after tlp_high_seq. | ||
3361 | * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. | 3375 | * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. |
3362 | */ | 3376 | */ |
3363 | static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) | 3377 | static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) |
3364 | { | 3378 | { |
3365 | struct tcp_sock *tp = tcp_sk(sk); | 3379 | struct tcp_sock *tp = tcp_sk(sk); |
3366 | bool is_tlp_dupack = (ack == tp->tlp_high_seq) && | ||
3367 | !(flag & (FLAG_SND_UNA_ADVANCED | | ||
3368 | FLAG_NOT_DUP | FLAG_DATA_SACKED)); | ||
3369 | 3380 | ||
3370 | /* Mark the end of TLP episode on receiving TLP dupack or when | 3381 | if (before(ack, tp->tlp_high_seq)) |
3371 | * ack is after tlp_high_seq. | ||
3372 | */ | ||
3373 | if (is_tlp_dupack) { | ||
3374 | tp->tlp_high_seq = 0; | ||
3375 | return; | 3382 | return; |
3376 | } | ||
3377 | 3383 | ||
3378 | if (after(ack, tp->tlp_high_seq)) { | 3384 | if (flag & FLAG_DSACKING_ACK) { |
3385 | /* This DSACK means original and TLP probe arrived; no loss */ | ||
3386 | tp->tlp_high_seq = 0; | ||
3387 | } else if (after(ack, tp->tlp_high_seq)) { | ||
3388 | /* ACK advances: there was a loss, so reduce cwnd. Reset | ||
3389 | * tlp_high_seq in tcp_init_cwnd_reduction() | ||
3390 | */ | ||
3391 | tcp_init_cwnd_reduction(sk); | ||
3392 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
3393 | tcp_end_cwnd_reduction(sk); | ||
3394 | tcp_try_keep_open(sk); | ||
3395 | NET_INC_STATS_BH(sock_net(sk), | ||
3396 | LINUX_MIB_TCPLOSSPROBERECOVERY); | ||
3397 | } else if (!(flag & (FLAG_SND_UNA_ADVANCED | | ||
3398 | FLAG_NOT_DUP | FLAG_DATA_SACKED))) { | ||
3399 | /* Pure dupack: original and TLP probe arrived; no loss */ | ||
3379 | tp->tlp_high_seq = 0; | 3400 | tp->tlp_high_seq = 0; |
3380 | /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ | ||
3381 | if (!(flag & FLAG_DSACKING_ACK)) { | ||
3382 | tcp_init_cwnd_reduction(sk); | ||
3383 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
3384 | tcp_end_cwnd_reduction(sk); | ||
3385 | tcp_try_keep_open(sk); | ||
3386 | NET_INC_STATS_BH(sock_net(sk), | ||
3387 | LINUX_MIB_TCPLOSSPROBERECOVERY); | ||
3388 | } | ||
3389 | } | 3401 | } |
3390 | } | 3402 | } |
3391 | 3403 | ||
@@ -3421,7 +3433,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3421 | if (before(ack, prior_snd_una)) { | 3433 | if (before(ack, prior_snd_una)) { |
3422 | /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ | 3434 | /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ |
3423 | if (before(ack, prior_snd_una - tp->max_window)) { | 3435 | if (before(ack, prior_snd_una - tp->max_window)) { |
3424 | tcp_send_challenge_ack(sk); | 3436 | tcp_send_challenge_ack(sk, skb); |
3425 | return -1; | 3437 | return -1; |
3426 | } | 3438 | } |
3427 | goto old_ack; | 3439 | goto old_ack; |
@@ -4758,7 +4770,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) | |||
4758 | return false; | 4770 | return false; |
4759 | 4771 | ||
4760 | /* If we filled the congestion window, do not expand. */ | 4772 | /* If we filled the congestion window, do not expand. */ |
4761 | if (tp->packets_out >= tp->snd_cwnd) | 4773 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) |
4762 | return false; | 4774 | return false; |
4763 | 4775 | ||
4764 | return true; | 4776 | return true; |
@@ -4990,7 +5002,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | |||
4990 | tcp_paws_discard(sk, skb)) { | 5002 | tcp_paws_discard(sk, skb)) { |
4991 | if (!th->rst) { | 5003 | if (!th->rst) { |
4992 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | 5004 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); |
4993 | tcp_send_dupack(sk, skb); | 5005 | if (!tcp_oow_rate_limited(sock_net(sk), skb, |
5006 | LINUX_MIB_TCPACKSKIPPEDPAWS, | ||
5007 | &tp->last_oow_ack_time)) | ||
5008 | tcp_send_dupack(sk, skb); | ||
4994 | goto discard; | 5009 | goto discard; |
4995 | } | 5010 | } |
4996 | /* Reset is accepted even if it did not pass PAWS. */ | 5011 | /* Reset is accepted even if it did not pass PAWS. */ |
@@ -5007,7 +5022,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | |||
5007 | if (!th->rst) { | 5022 | if (!th->rst) { |
5008 | if (th->syn) | 5023 | if (th->syn) |
5009 | goto syn_challenge; | 5024 | goto syn_challenge; |
5010 | tcp_send_dupack(sk, skb); | 5025 | if (!tcp_oow_rate_limited(sock_net(sk), skb, |
5026 | LINUX_MIB_TCPACKSKIPPEDSEQ, | ||
5027 | &tp->last_oow_ack_time)) | ||
5028 | tcp_send_dupack(sk, skb); | ||
5011 | } | 5029 | } |
5012 | goto discard; | 5030 | goto discard; |
5013 | } | 5031 | } |
@@ -5023,7 +5041,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | |||
5023 | if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) | 5041 | if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) |
5024 | tcp_reset(sk); | 5042 | tcp_reset(sk); |
5025 | else | 5043 | else |
5026 | tcp_send_challenge_ack(sk); | 5044 | tcp_send_challenge_ack(sk, skb); |
5027 | goto discard; | 5045 | goto discard; |
5028 | } | 5046 | } |
5029 | 5047 | ||
@@ -5037,7 +5055,7 @@ syn_challenge: | |||
5037 | if (syn_inerr) | 5055 | if (syn_inerr) |
5038 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | 5056 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); |
5039 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); | 5057 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); |
5040 | tcp_send_challenge_ack(sk); | 5058 | tcp_send_challenge_ack(sk, skb); |
5041 | goto discard; | 5059 | goto discard; |
5042 | } | 5060 | } |
5043 | 5061 | ||
@@ -5870,10 +5888,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) | |||
5870 | * TCP ECN negotiation. | 5888 | * TCP ECN negotiation. |
5871 | * | 5889 | * |
5872 | * Exception: tcp_ca wants ECN. This is required for DCTCP | 5890 | * Exception: tcp_ca wants ECN. This is required for DCTCP |
5873 | * congestion control; it requires setting ECT on all packets, | 5891 | * congestion control: Linux DCTCP asserts ECT on all packets, |
5874 | * including SYN. We inverse the test in this case: If our | 5892 | * including SYN, which is the most optimal solution; however, |
5875 | * local socket wants ECN, but peer only set ece/cwr (but not | 5893 | * others, such as FreeBSD, do not. |
5876 | * ECT in IP header) its probably a non-DCTCP aware sender. | ||
5877 | */ | 5894 | */ |
5878 | static void tcp_ecn_create_request(struct request_sock *req, | 5895 | static void tcp_ecn_create_request(struct request_sock *req, |
5879 | const struct sk_buff *skb, | 5896 | const struct sk_buff *skb, |
@@ -5883,18 +5900,15 @@ static void tcp_ecn_create_request(struct request_sock *req, | |||
5883 | const struct tcphdr *th = tcp_hdr(skb); | 5900 | const struct tcphdr *th = tcp_hdr(skb); |
5884 | const struct net *net = sock_net(listen_sk); | 5901 | const struct net *net = sock_net(listen_sk); |
5885 | bool th_ecn = th->ece && th->cwr; | 5902 | bool th_ecn = th->ece && th->cwr; |
5886 | bool ect, need_ecn, ecn_ok; | 5903 | bool ect, ecn_ok; |
5887 | 5904 | ||
5888 | if (!th_ecn) | 5905 | if (!th_ecn) |
5889 | return; | 5906 | return; |
5890 | 5907 | ||
5891 | ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); | 5908 | ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); |
5892 | need_ecn = tcp_ca_needs_ecn(listen_sk); | ||
5893 | ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); | 5909 | ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); |
5894 | 5910 | ||
5895 | if (!ect && !need_ecn && ecn_ok) | 5911 | if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) |
5896 | inet_rsk(req)->ecn_ok = 1; | ||
5897 | else if (ect && need_ecn) | ||
5898 | inet_rsk(req)->ecn_ok = 1; | 5912 | inet_rsk(req)->ecn_ok = 1; |
5899 | } | 5913 | } |
5900 | 5914 | ||