diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
| -rw-r--r-- | net/ipv4/tcp_input.c | 88 |
1 files changed, 51 insertions, 37 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075ab4d5af5e..8fdd27b17306 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly; | |||
| 100 | 100 | ||
| 101 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 101 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
| 102 | int sysctl_tcp_early_retrans __read_mostly = 3; | 102 | int sysctl_tcp_early_retrans __read_mostly = 3; |
| 103 | int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; | ||
| 103 | 104 | ||
| 104 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 105 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
| 105 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 106 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
| @@ -3183,8 +3184,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
| 3183 | 3184 | ||
| 3184 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); | 3185 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); |
| 3185 | 3186 | ||
| 3186 | if (ca_ops->pkts_acked) | 3187 | if (ca_ops->pkts_acked) { |
| 3187 | ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); | 3188 | long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); |
| 3189 | ca_ops->pkts_acked(sk, pkts_acked, rtt_us); | ||
| 3190 | } | ||
| 3188 | 3191 | ||
| 3189 | } else if (skb && rtt_update && sack_rtt_us >= 0 && | 3192 | } else if (skb && rtt_update && sack_rtt_us >= 0 && |
| 3190 | sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { | 3193 | sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { |
| @@ -3319,13 +3322,22 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 | |||
| 3319 | } | 3322 | } |
| 3320 | 3323 | ||
| 3321 | /* RFC 5961 7 [ACK Throttling] */ | 3324 | /* RFC 5961 7 [ACK Throttling] */ |
| 3322 | static void tcp_send_challenge_ack(struct sock *sk) | 3325 | static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) |
| 3323 | { | 3326 | { |
| 3324 | /* unprotected vars, we dont care of overwrites */ | 3327 | /* unprotected vars, we dont care of overwrites */ |
| 3325 | static u32 challenge_timestamp; | 3328 | static u32 challenge_timestamp; |
| 3326 | static unsigned int challenge_count; | 3329 | static unsigned int challenge_count; |
| 3327 | u32 now = jiffies / HZ; | 3330 | struct tcp_sock *tp = tcp_sk(sk); |
| 3331 | u32 now; | ||
| 3332 | |||
| 3333 | /* First check our per-socket dupack rate limit. */ | ||
| 3334 | if (tcp_oow_rate_limited(sock_net(sk), skb, | ||
| 3335 | LINUX_MIB_TCPACKSKIPPEDCHALLENGE, | ||
| 3336 | &tp->last_oow_ack_time)) | ||
| 3337 | return; | ||
| 3328 | 3338 | ||
| 3339 | /* Then check the host-wide RFC 5961 rate limit. */ | ||
| 3340 | now = jiffies / HZ; | ||
| 3329 | if (now != challenge_timestamp) { | 3341 | if (now != challenge_timestamp) { |
| 3330 | challenge_timestamp = now; | 3342 | challenge_timestamp = now; |
| 3331 | challenge_count = 0; | 3343 | challenge_count = 0; |
| @@ -3358,34 +3370,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | |||
| 3358 | } | 3370 | } |
| 3359 | 3371 | ||
| 3360 | /* This routine deals with acks during a TLP episode. | 3372 | /* This routine deals with acks during a TLP episode. |
| 3373 | * We mark the end of a TLP episode on receiving TLP dupack or when | ||
| 3374 | * ack is after tlp_high_seq. | ||
| 3361 | * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. | 3375 | * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. |
| 3362 | */ | 3376 | */ |
| 3363 | static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) | 3377 | static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) |
| 3364 | { | 3378 | { |
| 3365 | struct tcp_sock *tp = tcp_sk(sk); | 3379 | struct tcp_sock *tp = tcp_sk(sk); |
| 3366 | bool is_tlp_dupack = (ack == tp->tlp_high_seq) && | ||
| 3367 | !(flag & (FLAG_SND_UNA_ADVANCED | | ||
| 3368 | FLAG_NOT_DUP | FLAG_DATA_SACKED)); | ||
| 3369 | 3380 | ||
| 3370 | /* Mark the end of TLP episode on receiving TLP dupack or when | 3381 | if (before(ack, tp->tlp_high_seq)) |
| 3371 | * ack is after tlp_high_seq. | ||
| 3372 | */ | ||
| 3373 | if (is_tlp_dupack) { | ||
| 3374 | tp->tlp_high_seq = 0; | ||
| 3375 | return; | 3382 | return; |
| 3376 | } | ||
| 3377 | 3383 | ||
| 3378 | if (after(ack, tp->tlp_high_seq)) { | 3384 | if (flag & FLAG_DSACKING_ACK) { |
| 3385 | /* This DSACK means original and TLP probe arrived; no loss */ | ||
| 3386 | tp->tlp_high_seq = 0; | ||
| 3387 | } else if (after(ack, tp->tlp_high_seq)) { | ||
| 3388 | /* ACK advances: there was a loss, so reduce cwnd. Reset | ||
| 3389 | * tlp_high_seq in tcp_init_cwnd_reduction() | ||
| 3390 | */ | ||
| 3391 | tcp_init_cwnd_reduction(sk); | ||
| 3392 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
| 3393 | tcp_end_cwnd_reduction(sk); | ||
| 3394 | tcp_try_keep_open(sk); | ||
| 3395 | NET_INC_STATS_BH(sock_net(sk), | ||
| 3396 | LINUX_MIB_TCPLOSSPROBERECOVERY); | ||
| 3397 | } else if (!(flag & (FLAG_SND_UNA_ADVANCED | | ||
| 3398 | FLAG_NOT_DUP | FLAG_DATA_SACKED))) { | ||
| 3399 | /* Pure dupack: original and TLP probe arrived; no loss */ | ||
| 3379 | tp->tlp_high_seq = 0; | 3400 | tp->tlp_high_seq = 0; |
| 3380 | /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ | ||
| 3381 | if (!(flag & FLAG_DSACKING_ACK)) { | ||
| 3382 | tcp_init_cwnd_reduction(sk); | ||
| 3383 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
| 3384 | tcp_end_cwnd_reduction(sk); | ||
| 3385 | tcp_try_keep_open(sk); | ||
| 3386 | NET_INC_STATS_BH(sock_net(sk), | ||
| 3387 | LINUX_MIB_TCPLOSSPROBERECOVERY); | ||
| 3388 | } | ||
| 3389 | } | 3401 | } |
| 3390 | } | 3402 | } |
| 3391 | 3403 | ||
| @@ -3421,7 +3433,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3421 | if (before(ack, prior_snd_una)) { | 3433 | if (before(ack, prior_snd_una)) { |
| 3422 | /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ | 3434 | /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ |
| 3423 | if (before(ack, prior_snd_una - tp->max_window)) { | 3435 | if (before(ack, prior_snd_una - tp->max_window)) { |
| 3424 | tcp_send_challenge_ack(sk); | 3436 | tcp_send_challenge_ack(sk, skb); |
| 3425 | return -1; | 3437 | return -1; |
| 3426 | } | 3438 | } |
| 3427 | goto old_ack; | 3439 | goto old_ack; |
| @@ -4990,7 +5002,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | |||
| 4990 | tcp_paws_discard(sk, skb)) { | 5002 | tcp_paws_discard(sk, skb)) { |
| 4991 | if (!th->rst) { | 5003 | if (!th->rst) { |
| 4992 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | 5004 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); |
| 4993 | tcp_send_dupack(sk, skb); | 5005 | if (!tcp_oow_rate_limited(sock_net(sk), skb, |
| 5006 | LINUX_MIB_TCPACKSKIPPEDPAWS, | ||
| 5007 | &tp->last_oow_ack_time)) | ||
| 5008 | tcp_send_dupack(sk, skb); | ||
| 4994 | goto discard; | 5009 | goto discard; |
| 4995 | } | 5010 | } |
| 4996 | /* Reset is accepted even if it did not pass PAWS. */ | 5011 | /* Reset is accepted even if it did not pass PAWS. */ |
| @@ -5007,7 +5022,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | |||
| 5007 | if (!th->rst) { | 5022 | if (!th->rst) { |
| 5008 | if (th->syn) | 5023 | if (th->syn) |
| 5009 | goto syn_challenge; | 5024 | goto syn_challenge; |
| 5010 | tcp_send_dupack(sk, skb); | 5025 | if (!tcp_oow_rate_limited(sock_net(sk), skb, |
| 5026 | LINUX_MIB_TCPACKSKIPPEDSEQ, | ||
| 5027 | &tp->last_oow_ack_time)) | ||
| 5028 | tcp_send_dupack(sk, skb); | ||
| 5011 | } | 5029 | } |
| 5012 | goto discard; | 5030 | goto discard; |
| 5013 | } | 5031 | } |
| @@ -5023,7 +5041,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | |||
| 5023 | if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) | 5041 | if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) |
| 5024 | tcp_reset(sk); | 5042 | tcp_reset(sk); |
| 5025 | else | 5043 | else |
| 5026 | tcp_send_challenge_ack(sk); | 5044 | tcp_send_challenge_ack(sk, skb); |
| 5027 | goto discard; | 5045 | goto discard; |
| 5028 | } | 5046 | } |
| 5029 | 5047 | ||
| @@ -5037,7 +5055,7 @@ syn_challenge: | |||
| 5037 | if (syn_inerr) | 5055 | if (syn_inerr) |
| 5038 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | 5056 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); |
| 5039 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); | 5057 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); |
| 5040 | tcp_send_challenge_ack(sk); | 5058 | tcp_send_challenge_ack(sk, skb); |
| 5041 | goto discard; | 5059 | goto discard; |
| 5042 | } | 5060 | } |
| 5043 | 5061 | ||
| @@ -5870,10 +5888,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) | |||
| 5870 | * TCP ECN negotiation. | 5888 | * TCP ECN negotiation. |
| 5871 | * | 5889 | * |
| 5872 | * Exception: tcp_ca wants ECN. This is required for DCTCP | 5890 | * Exception: tcp_ca wants ECN. This is required for DCTCP |
| 5873 | * congestion control; it requires setting ECT on all packets, | 5891 | * congestion control: Linux DCTCP asserts ECT on all packets, |
| 5874 | * including SYN. We inverse the test in this case: If our | 5892 | * including SYN, which is most optimal solution; however, |
| 5875 | * local socket wants ECN, but peer only set ece/cwr (but not | 5893 | * others, such as FreeBSD do not. |
| 5876 | * ECT in IP header) its probably a non-DCTCP aware sender. | ||
| 5877 | */ | 5894 | */ |
| 5878 | static void tcp_ecn_create_request(struct request_sock *req, | 5895 | static void tcp_ecn_create_request(struct request_sock *req, |
| 5879 | const struct sk_buff *skb, | 5896 | const struct sk_buff *skb, |
| @@ -5883,18 +5900,15 @@ static void tcp_ecn_create_request(struct request_sock *req, | |||
| 5883 | const struct tcphdr *th = tcp_hdr(skb); | 5900 | const struct tcphdr *th = tcp_hdr(skb); |
| 5884 | const struct net *net = sock_net(listen_sk); | 5901 | const struct net *net = sock_net(listen_sk); |
| 5885 | bool th_ecn = th->ece && th->cwr; | 5902 | bool th_ecn = th->ece && th->cwr; |
| 5886 | bool ect, need_ecn, ecn_ok; | 5903 | bool ect, ecn_ok; |
| 5887 | 5904 | ||
| 5888 | if (!th_ecn) | 5905 | if (!th_ecn) |
| 5889 | return; | 5906 | return; |
| 5890 | 5907 | ||
| 5891 | ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); | 5908 | ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); |
| 5892 | need_ecn = tcp_ca_needs_ecn(listen_sk); | ||
| 5893 | ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); | 5909 | ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); |
| 5894 | 5910 | ||
| 5895 | if (!ect && !need_ecn && ecn_ok) | 5911 | if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) |
| 5896 | inet_rsk(req)->ecn_ok = 1; | ||
| 5897 | else if (ect && need_ecn) | ||
| 5898 | inet_rsk(req)->ecn_ok = 1; | 5912 | inet_rsk(req)->ecn_ok = 1; |
| 5899 | } | 5913 | } |
| 5900 | 5914 | ||
