Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 90 +++++++++++++++++++++++++++-----------------
1 file changed, 52 insertions(+), 38 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 075ab4d5af5e..fb4cf8b8e121 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_early_retrans __read_mostly = 3;
+int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 
 #define FLAG_DATA          0x01 /* Incoming frame contained data.      */
 #define FLAG_WIN_UPDATE    0x02 /* Incoming ACK was a window update.   */
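A note on the default, beyond what the hunk shows: the limit is stored in jiffies, so HZ/2 is half a second regardless of the configured tick rate (the sysctl itself is documented in milliseconds, default 500). A standalone check of that arithmetic, illustration only and not kernel code:

```c
#include <stdio.h>

/* HZ is the tick rate in jiffies per second, so HZ/2 jiffies
 * works out to 500 ms at every common HZ configuration. */
int main(void)
{
        const unsigned int tick_rates[] = { 100, 250, 1000 }; /* common HZ configs */
        size_t i;

        for (i = 0; i < sizeof(tick_rates) / sizeof(tick_rates[0]); i++) {
                unsigned int hz = tick_rates[i];

                printf("HZ=%-4u  HZ/2 = %3u jiffies = %u ms\n",
                       hz, hz / 2, hz / 2 * 1000 / hz);
        }
        return 0;
}
```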
@@ -3183,8 +3184,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
                tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
-               if (ca_ops->pkts_acked)
-                       ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
+               if (ca_ops->pkts_acked) {
+                       long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
+                       ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
+               }
 
        } else if (skb && rtt_update && sack_rtt_us >= 0 &&
                   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
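Why min_t(ulong, ...) rather than a signed min(): both RTT inputs use -1L to mean "no sample". Compared as unsigned long, -1 becomes ULONG_MAX, so any valid (non-negative) sample wins, and -1 survives only when both samples are missing. A minimal userspace sketch, with a simplified stand-in for the kernel macro:

```c
#include <stdio.h>

/* Simplified stand-in for the kernel's min_t(): compare both
 * operands after casting them to the named type. */
#define min_t(type, x, y) ((type)(x) < (type)(y) ? (x) : (y))

int main(void)
{
        long ca_seq_rtt_us = -1L;  /* -1L: no timestamp-based RTT sample */
        long sack_rtt_us = 4200L;  /* valid SACK-based RTT, in microseconds */

        /* (unsigned long)-1L is ULONG_MAX, so the valid sample wins. */
        long rtt_us = min_t(unsigned long, ca_seq_rtt_us, sack_rtt_us);

        printf("rtt_us = %ld\n", rtt_us);  /* prints: rtt_us = 4200 */
        return 0;
}
```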
@@ -3319,13 +3322,22 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 }
 
 /* RFC 5961 7 [ACK Throttling] */
-static void tcp_send_challenge_ack(struct sock *sk)
+static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 {
        /* unprotected vars, we dont care of overwrites */
        static u32 challenge_timestamp;
        static unsigned int challenge_count;
-       u32 now = jiffies / HZ;
+       struct tcp_sock *tp = tcp_sk(sk);
+       u32 now;
+
+       /* First check our per-socket dupack rate limit. */
+       if (tcp_oow_rate_limited(sock_net(sk), skb,
+                                LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+                                &tp->last_oow_ack_time))
+               return;
 
+       /* Then check the host-wide RFC 5961 rate limit. */
+       now = jiffies / HZ;
        if (now != challenge_timestamp) {
                challenge_timestamp = now;
                challenge_count = 0;
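The body of tcp_oow_rate_limited() is outside this diff, so the model below is an assumption built only from the call sites: remember through *last_oow_ack_time when the last out-of-window reply was sent, suppress a new reply that would go out sooner than the configured limit allows, and count each suppression against the MIB index the caller passes (here LINUX_MIB_TCPACKSKIPPEDCHALLENGE). A self-contained userspace sketch with time in milliseconds; the real kernel helper works in jiffies and takes the net and sk_buff:

```c
#include <stdbool.h>
#include <stdio.h>

static unsigned int tcp_invalid_ratelimit = 500;  /* ms; mirrors the HZ/2 default */

/* Userspace model of the per-socket out-of-window rate limiter. */
static bool oow_rate_limited(unsigned int now_ms, unsigned int *last_oow_ms,
                             unsigned long *skipped_mib)
{
        if (*last_oow_ms && now_ms - *last_oow_ms < tcp_invalid_ratelimit) {
                (*skipped_mib)++;  /* e.g. LINUX_MIB_TCPACKSKIPPEDCHALLENGE */
                return true;       /* suppress this challenge/duplicate ACK */
        }
        *last_oow_ms = now_ms;     /* remember the reply we are about to send */
        return false;
}

int main(void)
{
        unsigned int last = 0;
        unsigned long skipped = 0;

        printf("%d\n", oow_rate_limited(1000, &last, &skipped)); /* 0: first reply */
        printf("%d\n", oow_rate_limited(1200, &last, &skipped)); /* 1: only 200 ms later */
        printf("%d\n", oow_rate_limited(1600, &last, &skipped)); /* 0: 600 ms later */
        return 0;
}
```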
@@ -3358,34 +3370,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 }
 
 /* This routine deals with acks during a TLP episode.
+ * We mark the end of a TLP episode on receiving TLP dupack or when
+ * ack is after tlp_high_seq.
  * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
  */
 static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
-                            !(flag & (FLAG_SND_UNA_ADVANCED |
-                                      FLAG_NOT_DUP | FLAG_DATA_SACKED));
 
-       /* Mark the end of TLP episode on receiving TLP dupack or when
-        * ack is after tlp_high_seq.
-        */
-       if (is_tlp_dupack) {
-               tp->tlp_high_seq = 0;
+       if (before(ack, tp->tlp_high_seq))
                return;
-       }
 
-       if (after(ack, tp->tlp_high_seq)) {
+       if (flag & FLAG_DSACKING_ACK) {
+               /* This DSACK means original and TLP probe arrived; no loss */
+               tp->tlp_high_seq = 0;
+       } else if (after(ack, tp->tlp_high_seq)) {
+               /* ACK advances: there was a loss, so reduce cwnd. Reset
+                * tlp_high_seq in tcp_init_cwnd_reduction()
+                */
+               tcp_init_cwnd_reduction(sk);
+               tcp_set_ca_state(sk, TCP_CA_CWR);
+               tcp_end_cwnd_reduction(sk);
+               tcp_try_keep_open(sk);
+               NET_INC_STATS_BH(sock_net(sk),
+                                LINUX_MIB_TCPLOSSPROBERECOVERY);
+       } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
+                            FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
+               /* Pure dupack: original and TLP probe arrived; no loss */
                tp->tlp_high_seq = 0;
-               /* Don't reduce cwnd if DSACK arrives for TLP retrans. */
-               if (!(flag & FLAG_DSACKING_ACK)) {
-                       tcp_init_cwnd_reduction(sk);
-                       tcp_set_ca_state(sk, TCP_CA_CWR);
-                       tcp_end_cwnd_reduction(sk);
-                       tcp_try_keep_open(sk);
-                       NET_INC_STATS_BH(sock_net(sk),
-                                        LINUX_MIB_TCPLOSSPROBERECOVERY);
-               }
        }
 }
 
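After the rewrite, tcp_process_tlp_ack() resolves a TLP episode into three explicit outcomes, tested in this order once the ACK reaches tlp_high_seq: a DSACK means both the original segment and the probe arrived (no loss, just close the episode); an ACK beyond tlp_high_seq means the probe repaired a real loss, so do a one-shot CWR-style cwnd reduction; a pure duplicate ACK at tlp_high_seq again means no loss. A standalone restatement of that decision (the flag bit values are illustrative, and plain </> comparisons stand in for the kernel's wraparound-safe before()/after()):

```c
#include <stdio.h>

/* Illustrative flag bits; the kernel's actual values differ. */
#define FLAG_DSACKING_ACK     0x1
#define FLAG_SND_UNA_ADVANCED 0x2
#define FLAG_NOT_DUP          0x4
#define FLAG_DATA_SACKED      0x8

enum tlp_outcome { TLP_EPISODE_OPEN, TLP_NO_LOSS, TLP_LOSS_REPAIRED };

static enum tlp_outcome tlp_classify(unsigned int ack, unsigned int high, int flag)
{
        if (ack < high)                 /* before(): probe not acked yet */
                return TLP_EPISODE_OPEN;
        if (flag & FLAG_DSACKING_ACK)   /* original + probe both arrived */
                return TLP_NO_LOSS;
        if (ack > high)                 /* after(): a loss was repaired */
                return TLP_LOSS_REPAIRED;
        if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP | FLAG_DATA_SACKED)))
                return TLP_NO_LOSS;     /* pure dupack for the probe */
        return TLP_EPISODE_OPEN;
}

int main(void)
{
        printf("%d\n", tlp_classify(101, 100, 0));                 /* 2: loss repaired */
        printf("%d\n", tlp_classify(101, 100, FLAG_DSACKING_ACK)); /* 1: no loss */
        printf("%d\n", tlp_classify(100, 100, 0));                 /* 1: pure dupack */
        return 0;
}
```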
@@ -3421,7 +3433,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
        if (before(ack, prior_snd_una)) {
                /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
                if (before(ack, prior_snd_una - tp->max_window)) {
-                       tcp_send_challenge_ack(sk);
+                       tcp_send_challenge_ack(sk, skb);
                        return -1;
                }
                goto old_ack;
@@ -4758,7 +4770,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
                return false;
 
        /* If we filled the congestion window, do not expand. */
-       if (tp->packets_out >= tp->snd_cwnd)
+       if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
                return false;
 
        return true;
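The sndbuf change matters when a burst of SACKed or lost segments keeps packets_out pinned at cwnd even though the network has drained. tcp_packets_in_flight() discounts those segments; upstream's inline helper in include/net/tcp.h amounts to the formula modeled here:

```c
#include <stdio.h>

/* Minimal model of the kernel's in-flight accounting:
 * in_flight = packets_out - (sacked_out + lost_out) + retrans_out */
struct flight {
        unsigned int packets_out;  /* sent and not yet cumulatively acked */
        unsigned int sacked_out;   /* SACKed by the receiver */
        unsigned int lost_out;     /* marked lost by the sender */
        unsigned int retrans_out;  /* retransmissions currently in the air */
};

static unsigned int packets_in_flight(const struct flight *f)
{
        return f->packets_out - (f->sacked_out + f->lost_out) + f->retrans_out;
}

int main(void)
{
        /* 10 outstanding, 3 SACKed, 2 lost, 1 retransmitted: only 6 are in
         * the network, although packets_out alone still reports 10. */
        struct flight f = { 10, 3, 2, 1 };

        printf("in flight: %u of %u\n", packets_in_flight(&f), f.packets_out);
        return 0;
}
```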
@@ -4990,7 +5002,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
            tcp_paws_discard(sk, skb)) {
                if (!th->rst) {
                        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
-                       tcp_send_dupack(sk, skb);
+                       if (!tcp_oow_rate_limited(sock_net(sk), skb,
+                                                 LINUX_MIB_TCPACKSKIPPEDPAWS,
+                                                 &tp->last_oow_ack_time))
+                               tcp_send_dupack(sk, skb);
                        goto discard;
                }
                /* Reset is accepted even if it did not pass PAWS. */
@@ -5007,7 +5022,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
                if (!th->rst) {
                        if (th->syn)
                                goto syn_challenge;
-                       tcp_send_dupack(sk, skb);
+                       if (!tcp_oow_rate_limited(sock_net(sk), skb,
+                                                 LINUX_MIB_TCPACKSKIPPEDSEQ,
+                                                 &tp->last_oow_ack_time))
+                               tcp_send_dupack(sk, skb);
                }
                goto discard;
        }
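This hunk and the PAWS hunk above apply the same guard and differ only in the MIB counter. A hypothetical consolidation, not part of the patch, written against the same kernel helpers the diff already calls:

```c
/* Hypothetical kernel-context sketch (not in this patch): send a
 * duplicate ACK only if the per-socket out-of-window limiter allows
 * it, charging any suppression to the caller's MIB counter. */
static void tcp_send_dupack_rate_limited(struct sock *sk,
                                         const struct sk_buff *skb,
                                         int mib_idx)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (tcp_oow_rate_limited(sock_net(sk), skb, mib_idx,
                                 &tp->last_oow_ack_time))
                return;  /* too soon since the last out-of-window reply */

        tcp_send_dupack(sk, skb);
}
```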
@@ -5023,7 +5041,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
                if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
                        tcp_reset(sk);
                else
-                       tcp_send_challenge_ack(sk);
+                       tcp_send_challenge_ack(sk, skb);
                goto discard;
        }
 
@@ -5037,7 +5055,7 @@ syn_challenge:
                if (syn_inerr)
                        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
-               tcp_send_challenge_ack(sk);
+               tcp_send_challenge_ack(sk, skb);
                goto discard;
        }
 
@@ -5870,10 +5888,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
  * TCP ECN negotiation.
  *
  * Exception: tcp_ca wants ECN. This is required for DCTCP
- * congestion control; it requires setting ECT on all packets,
- * including SYN. We inverse the test in this case: If our
- * local socket wants ECN, but peer only set ece/cwr (but not
- * ECT in IP header) its probably a non-DCTCP aware sender.
+ * congestion control: Linux DCTCP asserts ECT on all packets,
+ * including SYN, which is the optimal solution; however,
+ * others, such as FreeBSD, do not.
  */
 static void tcp_ecn_create_request(struct request_sock *req,
                                    const struct sk_buff *skb,
@@ -5883,18 +5900,15 @@ static void tcp_ecn_create_request(struct request_sock *req,
        const struct tcphdr *th = tcp_hdr(skb);
        const struct net *net = sock_net(listen_sk);
        bool th_ecn = th->ece && th->cwr;
-       bool ect, need_ecn, ecn_ok;
+       bool ect, ecn_ok;
 
        if (!th_ecn)
                return;
 
        ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
-       need_ecn = tcp_ca_needs_ecn(listen_sk);
        ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
 
-       if (!ect && !need_ecn && ecn_ok)
-               inet_rsk(req)->ecn_ok = 1;
-       else if (ect && need_ecn)
+       if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk))
                inet_rsk(req)->ecn_ok = 1;
 }
 
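The merged condition accepts ECN for a non-ECT SYN whenever the sysctl or the route allows ECN, and unconditionally when the listener's congestion control wants ECN (the DCTCP case). A standalone truth table over the three inputs, assuming th_ecn (ECE+CWR on the SYN) already held:

```c
#include <stdbool.h>
#include <stdio.h>

/* Restatement of the new acceptance rule from the hunk above. */
static bool accept_ecn(bool ect, bool ecn_ok, bool ca_needs_ecn)
{
        return (!ect && ecn_ok) || ca_needs_ecn;
}

int main(void)
{
        int bits;

        printf("ect ecn_ok ca_needs_ecn -> ecn_ok on request\n");
        for (bits = 0; bits < 8; bits++) {
                bool ect = bits & 4, ecn_ok = bits & 2, ca = bits & 1;

                printf(" %d     %d        %d        ->  %d\n",
                       ect, ecn_ok, ca, accept_ecn(ect, ecn_ok, ca));
        }
        return 0;
}
```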