Diffstat (limited to 'net/ipv4/tcp_input.c')
 net/ipv4/tcp_input.c | 218 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 175 insertions(+), 43 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f501ac048366..a7ef679dd3ea 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -866,7 +866,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 /* This must be called before lost_out is incremented */
 static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	if ((tp->retransmit_skb_hint == NULL) ||
+	if (!tp->retransmit_skb_hint ||
 	    before(TCP_SKB_CB(skb)->seq,
 		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
 		tp->retransmit_skb_hint = skb;
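For reference: the hint checks above, like much of this file, lean on the kernel's wraparound-safe sequence-number comparisons. A standalone sketch of before()/after() as defined in include/net/tcp.h (compiled here as a plain userspace program for illustration):

#include <stdint.h>
#include <stdio.h>

/* Signed 32-bit subtraction makes the comparison robust to seq wraparound. */
static int before(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) < 0;
}

#define after(seq2, seq1)	before(seq1, seq2)

int main(void)
{
	/* 0xfffffff0 has wrapped past 0: it still counts as "before" 0x10. */
	printf("%d\n", before(0xfffffff0u, 0x00000010u));	/* 1 */
	printf("%d\n", after(0x00000010u, 0xfffffff0u));	/* 1 */
	return 0;
}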
@@ -1256,7 +1256,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
 	fack_count += pcount;
 
 	/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
-	if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
+	if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
 	    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
 		tp->lost_cnt_hint += pcount;
 
@@ -1535,7 +1535,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 			break;
 
-		if ((next_dup != NULL) &&
+		if (next_dup &&
 		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
 			in_sack = tcp_match_skb_to_sack(sk, skb,
 							next_dup->start_seq,
@@ -1551,7 +1551,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 		if (in_sack <= 0) {
 			tmp = tcp_shift_skb_data(sk, skb, state,
 						 start_seq, end_seq, dup_sack);
-			if (tmp != NULL) {
+			if (tmp) {
 				if (tmp != skb) {
 					skb = tmp;
 					continue;
@@ -1614,7 +1614,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 					   struct tcp_sacktag_state *state,
 					   u32 skip_to_seq)
 {
-	if (next_dup == NULL)
+	if (!next_dup)
 		return skb;
 
 	if (before(next_dup->start_seq, skip_to_seq)) {
@@ -1783,7 +1783,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
 				/* ...but better entrypoint exists! */
 				skb = tcp_highest_sack(sk);
-				if (skb == NULL)
+				if (!skb)
 					break;
 				state.fack_count = tp->fackets_out;
 				cache++;
@@ -1798,7 +1798,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
 			skb = tcp_highest_sack(sk);
-			if (skb == NULL)
+			if (!skb)
 				break;
 			state.fack_count = tp->fackets_out;
 		}
@@ -3099,17 +3099,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			if (sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
-		} else {
+		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
 			last_ackt = skb->skb_mstamp;
 			WARN_ON_ONCE(last_ackt.v64 == 0);
 			if (!first_ackt.v64)
 				first_ackt = last_ackt;
 
-			if (!(sacked & TCPCB_SACKED_ACKED)) {
-				reord = min(pkts_acked, reord);
-				if (!after(scb->end_seq, tp->high_seq))
-					flag |= FLAG_ORIG_SACK_ACKED;
-			}
+			reord = min(pkts_acked, reord);
+			if (!after(scb->end_seq, tp->high_seq))
+				flag |= FLAG_ORIG_SACK_ACKED;
 		}
 
 		if (sacked & TCPCB_SACKED_ACKED)
@@ -3322,6 +3320,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	return flag;
 }
 
+/* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+ * attacks that send repeated SYNs or ACKs for the same connection. To
+ * do this, we do not send a duplicate SYNACK or ACK if the remote
+ * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
+ */
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+			  int mib_idx, u32 *last_oow_ack_time)
+{
+	/* Data packets without SYNs are not likely part of an ACK loop. */
+	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+	    !tcp_hdr(skb)->syn)
+		goto not_rate_limited;
+
+	if (*last_oow_ack_time) {
+		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+			NET_INC_STATS_BH(net, mib_idx);
+			return true;	/* rate-limited: don't send yet! */
+		}
+	}
+
+	*last_oow_ack_time = tcp_time_stamp;
+
+not_rate_limited:
+	return false;	/* not rate-limited: go ahead, send dupack now! */
+}
+
 /* RFC 5961 7 [ACK Throttling] */
 static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 {
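The new helper uses the same wraparound-safe arithmetic on tcp_time_stamp (a free-running 32-bit jiffies-derived clock) that before()/after() use on sequence numbers. A minimal userspace sketch of the pattern, assuming an illustrative 500 ms window standing in for sysctl_tcp_invalid_ratelimit (500 ms is the documented default, but the constant below is not the sysctl):

#include <stdbool.h>
#include <stdint.h>

#define INVALID_RATELIMIT_MS 500	/* illustrative stand-in for the sysctl */

/* Allow the first out-of-window response, then mute further ones until
 * INVALID_RATELIMIT_MS have elapsed; signed subtraction survives clock wrap.
 */
static bool oow_rate_limited(uint32_t now_ms, uint32_t *last_oow_ms)
{
	if (*last_oow_ms) {
		int32_t elapsed = (int32_t)(now_ms - *last_oow_ms);

		if (elapsed >= 0 && elapsed < INVALID_RATELIMIT_MS)
			return true;	/* still inside the window: drop */
	}
	*last_oow_ms = now_ms;		/* start (or restart) the window */
	return false;			/* OK to respond */
}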
@@ -3573,6 +3601,23 @@ old_ack:
 	return 0;
 }
 
+static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
+				      bool syn, struct tcp_fastopen_cookie *foc,
+				      bool exp_opt)
+{
+	/* Valid only in SYN or SYN-ACK with an even length. */
+	if (!foc || !syn || len < 0 || (len & 1))
+		return;
+
+	if (len >= TCP_FASTOPEN_COOKIE_MIN &&
+	    len <= TCP_FASTOPEN_COOKIE_MAX)
+		memcpy(foc->val, cookie, len);
+	else if (len != 0)
+		len = -1;
+	foc->len = len;
+	foc->exp = exp_opt;
+}
+
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
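A userspace restatement of the new helper's validation rules, with a few example outcomes; the struct and constants are simplified stand-ins for the kernel's, not its actual types:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define COOKIE_MIN 4	/* stand-in for TCP_FASTOPEN_COOKIE_MIN */
#define COOKIE_MAX 16	/* stand-in for TCP_FASTOPEN_COOKIE_MAX */

struct foc {
	int len;	/* -1: malformed, 0: cookie request, 4..16: cookie */
	unsigned char val[COOKIE_MAX];
	bool exp;	/* came in via the experimental option? */
};

static void parse_fastopen(int len, const unsigned char *cookie,
			   bool syn, struct foc *foc, bool exp_opt)
{
	if (!foc || !syn || len < 0 || (len & 1))
		return;			/* SYN/SYN-ACK only, even length */
	if (len >= COOKIE_MIN && len <= COOKIE_MAX)
		memcpy(foc->val, cookie, len);
	else if (len != 0)
		len = -1;		/* present but out of range */
	foc->len = len;
	foc->exp = exp_opt;
}

int main(void)
{
	static const unsigned char c[16] = { 0 };
	struct foc f = { .len = 99 };

	parse_fastopen(8, c, true, &f, false);	printf("%d\n", f.len); /* 8 */
	parse_fastopen(0, c, true, &f, false);	printf("%d\n", f.len); /* 0: cookie request */
	parse_fastopen(2, c, true, &f, false);	printf("%d\n", f.len); /* -1: too short */
	f.len = 99;
	parse_fastopen(8, c, false, &f, false);	printf("%d\n", f.len); /* 99: ignored, not a SYN */
	return 0;
}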
@@ -3662,21 +3707,22 @@ void tcp_parse_options(const struct sk_buff *skb,
 			 */
 			break;
 #endif
+		case TCPOPT_FASTOPEN:
+			tcp_parse_fastopen_option(
+				opsize - TCPOLEN_FASTOPEN_BASE,
+				ptr, th->syn, foc, false);
+			break;
+
 		case TCPOPT_EXP:
 			/* Fast Open option shares code 254 using a
-			 * 16 bits magic number. It's valid only in
-			 * SYN or SYN-ACK with an even size.
+			 * 16 bits magic number.
 			 */
-			if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
-			    get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
-			    foc == NULL || !th->syn || (opsize & 1))
-				break;
-			foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
-			if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
-			    foc->len <= TCP_FASTOPEN_COOKIE_MAX)
-				memcpy(foc->val, ptr + 2, foc->len);
-			else if (foc->len != 0)
-				foc->len = -1;
+			if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
+			    get_unaligned_be16(ptr) ==
+			    TCPOPT_FASTOPEN_MAGIC)
+				tcp_parse_fastopen_option(opsize -
+					TCPOLEN_EXP_FASTOPEN_BASE,
+					ptr + 2, th->syn, foc, true);
 			break;
 
 		}
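For orientation, the two wire encodings dispatched above. TCPOPT_FASTOPEN is option kind 34 (RFC 7413), TCPOPT_EXP is kind 254 with the 16-bit magic 0xF989, and the BASE lengths are 2 (kind + length) and 4 (kind + length + magic); the cookie bytes below are made up:

/* RFC 7413 option, kind 34: the cookie immediately follows kind and length. */
static const unsigned char tfo_rfc7413[] = {
	34, 10,				/* kind, total option length */
	0xde, 0xad, 0xbe, 0xef, 0x01, 0x02, 0x03, 0x04,	/* 8-byte cookie */
};

/* Experimental option, kind 254: a 16-bit magic precedes the cookie. */
static const unsigned char tfo_exp[] = {
	254, 12,			/* kind, total option length */
	0xf9, 0x89,			/* TCPOPT_FASTOPEN_MAGIC */
	0xde, 0xad, 0xbe, 0xef, 0x01, 0x02, 0x03, 0x04,	/* 8-byte cookie */
};

/* In both cases the parser is handed len = opsize - BASE = 8 (the cookie
 * length) and a pointer to the first cookie byte, so one helper serves both.
 */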
@@ -4640,7 +4686,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	struct sk_buff *head;
 	u32 start, end;
 
-	if (skb == NULL)
+	if (!skb)
 		return;
 
 	start = TCP_SKB_CB(skb)->seq;
@@ -5095,7 +5141,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (unlikely(sk->sk_rx_dst == NULL))
+	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
 	/*
 	 * Header prediction.
@@ -5292,7 +5338,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 
 	tcp_set_state(sk, TCP_ESTABLISHED);
 
-	if (skb != NULL) {
+	if (skb) {
 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
 		security_inet_conn_established(sk, skb);
 	}
@@ -5330,8 +5376,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
-	u16 mss = tp->rx_opt.mss_clamp;
-	bool syn_drop;
+	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
+	bool syn_drop = false;
 
 	if (mss == tp->rx_opt.user_mss) {
 		struct tcp_options_received opt;
@@ -5343,16 +5389,25 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 		mss = opt.mss_clamp;
 	}
 
-	if (!tp->syn_fastopen)  /* Ignore an unsolicited cookie */
+	if (!tp->syn_fastopen) {
+		/* Ignore an unsolicited cookie */
 		cookie->len = -1;
-
-	/* The SYN-ACK neither has cookie nor acknowledges the data. Presumably
-	 * the remote receives only the retransmitted (regular) SYNs: either
-	 * the original SYN-data or the corresponding SYN-ACK is lost.
-	 */
-	syn_drop = (cookie->len <= 0 && data && tp->total_retrans);
+	} else if (tp->total_retrans) {
+		/* SYN timed out and the SYN-ACK neither has a cookie nor
+		 * acknowledges data. Presumably the remote received only
+		 * the retransmitted (regular) SYNs: either the original
+		 * SYN-data or the corresponding SYN-ACK was dropped.
+		 */
+		syn_drop = (cookie->len < 0 && data);
+	} else if (cookie->len < 0 && !tp->syn_data) {
+		/* We requested a cookie but didn't get it. If we did not use
+		 * the (old) exp opt format then try so next time (try_exp=1).
+		 * Otherwise we go back to use the RFC7413 opt (try_exp=2).
+		 */
+		try_exp = tp->syn_fastopen_exp ? 2 : 1;
+	}
 
-	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
+	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
 
 	if (data) { /* Retransmit unacked data in SYN */
 		tcp_for_write_queue_from(data, sk) {
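A condensed restatement of the new try_exp decision as a standalone function; the name and the flattened argument list are illustrative, and tcp_fastopen_cache_set() (body not shown here) is presumably what records the value in the Fast Open metrics cache for the next attempt:

#include <stdbool.h>

/* 0: keep the current option format; 1: try the experimental (kind 254)
 * option next time; 2: go back to the RFC 7413 (kind 34) option.
 */
static unsigned char pick_try_exp(bool requested_cookie,  /* tp->syn_fastopen */
				  bool syn_retransmitted, /* tp->total_retrans */
				  bool got_cookie,	  /* cookie->len >= 0 */
				  bool sent_syn_data,	  /* tp->syn_data */
				  bool used_exp_opt)	  /* tp->syn_fastopen_exp */
{
	if (!requested_cookie || syn_retransmitted ||
	    got_cookie || sent_syn_data)
		return 0;	/* nothing to renegotiate */
	return used_exp_opt ? 2 : 1;
}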
@@ -5661,11 +5716,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	req = tp->fastopen_rsk;
-	if (req != NULL) {
+	if (req) {
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 		    sk->sk_state != TCP_FIN_WAIT1);
 
-		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
+		if (!tcp_check_req(sk, skb, req, true))
 			goto discard;
 	}
 
@@ -5751,7 +5806,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * ACK we have received, this would have acknowledged
 		 * our SYNACK so stop the SYNACK timer.
 		 */
-		if (req != NULL) {
+		if (req) {
 			/* Return RST if ack_seq is invalid.
 			 * Note that RFC793 only says to generate a
 			 * DUPACK for it but for TCP Fast Open it seems
@@ -5913,6 +5968,80 @@ static void tcp_ecn_create_request(struct request_sock *req,
 		inet_rsk(req)->ecn_ok = 1;
 }
 
+static void tcp_openreq_init(struct request_sock *req,
+			     const struct tcp_options_received *rx_opt,
+			     struct sk_buff *skb, const struct sock *sk)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+
+	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
+	req->cookie_ts = 0;
+	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
+	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_rsk(req)->last_oow_ack_time = 0;
+	req->mss = rx_opt->mss_clamp;
+	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
+	ireq->tstamp_ok = rx_opt->tstamp_ok;
+	ireq->sack_ok = rx_opt->sack_ok;
+	ireq->snd_wscale = rx_opt->snd_wscale;
+	ireq->wscale_ok = rx_opt->wscale_ok;
+	ireq->acked = 0;
+	ireq->ecn_ok = 0;
+	ireq->ir_rmt_port = tcp_hdr(skb)->source;
+	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
+	ireq->ir_mark = inet_request_mark(sk, skb);
+}
+
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+				      struct sock *sk_listener)
+{
+	struct request_sock *req = reqsk_alloc(ops, sk_listener);
+
+	if (req) {
+		struct inet_request_sock *ireq = inet_rsk(req);
+
+		kmemcheck_annotate_bitfield(ireq, flags);
+		ireq->opt = NULL;
+		atomic64_set(&ireq->ir_cookie, 0);
+		ireq->ireq_state = TCP_NEW_SYN_RECV;
+		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+		ireq->ireq_family = sk_listener->sk_family;
+	}
+
+	return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
+/*
+ * Return true if a syncookie should be sent
+ */
+static bool tcp_syn_flood_action(struct sock *sk,
+				 const struct sk_buff *skb,
+				 const char *proto)
+{
+	const char *msg = "Dropping request";
+	bool want_cookie = false;
+	struct listen_sock *lopt;
+
+#ifdef CONFIG_SYN_COOKIES
+	if (sysctl_tcp_syncookies) {
+		msg = "Sending cookies";
+		want_cookie = true;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+	} else
+#endif
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+
+	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
+		lopt->synflood_warned = 1;
+		pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
+			proto, ntohs(tcp_hdr(skb)->dest), msg);
+	}
+	return want_cookie;
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
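On the sysctl values tcp_syn_flood_action() distinguishes: the helper runs when the SYN backlog is under pressure, and the != 2 test suppresses the flood warning when cookies are unconditionally enabled, since that is then expected behavior. The enum below is an illustrative summary of net.ipv4.tcp_syncookies, not a kernel type:

/* Illustrative summary of net.ipv4.tcp_syncookies modes; not a kernel enum. */
enum syncookies_mode {
	SYNCOOKIES_OFF	  = 0,	/* never send cookies; drop on queue overflow */
	SYNCOOKIES_AUTO	  = 1,	/* send cookies only when the SYN queue overflows */
	SYNCOOKIES_ALWAYS = 2,	/* always send cookies; no SYN-flood warning */
};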
@@ -5950,7 +6079,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		goto drop;
 	}
 
-	req = inet_reqsk_alloc(rsk_ops);
+	req = inet_reqsk_alloc(rsk_ops, sk);
 	if (!req)
 		goto drop;
 
@@ -5967,6 +6096,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
 
+	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
+	inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
+
 	af_ops->init_req(req, sk, skb);
 
 	if (security_inet_conn_request(sk, skb, req))
@@ -6039,7 +6171,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (err || want_cookie)
 		goto drop_and_free;
 
-	tcp_rsk(req)->listener = NULL;
+	tcp_rsk(req)->tfo_listener = false;
 	af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 }
 