Diffstat (limited to 'net/ipv4/tcp_input.c')
 net/ipv4/tcp_input.c | 256 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 203 insertions(+), 53 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f501ac048366..bc790ea9960f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -866,7 +866,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 /* This must be called before lost_out is incremented */
 static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	if ((tp->retransmit_skb_hint == NULL) ||
+	if (!tp->retransmit_skb_hint ||
 	    before(TCP_SKB_CB(skb)->seq,
 		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
 		tp->retransmit_skb_hint = skb;
@@ -1256,7 +1256,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
 	fack_count += pcount;

 	/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
-	if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
+	if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
 	    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
 		tp->lost_cnt_hint += pcount;

@@ -1535,7 +1535,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 			break;

-		if ((next_dup != NULL) &&
+		if (next_dup &&
 		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
 			in_sack = tcp_match_skb_to_sack(sk, skb,
 							next_dup->start_seq,
@@ -1551,7 +1551,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 		if (in_sack <= 0) {
 			tmp = tcp_shift_skb_data(sk, skb, state,
 						 start_seq, end_seq, dup_sack);
-			if (tmp != NULL) {
+			if (tmp) {
 				if (tmp != skb) {
 					skb = tmp;
 					continue;
@@ -1614,7 +1614,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 					struct tcp_sacktag_state *state,
 					u32 skip_to_seq)
 {
-	if (next_dup == NULL)
+	if (!next_dup)
 		return skb;

 	if (before(next_dup->start_seq, skip_to_seq)) {
@@ -1783,7 +1783,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
 				/* ...but better entrypoint exists! */
 				skb = tcp_highest_sack(sk);
-				if (skb == NULL)
+				if (!skb)
 					break;
 				state.fack_count = tp->fackets_out;
 				cache++;
@@ -1798,7 +1798,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,

 		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
 			skb = tcp_highest_sack(sk);
-			if (skb == NULL)
+			if (!skb)
 				break;
 			state.fack_count = tp->fackets_out;
 		}
@@ -1820,14 +1820,12 @@ advance_sp:
 	for (j = 0; j < used_sacks; j++)
 		tp->recv_sack_cache[i++] = sp[j];

-	tcp_mark_lost_retrans(sk);
-
-	tcp_verify_left_out(tp);
-
 	if ((state.reord < tp->fackets_out) &&
 	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
 		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);

+	tcp_mark_lost_retrans(sk);
+	tcp_verify_left_out(tp);
 out:

 #if FASTRETRANS_DEBUG > 0
@@ -3099,17 +3097,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			if (sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
-		} else {
+		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
 			last_ackt = skb->skb_mstamp;
 			WARN_ON_ONCE(last_ackt.v64 == 0);
 			if (!first_ackt.v64)
 				first_ackt = last_ackt;

-			if (!(sacked & TCPCB_SACKED_ACKED)) {
-				reord = min(pkts_acked, reord);
-				if (!after(scb->end_seq, tp->high_seq))
-					flag |= FLAG_ORIG_SACK_ACKED;
-			}
+			reord = min(pkts_acked, reord);
+			if (!after(scb->end_seq, tp->high_seq))
+				flag |= FLAG_ORIG_SACK_ACKED;
 		}

 		if (sacked & TCPCB_SACKED_ACKED)
@@ -3282,6 +3278,24 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
 		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }

+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+	u32 delta = ack - tp->snd_una;
+
+	tp->bytes_acked += delta;
+	tp->snd_una = ack;
+}
+
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+	u32 delta = seq - tp->rcv_nxt;
+
+	tp->bytes_received += delta;
+	tp->rcv_nxt = seq;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
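
The two helpers above centralize the byte-counter bookkeeping so that every writer of tp->snd_una and tp->rcv_nxt also advances the cumulative counters. The idiom relies on unsigned modulo-2^32 subtraction, so the delta stays correct across sequence-number wraparound. A minimal userspace sketch of that property (illustrative names, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct conn {
	uint32_t snd_una;	/* 32-bit TCP sequence number */
	uint64_t bytes_acked;	/* monotonically growing byte count */
};

static void snd_una_update(struct conn *c, uint32_t ack)
{
	uint32_t delta = ack - c->snd_una;	/* mod-2^32 arithmetic */

	c->bytes_acked += delta;
	c->snd_una = ack;
}

int main(void)
{
	struct conn c = { .snd_una = 0xfffffff0u, .bytes_acked = 0 };

	snd_una_update(&c, 0x00000010u);	/* ACK crosses the wrap */
	assert(c.bytes_acked == 0x20);		/* 16 + 16 bytes, no underflow */
	printf("bytes_acked = %llu\n", (unsigned long long)c.bytes_acked);
	return 0;
}

The delta is correct as long as a single update advances by less than 2^31 bytes, which the TCP window guarantees in practice.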
@@ -3317,11 +3331,41 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		}
 	}

-	tp->snd_una = ack;
+	tcp_snd_una_update(tp, ack);

 	return flag;
 }

+/* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+ * attacks that send repeated SYNs or ACKs for the same connection. To
+ * do this, we do not send a duplicate SYNACK or ACK if the remote
+ * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
+ */
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+			  int mib_idx, u32 *last_oow_ack_time)
+{
+	/* Data packets without SYNs are not likely part of an ACK loop. */
+	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+	    !tcp_hdr(skb)->syn)
+		goto not_rate_limited;
+
+	if (*last_oow_ack_time) {
+		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+			NET_INC_STATS_BH(net, mib_idx);
+			return true;	/* rate-limited: don't send yet! */
+		}
+	}
+
+	*last_oow_ack_time = tcp_time_stamp;
+
+not_rate_limited:
+	return false;	/* not rate-limited: go ahead, send dupack now! */
+}
+
 /* RFC 5961 7 [ACK Throttling] */
 static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 {
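
The pattern in tcp_oow_rate_limited() is a simple last-event limiter: remember when we last answered an out-of-window segment and stay silent until the configured interval has elapsed, using a signed difference so timestamp wraparound is tolerated. A hedged userspace sketch of the same pattern (names like oow_rate_limited and now_ms are illustrative; the kernel works in jiffies via tcp_time_stamp):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t invalid_ratelimit_ms = 500;	/* like the sysctl default */

static bool oow_rate_limited(uint32_t now_ms, uint32_t *last_ms)
{
	if (*last_ms) {
		/* signed difference tolerates timestamp wraparound */
		int32_t elapsed = (int32_t)(now_ms - *last_ms);

		if (elapsed >= 0 && (uint32_t)elapsed < invalid_ratelimit_ms)
			return true;	/* suppress this dupack */
	}
	*last_ms = now_ms;		/* allowed: record send time */
	return false;
}

int main(void)
{
	uint32_t last = 0;

	printf("%d\n", oow_rate_limited(1000, &last));	/* 0: first, allowed */
	printf("%d\n", oow_rate_limited(1200, &last));	/* 1: within 500 ms */
	printf("%d\n", oow_rate_limited(1600, &last));	/* 0: interval passed */
	return 0;
}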
@@ -3469,7 +3513,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
 		 */
 		tcp_update_wl(tp, ack_seq);
-		tp->snd_una = ack;
+		tcp_snd_una_update(tp, ack);
 		flag |= FLAG_WIN_UPDATE;

 		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
@@ -3573,6 +3617,23 @@ old_ack:
 	return 0;
 }

+static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
+				      bool syn, struct tcp_fastopen_cookie *foc,
+				      bool exp_opt)
+{
+	/* Valid only in SYN or SYN-ACK with an even length. */
+	if (!foc || !syn || len < 0 || (len & 1))
+		return;
+
+	if (len >= TCP_FASTOPEN_COOKIE_MIN &&
+	    len <= TCP_FASTOPEN_COOKIE_MAX)
+		memcpy(foc->val, cookie, len);
+	else if (len != 0)
+		len = -1;
+	foc->len = len;
+	foc->exp = exp_opt;
+}
+
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
@@ -3662,21 +3723,22 @@ void tcp_parse_options(const struct sk_buff *skb,
 			 */
 			break;
 #endif
+		case TCPOPT_FASTOPEN:
+			tcp_parse_fastopen_option(
+				opsize - TCPOLEN_FASTOPEN_BASE,
+				ptr, th->syn, foc, false);
+			break;
+
 		case TCPOPT_EXP:
 			/* Fast Open option shares code 254 using a
-			 * 16 bits magic number. It's valid only in
-			 * SYN or SYN-ACK with an even size.
+			 * 16 bits magic number.
 			 */
-			if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
-			    get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
-			    foc == NULL || !th->syn || (opsize & 1))
-				break;
-			foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
-			if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
-			    foc->len <= TCP_FASTOPEN_COOKIE_MAX)
-				memcpy(foc->val, ptr + 2, foc->len);
-			else if (foc->len != 0)
-				foc->len = -1;
+			if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
+			    get_unaligned_be16(ptr) ==
+			    TCPOPT_FASTOPEN_MAGIC)
+				tcp_parse_fastopen_option(opsize -
+					TCPOLEN_EXP_FASTOPEN_BASE,
+					ptr + 2, th->syn, foc, true);
 			break;

 		}
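
With this refactor both encodings of the Fast Open cookie — the standardized option kind (TCPOPT_FASTOPEN) and the older experimental kind-254-plus-magic encoding — funnel into one validator, which also records which encoding arrived via foc->exp. A rough userspace sketch of the shared validation, with simplified types (FASTOPEN_COOKIE_MIN/MAX stand in for the kernel constants; not kernel code):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define FASTOPEN_COOKIE_MIN 4
#define FASTOPEN_COOKIE_MAX 16

struct fastopen_cookie {
	signed char len;	/* -1 invalid, 0 cookie request, 4..16 valid */
	unsigned char val[FASTOPEN_COOKIE_MAX];
	bool exp;		/* arrived in the experimental encoding? */
};

static void parse_fastopen_option(int len, const unsigned char *cookie,
				  bool syn, struct fastopen_cookie *foc,
				  bool exp_opt)
{
	if (!foc || !syn || len < 0 || (len & 1))
		return;		/* only even-length cookies on SYNs count */

	if (len >= FASTOPEN_COOKIE_MIN && len <= FASTOPEN_COOKIE_MAX)
		memcpy(foc->val, cookie, len);
	else if (len != 0)
		len = -1;	/* present but malformed */
	foc->len = len;
	foc->exp = exp_opt;
}

int main(void)
{
	static const unsigned char c[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	struct fastopen_cookie foc = { 0 };

	parse_fastopen_option(sizeof(c), c, true, &foc, false);
	printf("len=%d exp=%d\n", foc.len, foc.exp);	/* len=8 exp=0 */

	parse_fastopen_option(3, c, true, &foc, true);	/* odd length: ignored */
	printf("len=%d\n", foc.len);			/* still 8 */
	return 0;
}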
@@ -4190,7 +4252,7 @@ static void tcp_ofo_queue(struct sock *sk)

 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (!eaten)
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4358,7 +4420,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
 		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
-	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		skb_set_owner_r(skb, sk);
@@ -4451,7 +4513,7 @@ queue_and_out:

 			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		}
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4640,7 +4702,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	struct sk_buff *head;
 	u32 start, end;

-	if (skb == NULL)
+	if (!skb)
 		return;

 	start = TCP_SKB_CB(skb)->seq;
@@ -4799,6 +4861,8 @@ static void tcp_check_space(struct sock *sk)
 {
 	if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
 		sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
+		/* pairs with tcp_poll() */
+		smp_mb__after_atomic();
 		if (sk->sk_socket &&
 		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
 			tcp_new_space(sk);
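
The barrier inserted above pairs with one on the tcp_poll() side: the poller sets SOCK_NOSPACE and then re-checks for free space, while this path clears SOCK_QUEUE_SHRUNK (an atomic op) and then reads SOCK_NOSPACE. Without store-load ordering on both sides, each could read the other's stale flag and a writability wakeup would be lost. A rough C11 userspace analogue of the pairing — the flag names and functions are illustrative, not the kernel's sock_flag machinery:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

atomic_bool space_freed;	/* stands in for SOCK_QUEUE_SHRUNK */
atomic_bool waiting;		/* stands in for SOCK_NOSPACE */

/* roughly the tcp_check_space() side: publish space, then look for waiters */
static bool should_wake(void)
{
	atomic_store_explicit(&space_freed, false, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb__after_atomic() */
	return atomic_load_explicit(&waiting, memory_order_relaxed);
}

/* roughly the tcp_poll() side: register interest, then re-check for space */
static bool saw_space(void)
{
	atomic_store_explicit(&waiting, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the pairing barrier */
	return atomic_load_explicit(&space_freed, memory_order_relaxed);
}

int main(void)
{
	/* single-threaded smoke test; the fences matter under concurrency */
	atomic_store(&space_freed, true);
	printf("poller saw space: %d\n", saw_space());
	printf("writer should wake poller: %d\n", should_wake());
	return 0;
}

The full fences forbid the classic store-load reordering: at least one side is guaranteed to observe the other's flag, so either the poller sees the freed space or the writer sees the waiter and wakes it.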
@@ -5095,7 +5159,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);

-	if (unlikely(sk->sk_rx_dst == NULL))
+	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
 	/*
 	 *	Header prediction.
@@ -5197,7 +5261,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			tcp_rcv_rtt_measure_ts(sk, skb);

 			__skb_pull(skb, tcp_header_len);
-			tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+			tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
 			eaten = 1;
 		}
@@ -5292,7 +5356,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)

 	tcp_set_state(sk, TCP_ESTABLISHED);

-	if (skb != NULL) {
+	if (skb) {
 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
 		security_inet_conn_established(sk, skb);
 	}
@@ -5330,8 +5394,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
-	u16 mss = tp->rx_opt.mss_clamp;
-	bool syn_drop;
+	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
+	bool syn_drop = false;

 	if (mss == tp->rx_opt.user_mss) {
 		struct tcp_options_received opt;
@@ -5343,16 +5407,25 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 		mss = opt.mss_clamp;
 	}

-	if (!tp->syn_fastopen)  /* Ignore an unsolicited cookie */
+	if (!tp->syn_fastopen) {
+		/* Ignore an unsolicited cookie */
 		cookie->len = -1;
+	} else if (tp->total_retrans) {
+		/* SYN timed out and the SYN-ACK neither has a cookie nor
+		 * acknowledges data. Presumably the remote received only
+		 * the retransmitted (regular) SYNs: either the original
+		 * SYN-data or the corresponding SYN-ACK was dropped.
+		 */
+		syn_drop = (cookie->len < 0 && data);
+	} else if (cookie->len < 0 && !tp->syn_data) {
+		/* We requested a cookie but didn't get it. If we did not use
+		 * the (old) exp opt format then try so next time (try_exp=1).
+		 * Otherwise we go back to use the RFC7413 opt (try_exp=2).
+		 */
+		try_exp = tp->syn_fastopen_exp ? 2 : 1;
+	}

-	/* The SYN-ACK neither has cookie nor acknowledges the data. Presumably
-	 * the remote receives only the retransmitted (regular) SYNs: either
-	 * the original SYN-data or the corresponding SYN-ACK is lost.
-	 */
-	syn_drop = (cookie->len <= 0 && data && tp->total_retrans);
-
-	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
+	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);

 	if (data) { /* Retransmit unacked data in SYN */
 		tcp_for_write_queue_from(data, sk) {
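
The rewritten branch turns SYN-ACK handling into a three-way classification: ignore an unsolicited cookie, record a presumed SYN drop after retransmissions, or (when a requested cookie never arrived) switch Fast Open option encodings for the next attempt. A hedged userspace sketch of just that decision, with illustrative names and flattened inputs (not the kernel's tcp_sock state):

#include <stdbool.h>
#include <stdio.h>

struct fo_result {
	bool syn_drop;
	int try_exp;	/* 0 = keep current encoding, 1 = try the old
			 * experimental option next, 2 = go back to RFC 7413 */
};

static struct fo_result classify(bool requested_cookie, bool sent_syn_data,
				 bool retransmitted, int cookie_len,
				 bool used_exp_opt)
{
	struct fo_result r = { false, 0 };

	if (!requested_cookie)
		return r;			/* unsolicited cookie: ignore */
	if (retransmitted)
		r.syn_drop = (cookie_len < 0 && sent_syn_data);
	else if (cookie_len < 0 && !sent_syn_data)
		r.try_exp = used_exp_opt ? 2 : 1;
	return r;
}

int main(void)
{
	/* asked for a cookie with the RFC 7413 option, got none back */
	struct fo_result r = classify(true, false, false, -1, false);

	printf("syn_drop=%d try_exp=%d\n", r.syn_drop, r.try_exp); /* 0 1 */
	return 0;
}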
@@ -5661,11 +5734,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}

 	req = tp->fastopen_rsk;
-	if (req != NULL) {
+	if (req) {
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 		    sk->sk_state != TCP_FIN_WAIT1);

-		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
+		if (!tcp_check_req(sk, skb, req, true))
 			goto discard;
 	}

@@ -5751,7 +5824,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * ACK we have received, this would have acknowledged
 		 * our SYNACK so stop the SYNACK timer.
 		 */
-		if (req != NULL) {
+		if (req) {
 			/* Return RST if ack_seq is invalid.
 			 * Note that RFC793 only says to generate a
 			 * DUPACK for it but for TCP Fast Open it seems
@@ -5913,6 +5986,80 @@ static void tcp_ecn_create_request(struct request_sock *req,
 		inet_rsk(req)->ecn_ok = 1;
 }

+static void tcp_openreq_init(struct request_sock *req,
+			     const struct tcp_options_received *rx_opt,
+			     struct sk_buff *skb, const struct sock *sk)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+
+	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
+	req->cookie_ts = 0;
+	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
+	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_rsk(req)->last_oow_ack_time = 0;
+	req->mss = rx_opt->mss_clamp;
+	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
+	ireq->tstamp_ok = rx_opt->tstamp_ok;
+	ireq->sack_ok = rx_opt->sack_ok;
+	ireq->snd_wscale = rx_opt->snd_wscale;
+	ireq->wscale_ok = rx_opt->wscale_ok;
+	ireq->acked = 0;
+	ireq->ecn_ok = 0;
+	ireq->ir_rmt_port = tcp_hdr(skb)->source;
+	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
+	ireq->ir_mark = inet_request_mark(sk, skb);
+}
+
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+				      struct sock *sk_listener)
+{
+	struct request_sock *req = reqsk_alloc(ops, sk_listener);
+
+	if (req) {
+		struct inet_request_sock *ireq = inet_rsk(req);
+
+		kmemcheck_annotate_bitfield(ireq, flags);
+		ireq->opt = NULL;
+		atomic64_set(&ireq->ir_cookie, 0);
+		ireq->ireq_state = TCP_NEW_SYN_RECV;
+		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+		ireq->ireq_family = sk_listener->sk_family;
+	}
+
+	return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
+/*
+ * Return true if a syncookie should be sent
+ */
+static bool tcp_syn_flood_action(struct sock *sk,
+				 const struct sk_buff *skb,
+				 const char *proto)
+{
+	const char *msg = "Dropping request";
+	bool want_cookie = false;
+	struct listen_sock *lopt;
+
+#ifdef CONFIG_SYN_COOKIES
+	if (sysctl_tcp_syncookies) {
+		msg = "Sending cookies";
+		want_cookie = true;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+	} else
+#endif
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+
+	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
+		lopt->synflood_warned = 1;
+		pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
+			proto, ntohs(tcp_hdr(skb)->dest), msg);
+	}
+	return want_cookie;
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
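
tcp_syn_flood_action() embodies a small policy: when the SYN backlog fills, either answer with syncookies (if the tcp_syncookies sysctl allows) or drop the request, and warn once per listener — except in always-on mode (sysctl value 2), where flooding is the expected operating condition. A hedged userspace sketch of that decision, with illustrative names and constants:

#include <stdbool.h>
#include <stdio.h>

static int tcp_syncookies = 1;	/* 0 = off, 1 = on demand, 2 = always */

struct listener {
	bool synflood_warned;
	int port;
};

static bool syn_flood_action(struct listener *l, const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;

	if (tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
	}

	/* warn once per listener, and never in always-on cookie mode */
	if (!l->synflood_warned && tcp_syncookies != 2) {
		l->synflood_warned = true;
		printf("%s: Possible SYN flooding on port %d. %s.\n",
		       proto, l->port, msg);
	}
	return want_cookie;
}

int main(void)
{
	struct listener l = { false, 80 };

	if (syn_flood_action(&l, "TCP"))	/* warns once, returns true */
		puts("respond with syncookie");
	syn_flood_action(&l, "TCP");		/* silent the second time */
	return 0;
}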
@@ -5950,7 +6097,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		goto drop;
 	}

-	req = inet_reqsk_alloc(rsk_ops);
+	req = inet_reqsk_alloc(rsk_ops, sk);
 	if (!req)
 		goto drop;

@@ -5967,6 +6114,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);

+	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
+	inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
+
 	af_ops->init_req(req, sk, skb);

 	if (security_inet_conn_request(sk, skb, req))
@@ -6039,7 +6189,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (err || want_cookie)
 		goto drop_and_free;

-	tcp_rsk(req)->listener = NULL;
+	tcp_rsk(req)->tfo_listener = false;
 	af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 }
