aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c103
1 files changed, 83 insertions, 20 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d86784be7ab3..57ae96a04220 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
140 * "len" is invariant segment length, including TCP header. 140 * "len" is invariant segment length, including TCP header.
141 */ 141 */
142 len += skb->data - skb_transport_header(skb); 142 len += skb->data - skb_transport_header(skb);
143 if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || 143 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
144 /* If PSH is not set, packet should be 144 /* If PSH is not set, packet should be
145 * full sized, provided peer TCP is not badly broken. 145 * full sized, provided peer TCP is not badly broken.
146 * This observation (if it is correct 8)) allows 146 * This observation (if it is correct 8)) allows
@@ -411,7 +411,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); 411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
412 412
413 hint = min(hint, tp->rcv_wnd / 2); 413 hint = min(hint, tp->rcv_wnd / 2);
414 hint = min(hint, TCP_MIN_RCVMSS); 414 hint = min(hint, TCP_MSS_DEFAULT);
415 hint = max(hint, TCP_MIN_MSS); 415 hint = max(hint, TCP_MIN_MSS);
416 416
417 inet_csk(sk)->icsk_ack.rcv_mss = hint; 417 inet_csk(sk)->icsk_ack.rcv_mss = hint;
@@ -2300,7 +2300,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
2300 * they differ. Since neither occurs due to loss, TCP should really 2300 * they differ. Since neither occurs due to loss, TCP should really
2301 * ignore them. 2301 * ignore them.
2302 */ 2302 */
2303static inline int tcp_dupack_heurestics(struct tcp_sock *tp) 2303static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
2304{ 2304{
2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2306} 2306}
@@ -2425,7 +2425,7 @@ static int tcp_time_to_recover(struct sock *sk)
2425 return 1; 2425 return 1;
2426 2426
2427 /* Not-A-Trick#2 : Classic rule... */ 2427 /* Not-A-Trick#2 : Classic rule... */
2428 if (tcp_dupack_heurestics(tp) > tp->reordering) 2428 if (tcp_dupack_heuristics(tp) > tp->reordering)
2429 return 1; 2429 return 1;
2430 2430
2431 /* Trick#3 : when we use RFC2988 timer restart, fast 2431 /* Trick#3 : when we use RFC2988 timer restart, fast
@@ -3698,7 +3698,7 @@ old_ack:
3698 * the fast version below fails. 3698 * the fast version below fails.
3699 */ 3699 */
3700void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, 3700void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3701 int estab) 3701 u8 **hvpp, int estab, struct dst_entry *dst)
3702{ 3702{
3703 unsigned char *ptr; 3703 unsigned char *ptr;
3704 struct tcphdr *th = tcp_hdr(skb); 3704 struct tcphdr *th = tcp_hdr(skb);
@@ -3737,7 +3737,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3737 break; 3737 break;
3738 case TCPOPT_WINDOW: 3738 case TCPOPT_WINDOW:
3739 if (opsize == TCPOLEN_WINDOW && th->syn && 3739 if (opsize == TCPOLEN_WINDOW && th->syn &&
3740 !estab && sysctl_tcp_window_scaling) { 3740 !estab && sysctl_tcp_window_scaling &&
3741 !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
3741 __u8 snd_wscale = *(__u8 *)ptr; 3742 __u8 snd_wscale = *(__u8 *)ptr;
3742 opt_rx->wscale_ok = 1; 3743 opt_rx->wscale_ok = 1;
3743 if (snd_wscale > 14) { 3744 if (snd_wscale > 14) {
@@ -3753,7 +3754,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3753 case TCPOPT_TIMESTAMP: 3754 case TCPOPT_TIMESTAMP:
3754 if ((opsize == TCPOLEN_TIMESTAMP) && 3755 if ((opsize == TCPOLEN_TIMESTAMP) &&
3755 ((estab && opt_rx->tstamp_ok) || 3756 ((estab && opt_rx->tstamp_ok) ||
3756 (!estab && sysctl_tcp_timestamps))) { 3757 (!estab && sysctl_tcp_timestamps &&
3758 !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
3757 opt_rx->saw_tstamp = 1; 3759 opt_rx->saw_tstamp = 1;
3758 opt_rx->rcv_tsval = get_unaligned_be32(ptr); 3760 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3759 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); 3761 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3761,7 +3763,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3761 break; 3763 break;
3762 case TCPOPT_SACK_PERM: 3764 case TCPOPT_SACK_PERM:
3763 if (opsize == TCPOLEN_SACK_PERM && th->syn && 3765 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3764 !estab && sysctl_tcp_sack) { 3766 !estab && sysctl_tcp_sack &&
3767 !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
3765 opt_rx->sack_ok = 1; 3768 opt_rx->sack_ok = 1;
3766 tcp_sack_reset(opt_rx); 3769 tcp_sack_reset(opt_rx);
3767 } 3770 }
@@ -3782,7 +3785,30 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3782 */ 3785 */
3783 break; 3786 break;
3784#endif 3787#endif
3785 } 3788 case TCPOPT_COOKIE:
3789 /* This option is variable length.
3790 */
3791 switch (opsize) {
3792 case TCPOLEN_COOKIE_BASE:
3793 /* not yet implemented */
3794 break;
3795 case TCPOLEN_COOKIE_PAIR:
3796 /* not yet implemented */
3797 break;
3798 case TCPOLEN_COOKIE_MIN+0:
3799 case TCPOLEN_COOKIE_MIN+2:
3800 case TCPOLEN_COOKIE_MIN+4:
3801 case TCPOLEN_COOKIE_MIN+6:
3802 case TCPOLEN_COOKIE_MAX:
3803 /* 16-bit multiple */
3804 opt_rx->cookie_plus = opsize;
3805 *hvpp = ptr;
3806 default:
3807 /* ignore option */
3808 break;
3809 };
3810 break;
3811 };
3786 3812
3787 ptr += opsize-2; 3813 ptr += opsize-2;
3788 length -= opsize; 3814 length -= opsize;
@@ -3810,17 +3836,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3810 * If it is wrong it falls back on tcp_parse_options(). 3836 * If it is wrong it falls back on tcp_parse_options().
3811 */ 3837 */
3812static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, 3838static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3813 struct tcp_sock *tp) 3839 struct tcp_sock *tp, u8 **hvpp)
3814{ 3840{
3815 if (th->doff == sizeof(struct tcphdr) >> 2) { 3841 /* In the spirit of fast parsing, compare doff directly to constant
3842 * values. Because equality is used, short doff can be ignored here.
3843 */
3844 if (th->doff == (sizeof(*th) / 4)) {
3816 tp->rx_opt.saw_tstamp = 0; 3845 tp->rx_opt.saw_tstamp = 0;
3817 return 0; 3846 return 0;
3818 } else if (tp->rx_opt.tstamp_ok && 3847 } else if (tp->rx_opt.tstamp_ok &&
3819 th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { 3848 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3820 if (tcp_parse_aligned_timestamp(tp, th)) 3849 if (tcp_parse_aligned_timestamp(tp, th))
3821 return 1; 3850 return 1;
3822 } 3851 }
3823 tcp_parse_options(skb, &tp->rx_opt, 1); 3852 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
3824 return 1; 3853 return 1;
3825} 3854}
3826 3855
@@ -4075,8 +4104,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4075static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) 4104static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4076{ 4105{
4077 struct tcp_sock *tp = tcp_sk(sk); 4106 struct tcp_sock *tp = tcp_sk(sk);
4107 struct dst_entry *dst = __sk_dst_get(sk);
4078 4108
4079 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4109 if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
4110 !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
4080 int mib_idx; 4111 int mib_idx;
4081 4112
4082 if (before(seq, tp->rcv_nxt)) 4113 if (before(seq, tp->rcv_nxt))
@@ -4105,13 +4136,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4105static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) 4136static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4106{ 4137{
4107 struct tcp_sock *tp = tcp_sk(sk); 4138 struct tcp_sock *tp = tcp_sk(sk);
4139 struct dst_entry *dst = __sk_dst_get(sk);
4108 4140
4109 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 4141 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4110 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { 4142 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4111 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4143 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4112 tcp_enter_quickack_mode(sk); 4144 tcp_enter_quickack_mode(sk);
4113 4145
4114 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4146 if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
4147 !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
4115 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 4148 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4116 4149
4117 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) 4150 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4845,11 +4878,11 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4845 struct tcp_sock *tp = tcp_sk(sk); 4878 struct tcp_sock *tp = tcp_sk(sk);
4846 4879
4847 /* More than one full frame received... */ 4880 /* More than one full frame received... */
4848 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss 4881 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4849 /* ... and right edge of window advances far enough. 4882 /* ... and right edge of window advances far enough.
4850 * (tcp_recvmsg() will send ACK otherwise). Or... 4883 * (tcp_recvmsg() will send ACK otherwise). Or...
4851 */ 4884 */
4852 && __tcp_select_window(sk) >= tp->rcv_wnd) || 4885 __tcp_select_window(sk) >= tp->rcv_wnd) ||
4853 /* We ACK each frame or... */ 4886 /* We ACK each frame or... */
4854 tcp_in_quickack_mode(sk) || 4887 tcp_in_quickack_mode(sk) ||
4855 /* We have out of order data. */ 4888 /* We have out of order data. */
@@ -5070,10 +5103,12 @@ out:
5070static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, 5103static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5071 struct tcphdr *th, int syn_inerr) 5104 struct tcphdr *th, int syn_inerr)
5072{ 5105{
5106 u8 *hash_location;
5073 struct tcp_sock *tp = tcp_sk(sk); 5107 struct tcp_sock *tp = tcp_sk(sk);
5074 5108
5075 /* RFC1323: H1. Apply PAWS check first. */ 5109 /* RFC1323: H1. Apply PAWS check first. */
5076 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && 5110 if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
5111 tp->rx_opt.saw_tstamp &&
5077 tcp_paws_discard(sk, skb)) { 5112 tcp_paws_discard(sk, skb)) {
5078 if (!th->rst) { 5113 if (!th->rst) {
5079 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); 5114 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5361,11 +5396,14 @@ discard:
5361static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5396static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5362 struct tcphdr *th, unsigned len) 5397 struct tcphdr *th, unsigned len)
5363{ 5398{
5364 struct tcp_sock *tp = tcp_sk(sk); 5399 u8 *hash_location;
5365 struct inet_connection_sock *icsk = inet_csk(sk); 5400 struct inet_connection_sock *icsk = inet_csk(sk);
5401 struct tcp_sock *tp = tcp_sk(sk);
5402 struct dst_entry *dst = __sk_dst_get(sk);
5403 struct tcp_cookie_values *cvp = tp->cookie_values;
5366 int saved_clamp = tp->rx_opt.mss_clamp; 5404 int saved_clamp = tp->rx_opt.mss_clamp;
5367 5405
5368 tcp_parse_options(skb, &tp->rx_opt, 0); 5406 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst);
5369 5407
5370 if (th->ack) { 5408 if (th->ack) {
5371 /* rfc793: 5409 /* rfc793:
@@ -5462,6 +5500,31 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5462 * Change state from SYN-SENT only after copied_seq 5500 * Change state from SYN-SENT only after copied_seq
5463 * is initialized. */ 5501 * is initialized. */
5464 tp->copied_seq = tp->rcv_nxt; 5502 tp->copied_seq = tp->rcv_nxt;
5503
5504 if (cvp != NULL &&
5505 cvp->cookie_pair_size > 0 &&
5506 tp->rx_opt.cookie_plus > 0) {
5507 int cookie_size = tp->rx_opt.cookie_plus
5508 - TCPOLEN_COOKIE_BASE;
5509 int cookie_pair_size = cookie_size
5510 + cvp->cookie_desired;
5511
5512 /* A cookie extension option was sent and returned.
5513 * Note that each incoming SYNACK replaces the
5514 * Responder cookie. The initial exchange is most
5515 * fragile, as protection against spoofing relies
5516 * entirely upon the sequence and timestamp (above).
5517 * This replacement strategy allows the correct pair to
5518 * pass through, while any others will be filtered via
5519 * Responder verification later.
5520 */
5521 if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5522 memcpy(&cvp->cookie_pair[cvp->cookie_desired],
5523 hash_location, cookie_size);
5524 cvp->cookie_pair_size = cookie_pair_size;
5525 }
5526 }
5527
5465 smp_mb(); 5528 smp_mb();
5466 tcp_set_state(sk, TCP_ESTABLISHED); 5529 tcp_set_state(sk, TCP_ESTABLISHED);
5467 5530