aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorTakashi Iwai <tiwai@suse.de>2009-12-15 04:29:06 -0500
committerTakashi Iwai <tiwai@suse.de>2009-12-15 04:29:06 -0500
commit709334c87dbdb44150ce436b3d13c814db0dcae9 (patch)
tree5861a45f70c1f283720337abd864498f5afb3dbe /net/ipv4/tcp_input.c
parent0d64b568fcd48b133721c1d322e7c51d85eb12df (diff)
parentf74890277a196949e4004fe2955e1d4fb3930f98 (diff)
Merge branch 'fixes' of git://git.alsa-project.org/alsa-kernel into for-linus
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c138
1 files changed, 115 insertions, 23 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d86784be7ab3..12cab7d74dba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
140 * "len" is invariant segment length, including TCP header. 140 * "len" is invariant segment length, including TCP header.
141 */ 141 */
142 len += skb->data - skb_transport_header(skb); 142 len += skb->data - skb_transport_header(skb);
143 if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || 143 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
144 /* If PSH is not set, packet should be 144 /* If PSH is not set, packet should be
145 * full sized, provided peer TCP is not badly broken. 145 * full sized, provided peer TCP is not badly broken.
146 * This observation (if it is correct 8)) allows 146 * This observation (if it is correct 8)) allows
@@ -411,7 +411,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); 411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
412 412
413 hint = min(hint, tp->rcv_wnd / 2); 413 hint = min(hint, tp->rcv_wnd / 2);
414 hint = min(hint, TCP_MIN_RCVMSS); 414 hint = min(hint, TCP_MSS_DEFAULT);
415 hint = max(hint, TCP_MIN_MSS); 415 hint = max(hint, TCP_MIN_MSS);
416 416
417 inet_csk(sk)->icsk_ack.rcv_mss = hint; 417 inet_csk(sk)->icsk_ack.rcv_mss = hint;
@@ -2300,7 +2300,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
2300 * they differ. Since neither occurs due to loss, TCP should really 2300 * they differ. Since neither occurs due to loss, TCP should really
2301 * ignore them. 2301 * ignore them.
2302 */ 2302 */
2303static inline int tcp_dupack_heurestics(struct tcp_sock *tp) 2303static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
2304{ 2304{
2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2306} 2306}
@@ -2425,7 +2425,7 @@ static int tcp_time_to_recover(struct sock *sk)
2425 return 1; 2425 return 1;
2426 2426
2427 /* Not-A-Trick#2 : Classic rule... */ 2427 /* Not-A-Trick#2 : Classic rule... */
2428 if (tcp_dupack_heurestics(tp) > tp->reordering) 2428 if (tcp_dupack_heuristics(tp) > tp->reordering)
2429 return 1; 2429 return 1;
2430 2430
2431 /* Trick#3 : when we use RFC2988 timer restart, fast 2431 /* Trick#3 : when we use RFC2988 timer restart, fast
@@ -2717,6 +2717,35 @@ static void tcp_try_undo_dsack(struct sock *sk)
2717 } 2717 }
2718} 2718}
2719 2719
2720/* We can clear retrans_stamp when there are no retransmissions in the
2721 * window. It would seem that it is trivially available for us in
2722 * tp->retrans_out, however, that kind of assumption doesn't consider
2723 * what will happen if errors occur when sending retransmission for the
2724 * second time. ...It could be that such a segment has only
2725 * TCPCB_EVER_RETRANS set at the present time. It seems that checking
2726 * the head skb is enough except for some reneging corner cases that
2727 * are not worth the effort.
2728 *
2729 * Main reason for all this complexity is the fact that connection dying
2730 * time now depends on the validity of the retrans_stamp, in particular,
2731 * that successive retransmissions of a segment must not advance
2732 * retrans_stamp under any conditions.
2733 */
2734static int tcp_any_retrans_done(struct sock *sk)
2735{
2736 struct tcp_sock *tp = tcp_sk(sk);
2737 struct sk_buff *skb;
2738
2739 if (tp->retrans_out)
2740 return 1;
2741
2742 skb = tcp_write_queue_head(sk);
2743 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2744 return 1;
2745
2746 return 0;
2747}
2748
2720/* Undo during fast recovery after partial ACK. */ 2749/* Undo during fast recovery after partial ACK. */
2721 2750
2722static int tcp_try_undo_partial(struct sock *sk, int acked) 2751static int tcp_try_undo_partial(struct sock *sk, int acked)
@@ -2729,7 +2758,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2729 /* Plain luck! Hole is filled with delayed 2758 /* Plain luck! Hole is filled with delayed
2730 * packet, rather than with a retransmit. 2759 * packet, rather than with a retransmit.
2731 */ 2760 */
2732 if (tp->retrans_out == 0) 2761 if (!tcp_any_retrans_done(sk))
2733 tp->retrans_stamp = 0; 2762 tp->retrans_stamp = 0;
2734 2763
2735 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); 2764 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
@@ -2788,7 +2817,7 @@ static void tcp_try_keep_open(struct sock *sk)
2788 struct tcp_sock *tp = tcp_sk(sk); 2817 struct tcp_sock *tp = tcp_sk(sk);
2789 int state = TCP_CA_Open; 2818 int state = TCP_CA_Open;
2790 2819
2791 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) 2820 if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
2792 state = TCP_CA_Disorder; 2821 state = TCP_CA_Disorder;
2793 2822
2794 if (inet_csk(sk)->icsk_ca_state != state) { 2823 if (inet_csk(sk)->icsk_ca_state != state) {
@@ -2803,7 +2832,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
2803 2832
2804 tcp_verify_left_out(tp); 2833 tcp_verify_left_out(tp);
2805 2834
2806 if (!tp->frto_counter && tp->retrans_out == 0) 2835 if (!tp->frto_counter && !tcp_any_retrans_done(sk))
2807 tp->retrans_stamp = 0; 2836 tp->retrans_stamp = 0;
2808 2837
2809 if (flag & FLAG_ECE) 2838 if (flag & FLAG_ECE)
@@ -3698,7 +3727,7 @@ old_ack:
3698 * the fast version below fails. 3727 * the fast version below fails.
3699 */ 3728 */
3700void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, 3729void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3701 int estab) 3730 u8 **hvpp, int estab, struct dst_entry *dst)
3702{ 3731{
3703 unsigned char *ptr; 3732 unsigned char *ptr;
3704 struct tcphdr *th = tcp_hdr(skb); 3733 struct tcphdr *th = tcp_hdr(skb);
@@ -3737,7 +3766,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3737 break; 3766 break;
3738 case TCPOPT_WINDOW: 3767 case TCPOPT_WINDOW:
3739 if (opsize == TCPOLEN_WINDOW && th->syn && 3768 if (opsize == TCPOLEN_WINDOW && th->syn &&
3740 !estab && sysctl_tcp_window_scaling) { 3769 !estab && sysctl_tcp_window_scaling &&
3770 !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
3741 __u8 snd_wscale = *(__u8 *)ptr; 3771 __u8 snd_wscale = *(__u8 *)ptr;
3742 opt_rx->wscale_ok = 1; 3772 opt_rx->wscale_ok = 1;
3743 if (snd_wscale > 14) { 3773 if (snd_wscale > 14) {
@@ -3753,7 +3783,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3753 case TCPOPT_TIMESTAMP: 3783 case TCPOPT_TIMESTAMP:
3754 if ((opsize == TCPOLEN_TIMESTAMP) && 3784 if ((opsize == TCPOLEN_TIMESTAMP) &&
3755 ((estab && opt_rx->tstamp_ok) || 3785 ((estab && opt_rx->tstamp_ok) ||
3756 (!estab && sysctl_tcp_timestamps))) { 3786 (!estab && sysctl_tcp_timestamps &&
3787 !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
3757 opt_rx->saw_tstamp = 1; 3788 opt_rx->saw_tstamp = 1;
3758 opt_rx->rcv_tsval = get_unaligned_be32(ptr); 3789 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3759 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); 3790 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3761,7 +3792,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3761 break; 3792 break;
3762 case TCPOPT_SACK_PERM: 3793 case TCPOPT_SACK_PERM:
3763 if (opsize == TCPOLEN_SACK_PERM && th->syn && 3794 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3764 !estab && sysctl_tcp_sack) { 3795 !estab && sysctl_tcp_sack &&
3796 !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
3765 opt_rx->sack_ok = 1; 3797 opt_rx->sack_ok = 1;
3766 tcp_sack_reset(opt_rx); 3798 tcp_sack_reset(opt_rx);
3767 } 3799 }
@@ -3782,7 +3814,30 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3782 */ 3814 */
3783 break; 3815 break;
3784#endif 3816#endif
3785 } 3817 case TCPOPT_COOKIE:
3818 /* This option is variable length.
3819 */
3820 switch (opsize) {
3821 case TCPOLEN_COOKIE_BASE:
3822 /* not yet implemented */
3823 break;
3824 case TCPOLEN_COOKIE_PAIR:
3825 /* not yet implemented */
3826 break;
3827 case TCPOLEN_COOKIE_MIN+0:
3828 case TCPOLEN_COOKIE_MIN+2:
3829 case TCPOLEN_COOKIE_MIN+4:
3830 case TCPOLEN_COOKIE_MIN+6:
3831 case TCPOLEN_COOKIE_MAX:
3832 /* 16-bit multiple */
3833 opt_rx->cookie_plus = opsize;
3834 *hvpp = ptr;
3835 default:
3836 /* ignore option */
3837 break;
3838 };
3839 break;
3840 };
3786 3841
3787 ptr += opsize-2; 3842 ptr += opsize-2;
3788 length -= opsize; 3843 length -= opsize;
@@ -3810,17 +3865,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3810 * If it is wrong it falls back on tcp_parse_options(). 3865 * If it is wrong it falls back on tcp_parse_options().
3811 */ 3866 */
3812static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, 3867static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3813 struct tcp_sock *tp) 3868 struct tcp_sock *tp, u8 **hvpp)
3814{ 3869{
3815 if (th->doff == sizeof(struct tcphdr) >> 2) { 3870 /* In the spirit of fast parsing, compare doff directly to constant
3871 * values. Because equality is used, short doff can be ignored here.
3872 */
3873 if (th->doff == (sizeof(*th) / 4)) {
3816 tp->rx_opt.saw_tstamp = 0; 3874 tp->rx_opt.saw_tstamp = 0;
3817 return 0; 3875 return 0;
3818 } else if (tp->rx_opt.tstamp_ok && 3876 } else if (tp->rx_opt.tstamp_ok &&
3819 th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { 3877 th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
3820 if (tcp_parse_aligned_timestamp(tp, th)) 3878 if (tcp_parse_aligned_timestamp(tp, th))
3821 return 1; 3879 return 1;
3822 } 3880 }
3823 tcp_parse_options(skb, &tp->rx_opt, 1); 3881 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
3824 return 1; 3882 return 1;
3825} 3883}
3826 3884
@@ -4075,8 +4133,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4075static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) 4133static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4076{ 4134{
4077 struct tcp_sock *tp = tcp_sk(sk); 4135 struct tcp_sock *tp = tcp_sk(sk);
4136 struct dst_entry *dst = __sk_dst_get(sk);
4078 4137
4079 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4138 if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
4139 !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
4080 int mib_idx; 4140 int mib_idx;
4081 4141
4082 if (before(seq, tp->rcv_nxt)) 4142 if (before(seq, tp->rcv_nxt))
@@ -4105,13 +4165,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4105static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) 4165static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4106{ 4166{
4107 struct tcp_sock *tp = tcp_sk(sk); 4167 struct tcp_sock *tp = tcp_sk(sk);
4168 struct dst_entry *dst = __sk_dst_get(sk);
4108 4169
4109 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 4170 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4110 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { 4171 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4111 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4172 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4112 tcp_enter_quickack_mode(sk); 4173 tcp_enter_quickack_mode(sk);
4113 4174
4114 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4175 if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
4176 !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
4115 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 4177 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4116 4178
4117 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) 4179 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4845,11 +4907,11 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4845 struct tcp_sock *tp = tcp_sk(sk); 4907 struct tcp_sock *tp = tcp_sk(sk);
4846 4908
4847 /* More than one full frame received... */ 4909 /* More than one full frame received... */
4848 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss 4910 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4849 /* ... and right edge of window advances far enough. 4911 /* ... and right edge of window advances far enough.
4850 * (tcp_recvmsg() will send ACK otherwise). Or... 4912 * (tcp_recvmsg() will send ACK otherwise). Or...
4851 */ 4913 */
4852 && __tcp_select_window(sk) >= tp->rcv_wnd) || 4914 __tcp_select_window(sk) >= tp->rcv_wnd) ||
4853 /* We ACK each frame or... */ 4915 /* We ACK each frame or... */
4854 tcp_in_quickack_mode(sk) || 4916 tcp_in_quickack_mode(sk) ||
4855 /* We have out of order data. */ 4917 /* We have out of order data. */
@@ -5070,10 +5132,12 @@ out:
5070static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, 5132static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5071 struct tcphdr *th, int syn_inerr) 5133 struct tcphdr *th, int syn_inerr)
5072{ 5134{
5135 u8 *hash_location;
5073 struct tcp_sock *tp = tcp_sk(sk); 5136 struct tcp_sock *tp = tcp_sk(sk);
5074 5137
5075 /* RFC1323: H1. Apply PAWS check first. */ 5138 /* RFC1323: H1. Apply PAWS check first. */
5076 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && 5139 if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
5140 tp->rx_opt.saw_tstamp &&
5077 tcp_paws_discard(sk, skb)) { 5141 tcp_paws_discard(sk, skb)) {
5078 if (!th->rst) { 5142 if (!th->rst) {
5079 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); 5143 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5361,11 +5425,14 @@ discard:
5361static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5425static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5362 struct tcphdr *th, unsigned len) 5426 struct tcphdr *th, unsigned len)
5363{ 5427{
5364 struct tcp_sock *tp = tcp_sk(sk); 5428 u8 *hash_location;
5365 struct inet_connection_sock *icsk = inet_csk(sk); 5429 struct inet_connection_sock *icsk = inet_csk(sk);
5430 struct tcp_sock *tp = tcp_sk(sk);
5431 struct dst_entry *dst = __sk_dst_get(sk);
5432 struct tcp_cookie_values *cvp = tp->cookie_values;
5366 int saved_clamp = tp->rx_opt.mss_clamp; 5433 int saved_clamp = tp->rx_opt.mss_clamp;
5367 5434
5368 tcp_parse_options(skb, &tp->rx_opt, 0); 5435 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst);
5369 5436
5370 if (th->ack) { 5437 if (th->ack) {
5371 /* rfc793: 5438 /* rfc793:
@@ -5462,6 +5529,31 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5462 * Change state from SYN-SENT only after copied_seq 5529 * Change state from SYN-SENT only after copied_seq
5463 * is initialized. */ 5530 * is initialized. */
5464 tp->copied_seq = tp->rcv_nxt; 5531 tp->copied_seq = tp->rcv_nxt;
5532
5533 if (cvp != NULL &&
5534 cvp->cookie_pair_size > 0 &&
5535 tp->rx_opt.cookie_plus > 0) {
5536 int cookie_size = tp->rx_opt.cookie_plus
5537 - TCPOLEN_COOKIE_BASE;
5538 int cookie_pair_size = cookie_size
5539 + cvp->cookie_desired;
5540
5541 /* A cookie extension option was sent and returned.
5542 * Note that each incoming SYNACK replaces the
5543 * Responder cookie. The initial exchange is most
5544 * fragile, as protection against spoofing relies
5545 * entirely upon the sequence and timestamp (above).
5546 * This replacement strategy allows the correct pair to
5547 * pass through, while any others will be filtered via
5548 * Responder verification later.
5549 */
5550 if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5551 memcpy(&cvp->cookie_pair[cvp->cookie_desired],
5552 hash_location, cookie_size);
5553 cvp->cookie_pair_size = cookie_pair_size;
5554 }
5555 }
5556
5465 smp_mb(); 5557 smp_mb();
5466 tcp_set_state(sk, TCP_ESTABLISHED); 5558 tcp_set_state(sk, TCP_ESTABLISHED);
5467 5559