diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
| -rw-r--r-- | net/ipv4/tcp_input.c | 65 |
1 files changed, 41 insertions, 24 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28e029632493..b55f60f6fcbe 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -62,6 +62,7 @@ | |||
| 62 | */ | 62 | */ |
| 63 | 63 | ||
| 64 | #include <linux/mm.h> | 64 | #include <linux/mm.h> |
| 65 | #include <linux/slab.h> | ||
| 65 | #include <linux/module.h> | 66 | #include <linux/module.h> |
| 66 | #include <linux/sysctl.h> | 67 | #include <linux/sysctl.h> |
| 67 | #include <linux/kernel.h> | 68 | #include <linux/kernel.h> |
| @@ -77,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1; | |||
| 77 | int sysctl_tcp_sack __read_mostly = 1; | 78 | int sysctl_tcp_sack __read_mostly = 1; |
| 78 | int sysctl_tcp_fack __read_mostly = 1; | 79 | int sysctl_tcp_fack __read_mostly = 1; |
| 79 | int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; | 80 | int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; |
| 81 | EXPORT_SYMBOL(sysctl_tcp_reordering); | ||
| 80 | int sysctl_tcp_ecn __read_mostly = 2; | 82 | int sysctl_tcp_ecn __read_mostly = 2; |
| 83 | EXPORT_SYMBOL(sysctl_tcp_ecn); | ||
| 81 | int sysctl_tcp_dsack __read_mostly = 1; | 84 | int sysctl_tcp_dsack __read_mostly = 1; |
| 82 | int sysctl_tcp_app_win __read_mostly = 31; | 85 | int sysctl_tcp_app_win __read_mostly = 31; |
| 83 | int sysctl_tcp_adv_win_scale __read_mostly = 2; | 86 | int sysctl_tcp_adv_win_scale __read_mostly = 2; |
| 87 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | ||
| 84 | 88 | ||
| 85 | int sysctl_tcp_stdurg __read_mostly; | 89 | int sysctl_tcp_stdurg __read_mostly; |
| 86 | int sysctl_tcp_rfc1337 __read_mostly; | 90 | int sysctl_tcp_rfc1337 __read_mostly; |
| @@ -89,6 +93,8 @@ int sysctl_tcp_frto __read_mostly = 2; | |||
| 89 | int sysctl_tcp_frto_response __read_mostly; | 93 | int sysctl_tcp_frto_response __read_mostly; |
| 90 | int sysctl_tcp_nometrics_save __read_mostly; | 94 | int sysctl_tcp_nometrics_save __read_mostly; |
| 91 | 95 | ||
| 96 | int sysctl_tcp_thin_dupack __read_mostly; | ||
| 97 | |||
| 92 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 98 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
| 93 | int sysctl_tcp_abc __read_mostly; | 99 | int sysctl_tcp_abc __read_mostly; |
| 94 | 100 | ||
| @@ -416,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) | |||
| 416 | 422 | ||
| 417 | inet_csk(sk)->icsk_ack.rcv_mss = hint; | 423 | inet_csk(sk)->icsk_ack.rcv_mss = hint; |
| 418 | } | 424 | } |
| 425 | EXPORT_SYMBOL(tcp_initialize_rcv_mss); | ||
| 419 | 426 | ||
| 420 | /* Receiver "autotuning" code. | 427 | /* Receiver "autotuning" code. |
| 421 | * | 428 | * |
| @@ -2447,6 +2454,16 @@ static int tcp_time_to_recover(struct sock *sk) | |||
| 2447 | return 1; | 2454 | return 1; |
| 2448 | } | 2455 | } |
| 2449 | 2456 | ||
| 2457 | /* If a thin stream is detected, retransmit after first | ||
| 2458 | * received dupack. Employ only if SACK is supported in order | ||
| 2459 | * to avoid possible corner-case series of spurious retransmissions. | ||
| 2460 | * Use only if there are no unsent data. | ||
| 2461 | */ | ||
| 2462 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && | ||
| 2463 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && | ||
| 2464 | tcp_is_sack(tp) && !tcp_send_head(sk)) | ||
| 2465 | return 1; | ||
| 2466 | |||
| 2450 | return 0; | 2467 | return 0; |
| 2451 | } | 2468 | } |
| 2452 | 2469 | ||
| @@ -2499,6 +2516,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) | |||
| 2499 | int err; | 2516 | int err; |
| 2500 | unsigned int mss; | 2517 | unsigned int mss; |
| 2501 | 2518 | ||
| 2519 | if (packets == 0) | ||
| 2520 | return; | ||
| 2521 | |||
| 2502 | WARN_ON(packets > tp->packets_out); | 2522 | WARN_ON(packets > tp->packets_out); |
| 2503 | if (tp->lost_skb_hint) { | 2523 | if (tp->lost_skb_hint) { |
| 2504 | skb = tp->lost_skb_hint; | 2524 | skb = tp->lost_skb_hint; |
| @@ -2525,7 +2545,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) | |||
| 2525 | cnt += tcp_skb_pcount(skb); | 2545 | cnt += tcp_skb_pcount(skb); |
| 2526 | 2546 | ||
| 2527 | if (cnt > packets) { | 2547 | if (cnt > packets) { |
| 2528 | if (tcp_is_sack(tp) || (oldcnt >= packets)) | 2548 | if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || |
| 2549 | (oldcnt >= packets)) | ||
| 2529 | break; | 2550 | break; |
| 2530 | 2551 | ||
| 2531 | mss = skb_shinfo(skb)->gso_size; | 2552 | mss = skb_shinfo(skb)->gso_size; |
| @@ -2623,7 +2644,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
| 2623 | if (sk->sk_family == AF_INET) { | 2644 | if (sk->sk_family == AF_INET) { |
| 2624 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", | 2645 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", |
| 2625 | msg, | 2646 | msg, |
| 2626 | &inet->daddr, ntohs(inet->dport), | 2647 | &inet->inet_daddr, ntohs(inet->inet_dport), |
| 2627 | tp->snd_cwnd, tcp_left_out(tp), | 2648 | tp->snd_cwnd, tcp_left_out(tp), |
| 2628 | tp->snd_ssthresh, tp->prior_ssthresh, | 2649 | tp->snd_ssthresh, tp->prior_ssthresh, |
| 2629 | tp->packets_out); | 2650 | tp->packets_out); |
| @@ -2633,7 +2654,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
| 2633 | struct ipv6_pinfo *np = inet6_sk(sk); | 2654 | struct ipv6_pinfo *np = inet6_sk(sk); |
| 2634 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", | 2655 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", |
| 2635 | msg, | 2656 | msg, |
| 2636 | &np->daddr, ntohs(inet->dport), | 2657 | &np->daddr, ntohs(inet->inet_dport), |
| 2637 | tp->snd_cwnd, tcp_left_out(tp), | 2658 | tp->snd_cwnd, tcp_left_out(tp), |
| 2638 | tp->snd_ssthresh, tp->prior_ssthresh, | 2659 | tp->snd_ssthresh, tp->prior_ssthresh, |
| 2639 | tp->packets_out); | 2660 | tp->packets_out); |
| @@ -2922,6 +2943,7 @@ void tcp_simple_retransmit(struct sock *sk) | |||
| 2922 | } | 2943 | } |
| 2923 | tcp_xmit_retransmit_queue(sk); | 2944 | tcp_xmit_retransmit_queue(sk); |
| 2924 | } | 2945 | } |
| 2946 | EXPORT_SYMBOL(tcp_simple_retransmit); | ||
| 2925 | 2947 | ||
| 2926 | /* Process an event, which can update packets-in-flight not trivially. | 2948 | /* Process an event, which can update packets-in-flight not trivially. |
| 2927 | * Main goal of this function is to calculate new estimate for left_out, | 2949 | * Main goal of this function is to calculate new estimate for left_out, |
| @@ -3270,7 +3292,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
| 3270 | * connection startup slow start one packet too | 3292 | * connection startup slow start one packet too |
| 3271 | * quickly. This is severely frowned upon behavior. | 3293 | * quickly. This is severely frowned upon behavior. |
| 3272 | */ | 3294 | */ |
| 3273 | if (!(scb->flags & TCPCB_FLAG_SYN)) { | 3295 | if (!(scb->flags & TCPHDR_SYN)) { |
| 3274 | flag |= FLAG_DATA_ACKED; | 3296 | flag |= FLAG_DATA_ACKED; |
| 3275 | } else { | 3297 | } else { |
| 3276 | flag |= FLAG_SYN_ACKED; | 3298 | flag |= FLAG_SYN_ACKED; |
| @@ -3694,7 +3716,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
| 3694 | } | 3716 | } |
| 3695 | 3717 | ||
| 3696 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) | 3718 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) |
| 3697 | dst_confirm(sk->sk_dst_cache); | 3719 | dst_confirm(__sk_dst_get(sk)); |
| 3698 | 3720 | ||
| 3699 | return 1; | 3721 | return 1; |
| 3700 | 3722 | ||
| @@ -3829,18 +3851,20 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
| 3829 | /* 16-bit multiple */ | 3851 | /* 16-bit multiple */ |
| 3830 | opt_rx->cookie_plus = opsize; | 3852 | opt_rx->cookie_plus = opsize; |
| 3831 | *hvpp = ptr; | 3853 | *hvpp = ptr; |
| 3854 | break; | ||
| 3832 | default: | 3855 | default: |
| 3833 | /* ignore option */ | 3856 | /* ignore option */ |
| 3834 | break; | 3857 | break; |
| 3835 | }; | 3858 | } |
| 3836 | break; | 3859 | break; |
| 3837 | }; | 3860 | } |
| 3838 | 3861 | ||
| 3839 | ptr += opsize-2; | 3862 | ptr += opsize-2; |
| 3840 | length -= opsize; | 3863 | length -= opsize; |
| 3841 | } | 3864 | } |
| 3842 | } | 3865 | } |
| 3843 | } | 3866 | } |
| 3867 | EXPORT_SYMBOL(tcp_parse_options); | ||
| 3844 | 3868 | ||
| 3845 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) | 3869 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) |
| 3846 | { | 3870 | { |
| @@ -3907,13 +3931,14 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th) | |||
| 3907 | if (opsize < 2 || opsize > length) | 3931 | if (opsize < 2 || opsize > length) |
| 3908 | return NULL; | 3932 | return NULL; |
| 3909 | if (opcode == TCPOPT_MD5SIG) | 3933 | if (opcode == TCPOPT_MD5SIG) |
| 3910 | return ptr; | 3934 | return opsize == TCPOLEN_MD5SIG ? ptr : NULL; |
| 3911 | } | 3935 | } |
| 3912 | ptr += opsize - 2; | 3936 | ptr += opsize - 2; |
| 3913 | length -= opsize; | 3937 | length -= opsize; |
| 3914 | } | 3938 | } |
| 3915 | return NULL; | 3939 | return NULL; |
| 3916 | } | 3940 | } |
| 3941 | EXPORT_SYMBOL(tcp_parse_md5sig_option); | ||
| 3917 | #endif | 3942 | #endif |
| 3918 | 3943 | ||
| 3919 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) | 3944 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) |
| @@ -4024,6 +4049,8 @@ static void tcp_reset(struct sock *sk) | |||
| 4024 | default: | 4049 | default: |
| 4025 | sk->sk_err = ECONNRESET; | 4050 | sk->sk_err = ECONNRESET; |
| 4026 | } | 4051 | } |
| 4052 | /* This barrier is coupled with smp_rmb() in tcp_poll() */ | ||
| 4053 | smp_wmb(); | ||
| 4027 | 4054 | ||
| 4028 | if (!sock_flag(sk, SOCK_DEAD)) | 4055 | if (!sock_flag(sk, SOCK_DEAD)) |
| 4029 | sk->sk_error_report(sk); | 4056 | sk->sk_error_report(sk); |
| @@ -4303,7 +4330,7 @@ static void tcp_ofo_queue(struct sock *sk) | |||
| 4303 | } | 4330 | } |
| 4304 | 4331 | ||
| 4305 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { | 4332 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { |
| 4306 | SOCK_DEBUG(sk, "ofo packet was already received \n"); | 4333 | SOCK_DEBUG(sk, "ofo packet was already received\n"); |
| 4307 | __skb_unlink(skb, &tp->out_of_order_queue); | 4334 | __skb_unlink(skb, &tp->out_of_order_queue); |
| 4308 | __kfree_skb(skb); | 4335 | __kfree_skb(skb); |
| 4309 | continue; | 4336 | continue; |
| @@ -4351,6 +4378,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
| 4351 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) | 4378 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) |
| 4352 | goto drop; | 4379 | goto drop; |
| 4353 | 4380 | ||
| 4381 | skb_dst_drop(skb); | ||
| 4354 | __skb_pull(skb, th->doff * 4); | 4382 | __skb_pull(skb, th->doff * 4); |
| 4355 | 4383 | ||
| 4356 | TCP_ECN_accept_cwr(tp, skb); | 4384 | TCP_ECN_accept_cwr(tp, skb); |
| @@ -5414,6 +5442,7 @@ discard: | |||
| 5414 | __kfree_skb(skb); | 5442 | __kfree_skb(skb); |
| 5415 | return 0; | 5443 | return 0; |
| 5416 | } | 5444 | } |
| 5445 | EXPORT_SYMBOL(tcp_rcv_established); | ||
| 5417 | 5446 | ||
| 5418 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5447 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
| 5419 | struct tcphdr *th, unsigned len) | 5448 | struct tcphdr *th, unsigned len) |
| @@ -5783,11 +5812,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 5783 | 5812 | ||
| 5784 | /* tcp_ack considers this ACK as duplicate | 5813 | /* tcp_ack considers this ACK as duplicate |
| 5785 | * and does not calculate rtt. | 5814 | * and does not calculate rtt. |
| 5786 | * Fix it at least with timestamps. | 5815 | * Force it here. |
| 5787 | */ | 5816 | */ |
| 5788 | if (tp->rx_opt.saw_tstamp && | 5817 | tcp_ack_update_rtt(sk, 0, 0); |
| 5789 | tp->rx_opt.rcv_tsecr && !tp->srtt) | ||
| 5790 | tcp_ack_saw_tstamp(sk, 0); | ||
| 5791 | 5818 | ||
| 5792 | if (tp->rx_opt.tstamp_ok) | 5819 | if (tp->rx_opt.tstamp_ok) |
| 5793 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 5820 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
| @@ -5819,7 +5846,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 5819 | if (tp->snd_una == tp->write_seq) { | 5846 | if (tp->snd_una == tp->write_seq) { |
| 5820 | tcp_set_state(sk, TCP_FIN_WAIT2); | 5847 | tcp_set_state(sk, TCP_FIN_WAIT2); |
| 5821 | sk->sk_shutdown |= SEND_SHUTDOWN; | 5848 | sk->sk_shutdown |= SEND_SHUTDOWN; |
| 5822 | dst_confirm(sk->sk_dst_cache); | 5849 | dst_confirm(__sk_dst_get(sk)); |
| 5823 | 5850 | ||
| 5824 | if (!sock_flag(sk, SOCK_DEAD)) | 5851 | if (!sock_flag(sk, SOCK_DEAD)) |
| 5825 | /* Wake up lingering close() */ | 5852 | /* Wake up lingering close() */ |
| @@ -5915,14 +5942,4 @@ discard: | |||
| 5915 | } | 5942 | } |
| 5916 | return 0; | 5943 | return 0; |
| 5917 | } | 5944 | } |
| 5918 | |||
| 5919 | EXPORT_SYMBOL(sysctl_tcp_ecn); | ||
| 5920 | EXPORT_SYMBOL(sysctl_tcp_reordering); | ||
| 5921 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | ||
| 5922 | EXPORT_SYMBOL(tcp_parse_options); | ||
| 5923 | #ifdef CONFIG_TCP_MD5SIG | ||
| 5924 | EXPORT_SYMBOL(tcp_parse_md5sig_option); | ||
| 5925 | #endif | ||
| 5926 | EXPORT_SYMBOL(tcp_rcv_established); | ||
| 5927 | EXPORT_SYMBOL(tcp_rcv_state_process); | 5945 | EXPORT_SYMBOL(tcp_rcv_state_process); |
| 5928 | EXPORT_SYMBOL(tcp_initialize_rcv_mss); | ||
