Diffstat (limited to 'net/ipv4/tcp_input.c')

-rw-r--r--  net/ipv4/tcp_input.c  180
1 file changed, 95 insertions, 85 deletions

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 28e029632493..2549b29b062d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -62,6 +62,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/kernel.h>
@@ -77,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
 int sysctl_tcp_sack __read_mostly = 1;
 int sysctl_tcp_fack __read_mostly = 1;
 int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+EXPORT_SYMBOL(sysctl_tcp_reordering);
 int sysctl_tcp_ecn __read_mostly = 2;
+EXPORT_SYMBOL(sysctl_tcp_ecn);
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 2;
+EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 
 int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
@@ -89,6 +93,8 @@ int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_nometrics_save __read_mostly;
 
+int sysctl_tcp_thin_dupack __read_mostly;
+
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_abc __read_mostly;
 
@@ -176,7 +182,7 @@ static void tcp_incr_quickack(struct sock *sk)
 	icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }
 
-void tcp_enter_quickack_mode(struct sock *sk)
+static void tcp_enter_quickack_mode(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	tcp_incr_quickack(sk);
@@ -253,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
 	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
 		     sizeof(struct sk_buff);
 
-	if (sk->sk_sndbuf < 3 * sndmem)
-		sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+	if (sk->sk_sndbuf < 3 * sndmem) {
+		sk->sk_sndbuf = 3 * sndmem;
+		if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+			sk->sk_sndbuf = sysctl_tcp_wmem[2];
+	}
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -390,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 	    !tcp_memory_pressure &&
-	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+	    atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
 	}
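(Both memory-pressure checks in this file move from atomic_read() to atomic_long_read(), tracking a type change of the global counter, apparently to avoid 32-bit overflow on large-memory machines. In the include/net/tcp.h of this era the counter reads, to the best of our knowledge:

	extern atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */

The same conversion appears again in tcp_should_expand_sndbuf() further down.)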
@@ -416,15 +425,16 @@ void tcp_initialize_rcv_mss(struct sock *sk)
 
 	inet_csk(sk)->icsk_ack.rcv_mss = hint;
 }
+EXPORT_SYMBOL(tcp_initialize_rcv_mss);
 
 /* Receiver "autotuning" code.
  *
  * The algorithm for RTT estimation w/o timestamps is based on
  * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
- * <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
+ * <http://public.lanl.gov/radiant/pubs.html#DRS>
  *
  * More detail on this code can be found at
- * <http://www.psc.edu/~jheffner/senior_thesis.ps>,
+ * <http://staff.psc.edu/jheffner/>,
  * though this reference is out of date. A new paper
  * is pending.
  */
@@ -724,7 +734,7 @@ void tcp_update_metrics(struct sock *sk)
 			 * Reset our results.
 			 */
 			if (!(dst_metric_locked(dst, RTAX_RTT)))
-				dst->metrics[RTAX_RTT - 1] = 0;
+				dst_metric_set(dst, RTAX_RTT, 0);
 			return;
 		}
 
@@ -766,57 +776,48 @@ void tcp_update_metrics(struct sock *sk)
 		if (dst_metric(dst, RTAX_SSTHRESH) &&
 		    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
 		    (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
-			dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
+			dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
 		if (!dst_metric_locked(dst, RTAX_CWND) &&
 		    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
-			dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
+			dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
 	} else if (tp->snd_cwnd > tp->snd_ssthresh &&
 		   icsk->icsk_ca_state == TCP_CA_Open) {
 		/* Cong. avoidance phase, cwnd is reliable. */
 		if (!dst_metric_locked(dst, RTAX_SSTHRESH))
-			dst->metrics[RTAX_SSTHRESH-1] =
-				max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
+			dst_metric_set(dst, RTAX_SSTHRESH,
+				       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
 		if (!dst_metric_locked(dst, RTAX_CWND))
-			dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
+			dst_metric_set(dst, RTAX_CWND,
+				       (dst_metric(dst, RTAX_CWND) +
+					tp->snd_cwnd) >> 1);
 	} else {
 		/* Else slow start did not finish, cwnd is non-sense,
 		   ssthresh may be also invalid.
 		 */
 		if (!dst_metric_locked(dst, RTAX_CWND))
-			dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
+			dst_metric_set(dst, RTAX_CWND,
+				       (dst_metric(dst, RTAX_CWND) +
+					tp->snd_ssthresh) >> 1);
 		if (dst_metric(dst, RTAX_SSTHRESH) &&
 		    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
 		    tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
-			dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
+			dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
 	}
 
 	if (!dst_metric_locked(dst, RTAX_REORDERING)) {
 		if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
 		    tp->reordering != sysctl_tcp_reordering)
-			dst->metrics[RTAX_REORDERING-1] = tp->reordering;
+			dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
 	}
 	}
 }
 
-/* Numbers are taken from RFC3390.
- *
- * John Heffner states:
- *
- *	The RFC specifies a window of no more than 4380 bytes
- *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
- *	is a bit misleading because they use a clamp at 4380 bytes
- *	rather than use a multiplier in the relevant range.
- */
 __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
-	if (!cwnd) {
-		if (tp->mss_cache > 1460)
-			cwnd = 2;
-		else
-			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
-	}
+	if (!cwnd)
+		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
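(For reference: the rewritten tcp_init_cwnd() folds the deleted RFC3390 table into a shared helper. A sketch of rfc3390_bytes_to_packets() as it appears in include/net/tcp.h around this time; the body is the helper's, not part of this hunk:

	/* Map the sender MSS to the RFC 3390 initial window in segments:
	 * min(4*MSS, max(2*MSS, 4380 bytes)) gives 4 segments for
	 * MSS <= 1095, 3 segments up to 2190 bytes, and 2 above that.
	 */
	static inline u32 rfc3390_bytes_to_packets(const u32 bytes)
	{
		return bytes <= 1095 ? 4 : (bytes > 2190 ? 2 : 3);
	}

Note the helper clamps in bytes, matching John Heffner's remark in the removed comment.)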
@@ -915,25 +916,20 @@ static void tcp_init_metrics(struct sock *sk)
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 	}
 	tcp_set_rto(sk);
-	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
-		goto reset;
-
-cwnd:
-	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
-	return;
-
-reset:
-	/* Play conservative. If timestamps are not
-	 * supported, TCP will fail to recalculate correct
-	 * rtt, if initial rto is too small. FORGET ALL AND RESET!
-	 */
-	if (!tp->rx_opt.saw_tstamp && tp->srtt) {
-		tp->srtt = 0;
-		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
-		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
+reset:
+		/* Play conservative. If timestamps are not
+		 * supported, TCP will fail to recalculate correct
+		 * rtt, if initial rto is too small. FORGET ALL AND RESET!
+		 */
+		if (!tp->rx_opt.saw_tstamp && tp->srtt) {
+			tp->srtt = 0;
+			tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
+			inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+		}
 	}
-	goto cwnd;
+	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
 static void tcp_update_reordering(struct sock *sk, const int metric,
@@ -2307,7 +2303,7 @@ static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
 
 static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 {
-	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
+	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
 }
 
 static inline int tcp_head_timedout(struct sock *sk)
@@ -2447,6 +2443,16 @@ static int tcp_time_to_recover(struct sock *sk)
 		return 1;
 	}
 
+	/* If a thin stream is detected, retransmit after first
+	 * received dupack. Employ only if SACK is supported in order
+	 * to avoid possible corner-case series of spurious retransmissions
+	 * Use only if there are no unsent data.
+	 */
+	if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
+	    tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
+	    tcp_is_sack(tp) && !tcp_send_head(sk))
+		return 1;
+
 	return 0;
 }
 
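(The new dupack test depends on tcp_stream_is_thin(), added to include/net/tcp.h by the same thin-stream series; a sketch for reference:

	/* Determines whether this is a thin stream (which may suffer from
	 * increased latency). Used to trigger latency-reducing mechanisms.
	 */
	static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp)
	{
		return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
	}

A stream with fewer than four packets in flight rarely generates the three dupacks classic fast retransmit needs, hence the single-dupack trigger above.)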
@@ -2491,7 +2497,7 @@ static void tcp_timeout_skbs(struct sock *sk)
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
-static void tcp_mark_head_lost(struct sock *sk, int packets)
+static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -2503,6 +2509,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 	if (tp->lost_skb_hint) {
 		skb = tp->lost_skb_hint;
 		cnt = tp->lost_cnt_hint;
+		/* Head already handled? */
+		if (mark_head && skb != tcp_write_queue_head(sk))
+			return;
 	} else {
 		skb = tcp_write_queue_head(sk);
 		cnt = 0;
@@ -2525,7 +2534,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 			cnt += tcp_skb_pcount(skb);
 
 		if (cnt > packets) {
-			if (tcp_is_sack(tp) || (oldcnt >= packets))
+			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+			    (oldcnt >= packets))
 				break;
 
 			mss = skb_shinfo(skb)->gso_size;
@@ -2536,6 +2546,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 		}
 
 		tcp_skb_mark_lost(tp, skb);
+
+		if (mark_head)
+			break;
 	}
 	tcp_verify_left_out(tp);
 }
@@ -2547,17 +2560,18 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_is_reno(tp)) {
-		tcp_mark_head_lost(sk, 1);
+		tcp_mark_head_lost(sk, 1, 1);
 	} else if (tcp_is_fack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, lost);
+		tcp_mark_head_lost(sk, lost, 0);
 	} else {
 		int sacked_upto = tp->sacked_out - tp->reordering;
-		if (sacked_upto < fast_rexmit)
-			sacked_upto = fast_rexmit;
-		tcp_mark_head_lost(sk, sacked_upto);
+		if (sacked_upto >= 0)
+			tcp_mark_head_lost(sk, sacked_upto, 0);
+		else if (fast_rexmit)
+			tcp_mark_head_lost(sk, 1, 1);
 	}
 
 	tcp_timeout_skbs(sk);
@@ -2623,7 +2637,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 	if (sk->sk_family == AF_INET) {
 		printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
 		       msg,
-		       &inet->daddr, ntohs(inet->dport),
+		       &inet->inet_daddr, ntohs(inet->inet_dport),
 		       tp->snd_cwnd, tcp_left_out(tp),
 		       tp->snd_ssthresh, tp->prior_ssthresh,
 		       tp->packets_out);
@@ -2633,7 +2647,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 		struct ipv6_pinfo *np = inet6_sk(sk);
 		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
 		       msg,
-		       &np->daddr, ntohs(inet->dport),
+		       &np->daddr, ntohs(inet->inet_dport),
 		       tp->snd_cwnd, tcp_left_out(tp),
 		       tp->snd_ssthresh, tp->prior_ssthresh,
 		       tp->packets_out);
@@ -2866,7 +2880,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
 		       icsk->icsk_mtup.probe_size;
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
-	tp->rcv_ssthresh = tcp_current_ssthresh(sk);
+	tp->snd_ssthresh = tcp_current_ssthresh(sk);
 
 	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
 	icsk->icsk_mtup.probe_size = 0;
@@ -2922,6 +2936,7 @@ void tcp_simple_retransmit(struct sock *sk)
 	}
 	tcp_xmit_retransmit_queue(sk);
 }
+EXPORT_SYMBOL(tcp_simple_retransmit);
 
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
@@ -2962,7 +2977,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
+		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
 	}
 
@@ -3270,7 +3285,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			 * connection startup slow start one packet too
 			 * quickly.  This is severely frowned upon behavior.
 			 */
-			if (!(scb->flags & TCPCB_FLAG_SYN)) {
+			if (!(scb->flags & TCPHDR_SYN)) {
 				flag |= FLAG_DATA_ACKED;
 			} else {
 				flag |= FLAG_SYN_ACKED;
@@ -3390,8 +3405,8 @@ static void tcp_ack_probe(struct sock *sk)
 
 static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
 {
-	return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
-		inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
+	return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
+		inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
 }
 
 static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
@@ -3408,9 +3423,9 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
 					const u32 ack, const u32 ack_seq,
 					const u32 nwin)
 {
-	return (after(ack, tp->snd_una) ||
+	return after(ack, tp->snd_una) ||
 		after(ack_seq, tp->snd_wl1) ||
-		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
+		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
 /* Update our send window.
@@ -3694,7 +3709,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	}
 
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
-		dst_confirm(sk->sk_dst_cache);
+		dst_confirm(__sk_dst_get(sk));
 
 	return 1;
 
@@ -3829,18 +3844,20 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				/* 16-bit multiple */
 				opt_rx->cookie_plus = opsize;
 				*hvpp = ptr;
+				break;
 			default:
 				/* ignore option */
 				break;
-			};
+			}
 			break;
-		};
+		}
 
 		ptr += opsize-2;
 		length -= opsize;
 	}
 }
+EXPORT_SYMBOL(tcp_parse_options);
 
 static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
 {
@@ -3907,13 +3924,14 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th)
 			if (opsize < 2 || opsize > length)
 				return NULL;
 			if (opcode == TCPOPT_MD5SIG)
-				return ptr;
+				return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
 		}
 		ptr += opsize - 2;
 		length -= opsize;
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(tcp_parse_md5sig_option);
 #endif
 
 static inline void tcp_store_ts_recent(struct tcp_sock *tp)
@@ -4024,6 +4042,8 @@ static void tcp_reset(struct sock *sk)
 	default:
 		sk->sk_err = ECONNRESET;
 	}
+	/* This barrier is coupled with smp_rmb() in tcp_poll() */
+	smp_wmb();
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_error_report(sk);
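(The smp_wmb() orders the sk->sk_err store before the sk_error_report() wakeup; the matching read barrier lives in tcp_poll() in net/ipv4/tcp.c, which per the other half of this change reads roughly:

	/* This barrier is coupled with smp_wmb() in tcp_reset() */
	smp_rmb();
	if (sk->sk_err)
		mask |= POLLERR;

so a poller that observes the wakeup cannot miss the error.)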
@@ -4303,7 +4323,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		}
 
 		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
-			SOCK_DEBUG(sk, "ofo packet was already received \n");
+			SOCK_DEBUG(sk, "ofo packet was already received\n");
 			__skb_unlink(skb, &tp->out_of_order_queue);
 			__kfree_skb(skb);
 			continue;
@@ -4351,6 +4371,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
 		goto drop;
 
+	skb_dst_drop(skb);
 	__skb_pull(skb, th->doff * 4);
 
 	TCP_ECN_accept_cwr(tp, skb);
@@ -4842,7 +4863,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
 		return 0;
 
 	/* If we are under soft global TCP memory pressure, do not expand.  */
-	if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+	if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
 		return 0;
 
 	/* If we filled the congestion window, do not expand.  */
@@ -5414,6 +5435,7 @@ discard:
 	__kfree_skb(skb);
 	return 0;
 }
+EXPORT_SYMBOL(tcp_rcv_established);
 
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 struct tcphdr *th, unsigned len)
@@ -5783,11 +5805,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 			/* tcp_ack considers this ACK as duplicate
 			 * and does not calculate rtt.
-			 * Fix it at least with timestamps.
+			 * Force it here.
 			 */
-			if (tp->rx_opt.saw_tstamp &&
-			    tp->rx_opt.rcv_tsecr && !tp->srtt)
-				tcp_ack_saw_tstamp(sk, 0);
+			tcp_ack_update_rtt(sk, 0, 0);
 
 			if (tp->rx_opt.tstamp_ok)
 				tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -5819,7 +5839,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (tp->snd_una == tp->write_seq) {
 			tcp_set_state(sk, TCP_FIN_WAIT2);
 			sk->sk_shutdown |= SEND_SHUTDOWN;
-			dst_confirm(sk->sk_dst_cache);
+			dst_confirm(__sk_dst_get(sk));
 
 			if (!sock_flag(sk, SOCK_DEAD))
 				/* Wake up lingering close() */
@@ -5915,14 +5935,4 @@ discard:
 	}
 	return 0;
 }
-
-EXPORT_SYMBOL(sysctl_tcp_ecn);
-EXPORT_SYMBOL(sysctl_tcp_reordering);
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-EXPORT_SYMBOL(tcp_parse_options);
-#ifdef CONFIG_TCP_MD5SIG
-EXPORT_SYMBOL(tcp_parse_md5sig_option);
-#endif
-EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
-EXPORT_SYMBOL(tcp_initialize_rcv_mss);