Diffstat (limited to 'net/ipv4/tcp_input.c')
 -rw-r--r--  net/ipv4/tcp_input.c | 152
 1 file changed, 74 insertions(+), 78 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b55f60f6fcbe..bef9f04c22ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -182,7 +182,7 @@ static void tcp_incr_quickack(struct sock *sk)
 	icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }
 
-void tcp_enter_quickack_mode(struct sock *sk)
+static void tcp_enter_quickack_mode(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	tcp_incr_quickack(sk);
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
 	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
 		     sizeof(struct sk_buff);
 
-	if (sk->sk_sndbuf < 3 * sndmem)
-		sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+	if (sk->sk_sndbuf < 3 * sndmem) {
+		sk->sk_sndbuf = 3 * sndmem;
+		if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+			sk->sk_sndbuf = sysctl_tcp_wmem[2];
+	}
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 	    !tcp_memory_pressure &&
-	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+	    atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
 	}
@@ -428,10 +431,10 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
  *
  * The algorithm for RTT estimation w/o timestamps is based on
  * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
- * <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
+ * <http://public.lanl.gov/radiant/pubs.html#DRS>
  *
  * More detail on this code can be found at
- * <http://www.psc.edu/~jheffner/senior_thesis.ps>,
+ * <http://staff.psc.edu/jheffner/>,
  * though this reference is out of date. A new paper
  * is pending.
  */
@@ -731,7 +734,7 @@ void tcp_update_metrics(struct sock *sk)
			 * Reset our results.
			 */
			if (!(dst_metric_locked(dst, RTAX_RTT)))
-				dst->metrics[RTAX_RTT - 1] = 0;
+				dst_metric_set(dst, RTAX_RTT, 0);
			return;
		}
 
@@ -773,57 +776,48 @@ void tcp_update_metrics(struct sock *sk)
			if (dst_metric(dst, RTAX_SSTHRESH) &&
			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
			    (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
-				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
+				dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
			if (!dst_metric_locked(dst, RTAX_CWND) &&
			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
-				dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
+				dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
			   icsk->icsk_ca_state == TCP_CA_Open) {
			/* Cong. avoidance phase, cwnd is reliable. */
			if (!dst_metric_locked(dst, RTAX_SSTHRESH))
-				dst->metrics[RTAX_SSTHRESH-1] =
-					max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
+				dst_metric_set(dst, RTAX_SSTHRESH,
+					       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
			if (!dst_metric_locked(dst, RTAX_CWND))
-				dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
+				dst_metric_set(dst, RTAX_CWND,
+					       (dst_metric(dst, RTAX_CWND) +
+						tp->snd_cwnd) >> 1);
		} else {
			/* Else slow start did not finish, cwnd is non-sense,
			   ssthresh may be also invalid.
			 */
			if (!dst_metric_locked(dst, RTAX_CWND))
-				dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
+				dst_metric_set(dst, RTAX_CWND,
+					       (dst_metric(dst, RTAX_CWND) +
+						tp->snd_ssthresh) >> 1);
			if (dst_metric(dst, RTAX_SSTHRESH) &&
			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
			    tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
-				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
+				dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
		}
 
		if (!dst_metric_locked(dst, RTAX_REORDERING)) {
			if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
			    tp->reordering != sysctl_tcp_reordering)
-				dst->metrics[RTAX_REORDERING-1] = tp->reordering;
+				dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
		}
	}
 }
 
-/* Numbers are taken from RFC3390.
- *
- * John Heffner states:
- *
- *	The RFC specifies a window of no more than 4380 bytes
- *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
- *	is a bit misleading because they use a clamp at 4380 bytes
- *	rather than use a multiplier in the relevant range.
- */
 __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 {
	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
-	if (!cwnd) {
-		if (tp->mss_cache > 1460)
-			cwnd = 2;
-		else
-			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
-	}
+	if (!cwnd)
+		cwnd = TCP_INIT_CWND;
	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
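The hunk above drops the per-MSS RFC 3390 initial window (2, 3 or 4 segments depending on the MSS) in favour of a single TCP_INIT_CWND constant, which in kernels carrying the IW10 change is 10 segments. As a rough user-space sketch of the before/after behaviour (the constant and helper below are illustrative stand-ins, not kernel symbols):

/* Illustration only: compares the removed RFC 3390 initial-window
 * computation with the flat constant the hunk switches to. */
#include <stdio.h>

#define EXAMPLE_INIT_CWND 10	/* assumed value of TCP_INIT_CWND (IW10) */

/* old behaviour: initial cwnd in segments, chosen from the cached MSS */
static unsigned int rfc3390_init_cwnd(unsigned int mss)
{
	if (mss > 1460)
		return 2;
	return (mss > 1095) ? 3 : 4;
}

int main(void)
{
	const unsigned int mss_samples[] = { 536, 1200, 1460, 9000 };

	for (unsigned int i = 0; i < 4; i++)
		printf("mss %4u: old iw = %u segments, new iw = %u segments\n",
		       mss_samples[i], rfc3390_init_cwnd(mss_samples[i]),
		       EXAMPLE_INIT_CWND);
	return 0;
}

In both the old and new code the result is still overridden by a cached RTAX_INITCWND route metric and capped by tp->snd_cwnd_clamp, as the unchanged context lines show.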
@@ -922,25 +916,20 @@ static void tcp_init_metrics(struct sock *sk)
		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
	}
	tcp_set_rto(sk);
-	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
-		goto reset;
-
-cwnd:
-	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
-	return;
-
+	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
 reset:
	/* Play conservative. If timestamps are not
	 * supported, TCP will fail to recalculate correct
	 * rtt, if initial rto is too small. FORGET ALL AND RESET!
	 */
	if (!tp->rx_opt.saw_tstamp && tp->srtt) {
		tp->srtt = 0;
		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+	}
	}
-	goto cwnd;
+	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
 static void tcp_update_reordering(struct sock *sk, const int metric,
@@ -1233,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
	}
 
	/* D-SACK for already forgotten data... Do dumb counting. */
-	if (dup_sack &&
+	if (dup_sack && tp->undo_marker && tp->undo_retrans &&
	    !after(end_seq_0, prior_snd_una) &&
	    after(end_seq_0, tp->undo_marker))
		tp->undo_retrans--;
@@ -1310,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 
	/* Account D-SACK for retransmitted packet. */
	if (dup_sack && (sacked & TCPCB_RETRANS)) {
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+		if (tp->undo_marker && tp->undo_retrans &&
+		    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
			tp->undo_retrans--;
		if (sacked & TCPCB_SACKED_ACKED)
			state->reord = min(fack_count, state->reord);
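Both D-SACK hunks above add the same guard: only decrement tp->undo_retrans while an undo episode is actually being tracked (undo_marker set and undo_retrans still positive). A rough user-space sketch of why the guard matters, with simplified stand-in types rather than kernel structures:

/* Illustration only: undo_retrans counts retransmissions not yet reported
 * spurious via D-SACK; when it reaches zero during an undo episode the
 * sender may revert its cwnd reduction.  Counting a D-SACK outside an
 * episode (undo_marker == 0) could let the counter go negative and enable
 * a bogus undo later. */
#include <stdio.h>

struct undo_state {
	unsigned int undo_marker;	/* nonzero while an undo episode is tracked */
	int undo_retrans;		/* retransmissions still unaccounted for */
};

static void account_dsack(struct undo_state *st, int dup_sack, int was_retrans)
{
	/* the patched check: require an active episode before counting */
	if (dup_sack && was_retrans && st->undo_marker && st->undo_retrans)
		st->undo_retrans--;
}

int main(void)
{
	struct undo_state idle   = { .undo_marker = 0,    .undo_retrans = 0 };
	struct undo_state active = { .undo_marker = 1000, .undo_retrans = 2 };

	account_dsack(&idle, 1, 1);	/* stray D-SACK outside any episode: stays 0 */
	account_dsack(&active, 1, 1);	/* D-SACK during a tracked episode: 2 -> 1 */
	printf("idle: %d, active: %d\n", idle.undo_retrans, active.undo_retrans);
	return 0;
}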
@@ -2314,7 +2304,7 @@ static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
 
 static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 {
-	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
+	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
 }
 
 static inline int tcp_head_timedout(struct sock *sk)
@@ -2508,7 +2498,7 @@ static void tcp_timeout_skbs(struct sock *sk)
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
-static void tcp_mark_head_lost(struct sock *sk, int packets)
+static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
@@ -2516,13 +2506,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
	int err;
	unsigned int mss;
 
-	if (packets == 0)
-		return;
-
	WARN_ON(packets > tp->packets_out);
	if (tp->lost_skb_hint) {
		skb = tp->lost_skb_hint;
		cnt = tp->lost_cnt_hint;
+		/* Head already handled? */
+		if (mark_head && skb != tcp_write_queue_head(sk))
+			return;
	} else {
		skb = tcp_write_queue_head(sk);
		cnt = 0;
@@ -2557,6 +2547,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
		}
 
		tcp_skb_mark_lost(tp, skb);
+
+		if (mark_head)
+			break;
	}
	tcp_verify_left_out(tp);
 }
@@ -2568,17 +2561,18 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
	struct tcp_sock *tp = tcp_sk(sk);
 
	if (tcp_is_reno(tp)) {
-		tcp_mark_head_lost(sk, 1);
+		tcp_mark_head_lost(sk, 1, 1);
	} else if (tcp_is_fack(tp)) {
		int lost = tp->fackets_out - tp->reordering;
		if (lost <= 0)
			lost = 1;
-		tcp_mark_head_lost(sk, lost);
+		tcp_mark_head_lost(sk, lost, 0);
	} else {
		int sacked_upto = tp->sacked_out - tp->reordering;
-		if (sacked_upto < fast_rexmit)
-			sacked_upto = fast_rexmit;
-		tcp_mark_head_lost(sk, sacked_upto);
+		if (sacked_upto >= 0)
+			tcp_mark_head_lost(sk, sacked_upto, 0);
+		else if (fast_rexmit)
+			tcp_mark_head_lost(sk, 1, 1);
	}
 
	tcp_timeout_skbs(sk);
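With the new mark_head argument, the plain-SACK branch above no longer inflates sacked_upto up to fast_rexmit: it either marks the computed number of packets, or, when nothing else qualifies but a fast retransmit is due, marks only the head of the write queue. A small user-space sketch of the new decision (hypothetical names, not kernel code):

/* Illustration only: the branch taken by the patched non-FACK (plain SACK)
 * case of tcp_update_scoreboard(). */
#include <stdio.h>

static void scoreboard_decision(int sacked_out, int reordering, int fast_rexmit)
{
	int sacked_upto = sacked_out - reordering;

	if (sacked_upto >= 0)
		printf("mark %d packet(s) lost (mark_head = 0)\n", sacked_upto);
	else if (fast_rexmit)
		printf("mark only the head segment lost (mark_head = 1)\n");
	else
		printf("mark nothing\n");
}

int main(void)
{
	scoreboard_decision(5, 3, 0);	/* enough SACKed segments above reordering */
	scoreboard_decision(2, 3, 1);	/* below threshold, but fast retransmit due */
	scoreboard_decision(2, 3, 0);	/* below threshold, nothing to do */
	return 0;
}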
@@ -2665,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #define DBGUNDO(x...) do { } while (0)
 #endif
 
-static void tcp_undo_cwr(struct sock *sk, const int undo)
+static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 {
	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2677,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
		else
			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
 
-		if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
+		if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
			tp->snd_ssthresh = tp->prior_ssthresh;
			TCP_ECN_withdraw_cwr(tp);
		}
	} else {
		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
	}
-	tcp_moderate_cwnd(tp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2705,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk)
		 * or our original transmission succeeded.
		 */
		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
			mib_idx = LINUX_MIB_TCPLOSSUNDO;
		else
@@ -2732,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
 
	if (tp->undo_marker && !tp->undo_retrans) {
		DBGUNDO(sk, "D-SACK");
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
		tp->undo_marker = 0;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
	}
@@ -2785,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
 
		DBGUNDO(sk, "Hoe");
-		tcp_undo_cwr(sk, 0);
+		tcp_undo_cwr(sk, false);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
 
		/* So... Do not make Hoe's retransmit yet.
@@ -2814,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk)
 
		DBGUNDO(sk, "partial loss");
		tp->lost_out = 0;
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
		inet_csk(sk)->icsk_retransmits = 0;
		tp->undo_marker = 0;
@@ -2828,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk)
 static inline void tcp_complete_cwr(struct sock *sk)
 {
	struct tcp_sock *tp = tcp_sk(sk);
-	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	/* Do not moderate cwnd if it's already undone in cwr or recovery */
+	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+	}
	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
@@ -2887,7 +2883,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
		       icsk->icsk_mtup.probe_size;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
-	tp->rcv_ssthresh = tcp_current_ssthresh(sk);
+	tp->snd_ssthresh = tcp_current_ssthresh(sk);
 
	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
	icsk->icsk_mtup.probe_size = 0;
@@ -2984,7 +2980,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
	    before(tp->snd_una, tp->high_seq) &&
	    icsk->icsk_ca_state != TCP_CA_Open &&
	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
+		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
	}
 
@@ -3356,7 +3352,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
						 net_invalid_timestamp()))
					rtt_us = ktime_us_delta(ktime_get_real(),
								last_ackt);
-				else if (ca_seq_rtt > 0)
+				else if (ca_seq_rtt >= 0)
					rtt_us = jiffies_to_usecs(ca_seq_rtt);
			}
 
@@ -3412,8 +3408,8 @@ static void tcp_ack_probe(struct sock *sk)
 
 static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
 {
-	return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
-		inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
+	return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
+		inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
 }
 
 static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
@@ -3430,9 +3426,9 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
					const u32 ack, const u32 ack_seq,
					const u32 nwin)
 {
-	return (after(ack, tp->snd_una) ||
+	return	after(ack, tp->snd_una) ||
		after(ack_seq, tp->snd_wl1) ||
-		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
+		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
 /* Update our send window.
@@ -3500,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
	if (flag & FLAG_ECE)
		tcp_ratehalving_spur_to_response(sk);
	else
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 }
 
 /* F-RTO spurious RTO detection algorithm (RFC4138)
@@ -4406,7 +4402,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
				tp->ucopy.len -= chunk;
				tp->copied_seq += chunk;
-				eaten = (chunk == skb->len && !th->fin);
+				eaten = (chunk == skb->len);
				tcp_rcv_space_adjust(sk);
			}
			local_bh_disable();
@@ -4870,7 +4866,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
		return 0;
 
	/* If we are under soft global TCP memory pressure, do not expand. */
-	if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+	if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
		return 0;
 
	/* If we filled the congestion window, do not expand. */