diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 45 |
1 files changed, 28 insertions, 17 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eda4f4a233f3..b54d9d37b636 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -66,6 +66,7 @@ | |||
66 | #include <linux/mm.h> | 66 | #include <linux/mm.h> |
67 | #include <linux/module.h> | 67 | #include <linux/module.h> |
68 | #include <linux/sysctl.h> | 68 | #include <linux/sysctl.h> |
69 | #include <net/dst.h> | ||
69 | #include <net/tcp.h> | 70 | #include <net/tcp.h> |
70 | #include <net/inet_common.h> | 71 | #include <net/inet_common.h> |
71 | #include <linux/ipsec.h> | 72 | #include <linux/ipsec.h> |
@@ -113,8 +114,6 @@ int sysctl_tcp_abc __read_mostly; | |||
113 | #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) | 114 | #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) |
114 | #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) | 115 | #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) |
115 | 116 | ||
116 | #define IsSackFrto() (sysctl_tcp_frto == 0x2) | ||
117 | |||
118 | #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) | 117 | #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) |
119 | #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) | 118 | #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) |
120 | 119 | ||
@@ -605,7 +604,7 @@ static u32 tcp_rto_min(struct sock *sk) | |||
605 | u32 rto_min = TCP_RTO_MIN; | 604 | u32 rto_min = TCP_RTO_MIN; |
606 | 605 | ||
607 | if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) | 606 | if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) |
608 | rto_min = dst->metrics[RTAX_RTO_MIN - 1]; | 607 | rto_min = dst_metric(dst, RTAX_RTO_MIN); |
609 | return rto_min; | 608 | return rto_min; |
610 | } | 609 | } |
611 | 610 | ||
@@ -769,7 +768,7 @@ void tcp_update_metrics(struct sock *sk) | |||
769 | dst->metrics[RTAX_RTTVAR - 1] = m; | 768 | dst->metrics[RTAX_RTTVAR - 1] = m; |
770 | else | 769 | else |
771 | dst->metrics[RTAX_RTTVAR-1] -= | 770 | dst->metrics[RTAX_RTTVAR-1] -= |
772 | (dst->metrics[RTAX_RTTVAR-1] - m)>>2; | 771 | (dst_metric(dst, RTAX_RTTVAR) - m)>>2; |
773 | } | 772 | } |
774 | 773 | ||
775 | if (tp->snd_ssthresh >= 0xFFFF) { | 774 | if (tp->snd_ssthresh >= 0xFFFF) { |
@@ -788,21 +787,21 @@ void tcp_update_metrics(struct sock *sk) | |||
788 | dst->metrics[RTAX_SSTHRESH-1] = | 787 | dst->metrics[RTAX_SSTHRESH-1] = |
789 | max(tp->snd_cwnd >> 1, tp->snd_ssthresh); | 788 | max(tp->snd_cwnd >> 1, tp->snd_ssthresh); |
790 | if (!dst_metric_locked(dst, RTAX_CWND)) | 789 | if (!dst_metric_locked(dst, RTAX_CWND)) |
791 | dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1; | 790 | dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1; |
792 | } else { | 791 | } else { |
793 | /* Else slow start did not finish, cwnd is non-sense, | 792 | /* Else slow start did not finish, cwnd is non-sense, |
794 | ssthresh may be also invalid. | 793 | ssthresh may be also invalid. |
795 | */ | 794 | */ |
796 | if (!dst_metric_locked(dst, RTAX_CWND)) | 795 | if (!dst_metric_locked(dst, RTAX_CWND)) |
797 | dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1; | 796 | dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1; |
798 | if (dst->metrics[RTAX_SSTHRESH-1] && | 797 | if (dst_metric(dst, RTAX_SSTHRESH) && |
799 | !dst_metric_locked(dst, RTAX_SSTHRESH) && | 798 | !dst_metric_locked(dst, RTAX_SSTHRESH) && |
800 | tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1]) | 799 | tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) |
801 | dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; | 800 | dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; |
802 | } | 801 | } |
803 | 802 | ||
804 | if (!dst_metric_locked(dst, RTAX_REORDERING)) { | 803 | if (!dst_metric_locked(dst, RTAX_REORDERING)) { |
805 | if (dst->metrics[RTAX_REORDERING-1] < tp->reordering && | 804 | if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && |
806 | tp->reordering != sysctl_tcp_reordering) | 805 | tp->reordering != sysctl_tcp_reordering) |
807 | dst->metrics[RTAX_REORDERING-1] = tp->reordering; | 806 | dst->metrics[RTAX_REORDERING-1] = tp->reordering; |
808 | } | 807 | } |
@@ -1685,6 +1684,11 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) | |||
1685 | tp->sacked_out = 0; | 1684 | tp->sacked_out = 0; |
1686 | } | 1685 | } |
1687 | 1686 | ||
1687 | static int tcp_is_sackfrto(const struct tcp_sock *tp) | ||
1688 | { | ||
1689 | return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); | ||
1690 | } | ||
1691 | |||
1688 | /* F-RTO can only be used if TCP has never retransmitted anything other than | 1692 | /* F-RTO can only be used if TCP has never retransmitted anything other than |
1689 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) | 1693 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) |
1690 | */ | 1694 | */ |
@@ -1701,7 +1705,7 @@ int tcp_use_frto(struct sock *sk) | |||
1701 | if (icsk->icsk_mtup.probe_size) | 1705 | if (icsk->icsk_mtup.probe_size) |
1702 | return 0; | 1706 | return 0; |
1703 | 1707 | ||
1704 | if (IsSackFrto()) | 1708 | if (tcp_is_sackfrto(tp)) |
1705 | return 1; | 1709 | return 1; |
1706 | 1710 | ||
1707 | /* Avoid expensive walking of rexmit queue if possible */ | 1711 | /* Avoid expensive walking of rexmit queue if possible */ |
@@ -1791,7 +1795,7 @@ void tcp_enter_frto(struct sock *sk) | |||
1791 | /* Earlier loss recovery underway (see RFC4138; Appendix B). | 1795 | /* Earlier loss recovery underway (see RFC4138; Appendix B). |
1792 | * The last condition is necessary at least in tp->frto_counter case. | 1796 | * The last condition is necessary at least in tp->frto_counter case. |
1793 | */ | 1797 | */ |
1794 | if (IsSackFrto() && (tp->frto_counter || | 1798 | if (tcp_is_sackfrto(tp) && (tp->frto_counter || |
1795 | ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && | 1799 | ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && |
1796 | after(tp->high_seq, tp->snd_una)) { | 1800 | after(tp->high_seq, tp->snd_una)) { |
1797 | tp->frto_highmark = tp->high_seq; | 1801 | tp->frto_highmark = tp->high_seq; |
@@ -1838,9 +1842,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | |||
1838 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1842 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1839 | } | 1843 | } |
1840 | 1844 | ||
1841 | /* Don't lost mark skbs that were fwd transmitted after RTO */ | 1845 | /* Marking forward transmissions that were made after RTO lost |
1842 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) && | 1846 | * can cause unnecessary retransmissions in some scenarios, |
1843 | !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { | 1847 | * SACK blocks will mitigate that in some but not in all cases. |
1848 | * We used to not mark them but it was causing break-ups with | ||
1849 | * receivers that do only in-order receival. | ||
1850 | * | ||
1851 | * TODO: we could detect presence of such receiver and select | ||
1852 | * different behavior per flow. | ||
1853 | */ | ||
1854 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | ||
1844 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1855 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1845 | tp->lost_out += tcp_skb_pcount(skb); | 1856 | tp->lost_out += tcp_skb_pcount(skb); |
1846 | } | 1857 | } |
@@ -1856,7 +1867,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | |||
1856 | tp->reordering = min_t(unsigned int, tp->reordering, | 1867 | tp->reordering = min_t(unsigned int, tp->reordering, |
1857 | sysctl_tcp_reordering); | 1868 | sysctl_tcp_reordering); |
1858 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1869 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1859 | tp->high_seq = tp->frto_highmark; | 1870 | tp->high_seq = tp->snd_nxt; |
1860 | TCP_ECN_queue_cwr(tp); | 1871 | TCP_ECN_queue_cwr(tp); |
1861 | 1872 | ||
1862 | tcp_clear_retrans_hints_partial(tp); | 1873 | tcp_clear_retrans_hints_partial(tp); |
@@ -2478,7 +2489,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) | |||
2478 | 2489 | ||
2479 | tcp_verify_left_out(tp); | 2490 | tcp_verify_left_out(tp); |
2480 | 2491 | ||
2481 | if (tp->retrans_out == 0) | 2492 | if (!tp->frto_counter && tp->retrans_out == 0) |
2482 | tp->retrans_stamp = 0; | 2493 | tp->retrans_stamp = 0; |
2483 | 2494 | ||
2484 | if (flag & FLAG_ECE) | 2495 | if (flag & FLAG_ECE) |
@@ -3123,7 +3134,7 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3123 | return 1; | 3134 | return 1; |
3124 | } | 3135 | } |
3125 | 3136 | ||
3126 | if (!IsSackFrto() || tcp_is_reno(tp)) { | 3137 | if (!tcp_is_sackfrto(tp)) { |
3127 | /* RFC4138 shortcoming in step 2; should also have case c): | 3138 | /* RFC4138 shortcoming in step 2; should also have case c): |
3128 | * ACK isn't duplicate nor advances window, e.g., opposite dir | 3139 | * ACK isn't duplicate nor advances window, e.g., opposite dir |
3129 | * data, winupdate | 3140 | * data, winupdate |