Diffstat (limited to 'net/ipv4/tcp_output.c')

 net/ipv4/tcp_output.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 124 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e2b4461074da..beb63dbc85f5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -74,6 +74,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
 
@@ -85,7 +86,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 		tp->frto_counter = 3;
 
 	tp->packets_out += tcp_skb_pcount(skb);
-	if (!prior_packets || tp->early_retrans_delayed)
+	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
 }
 
@@ -1959,6 +1961,9 @@ static int tcp_mtu_probe(struct sock *sk)
  * snd_up-64k-mss .. snd_up cannot be large. However, taking into
  * account rare use of URG, this is not a big flaw.
  *
+ * Send at most one packet when push_one > 0. Temporarily ignore
+ * cwnd limit to force at most one packet out when push_one == 2.
+
  * Returns true, if no segments are in flight and we have queued segments,
  * but cannot send anything now because of SWS or another problem.
  */
@@ -1994,8 +1999,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			goto repair; /* Skip network transmission */
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
-		if (!cwnd_quota)
-			break;
+		if (!cwnd_quota) {
+			if (push_one == 2)
+				/* Force out a loss probe pkt. */
+				cwnd_quota = 1;
+			else
+				break;
+		}
 
 		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
 			break;
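Together, the comment hunk and the cwnd_quota override above extend the contract of tcp_write_xmit()'s push_one argument: a caller passing 2 gets exactly one segment out even when tcp_cwnd_test() reports no quota, which is what the loss-probe path needs. An illustrative summary of the resulting modes (the enum names are mine, not the kernel's):

	/* Illustrative summary of tcp_write_xmit()'s push_one values. */
	enum push_mode {
		PUSH_NONE  = 0, /* normal path: send while cwnd/rwnd allow */
		PUSH_ONE   = 1, /* tcp_push_one(): at most one segment */
		PUSH_PROBE = 2, /* loss probe: one segment, cwnd limit ignored */
	};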
@@ -2049,10 +2059,120 @@ repair:
 	if (likely(sent_pkts)) {
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += sent_pkts;
+
+		/* Send one loss probe per tail loss episode. */
+		if (push_one != 2)
+			tcp_schedule_loss_probe(sk);
 		tcp_cwnd_validate(sk);
 		return false;
 	}
-	return !tp->packets_out && tcp_send_head(sk);
+	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
+}
2071 | |||
2072 | bool tcp_schedule_loss_probe(struct sock *sk) | ||
2073 | { | ||
2074 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2075 | struct tcp_sock *tp = tcp_sk(sk); | ||
2076 | u32 timeout, tlp_time_stamp, rto_time_stamp; | ||
2077 | u32 rtt = tp->srtt >> 3; | ||
2078 | |||
2079 | if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) | ||
2080 | return false; | ||
2081 | /* No consecutive loss probes. */ | ||
2082 | if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { | ||
2083 | tcp_rearm_rto(sk); | ||
2084 | return false; | ||
2085 | } | ||
2086 | /* Don't do any loss probe on a Fast Open connection before 3WHS | ||
2087 | * finishes. | ||
2088 | */ | ||
2089 | if (sk->sk_state == TCP_SYN_RECV) | ||
2090 | return false; | ||
2091 | |||
2092 | /* TLP is only scheduled when next timer event is RTO. */ | ||
2093 | if (icsk->icsk_pending != ICSK_TIME_RETRANS) | ||
2094 | return false; | ||
2095 | |||
2096 | /* Schedule a loss probe in 2*RTT for SACK capable connections | ||
2097 | * in Open state, that are either limited by cwnd or application. | ||
2098 | */ | ||
2099 | if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || | ||
2100 | !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) | ||
2101 | return false; | ||
2102 | |||
2103 | if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && | ||
2104 | tcp_send_head(sk)) | ||
2105 | return false; | ||
2106 | |||
2107 | /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account | ||
2108 | * for delayed ack when there's one outstanding packet. | ||
2109 | */ | ||
2110 | timeout = rtt << 1; | ||
2111 | if (tp->packets_out == 1) | ||
2112 | timeout = max_t(u32, timeout, | ||
2113 | (rtt + (rtt >> 1) + TCP_DELACK_MAX)); | ||
2114 | timeout = max_t(u32, timeout, msecs_to_jiffies(10)); | ||
2115 | |||
2116 | /* If RTO is shorter, just schedule TLP in its place. */ | ||
2117 | tlp_time_stamp = tcp_time_stamp + timeout; | ||
2118 | rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; | ||
2119 | if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { | ||
2120 | s32 delta = rto_time_stamp - tcp_time_stamp; | ||
2121 | if (delta > 0) | ||
2122 | timeout = delta; | ||
2123 | } | ||
2124 | |||
2125 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, | ||
2126 | TCP_RTO_MAX); | ||
2127 | return true; | ||
2128 | } | ||
2129 | |||
2130 | /* When probe timeout (PTO) fires, send a new segment if one exists, else | ||
2131 | * retransmit the last segment. | ||
2132 | */ | ||
2133 | void tcp_send_loss_probe(struct sock *sk) | ||
2134 | { | ||
2135 | struct sk_buff *skb; | ||
2136 | int pcount; | ||
2137 | int mss = tcp_current_mss(sk); | ||
2138 | int err = -1; | ||
2139 | |||
2140 | if (tcp_send_head(sk) != NULL) { | ||
2141 | err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); | ||
2142 | goto rearm_timer; | ||
2143 | } | ||
2144 | |||
2145 | /* Retransmit last segment. */ | ||
2146 | skb = tcp_write_queue_tail(sk); | ||
2147 | if (WARN_ON(!skb)) | ||
2148 | goto rearm_timer; | ||
2149 | |||
2150 | pcount = tcp_skb_pcount(skb); | ||
2151 | if (WARN_ON(!pcount)) | ||
2152 | goto rearm_timer; | ||
2153 | |||
2154 | if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { | ||
2155 | if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) | ||
2156 | goto rearm_timer; | ||
2157 | skb = tcp_write_queue_tail(sk); | ||
2158 | } | ||
2159 | |||
2160 | if (WARN_ON(!skb || !tcp_skb_pcount(skb))) | ||
2161 | goto rearm_timer; | ||
2162 | |||
2163 | /* Probe with zero data doesn't trigger fast recovery. */ | ||
2164 | if (skb->len > 0) | ||
2165 | err = __tcp_retransmit_skb(sk, skb); | ||
2166 | |||
2167 | rearm_timer: | ||
2168 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
2169 | inet_csk(sk)->icsk_rto, | ||
2170 | TCP_RTO_MAX); | ||
2171 | |||
2172 | if (likely(!err)) | ||
2173 | NET_INC_STATS_BH(sock_net(sk), | ||
2174 | LINUX_MIB_TCPLOSSPROBES); | ||
2175 | return; | ||
2056 | } | 2176 | } |
2057 | 2177 | ||
2058 | /* Push out any pending frames which were held back due to | 2178 | /* Push out any pending frames which were held back due to |
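When the PTO fires with no new data to send, tcp_send_loss_probe() retransmits the tail of the write queue; if that skb covers several segments (pcount > 1), it is first split at (pcount - 1) * mss so the probe is a single MSS-sized segment. A worked example of that split with assumed numbers (the kernel takes pcount from tcp_skb_pcount(), i.e. GSO state; the ceiling division below is just for illustration):

	#include <stdio.h>

	int main(void)
	{
		/* Assumed values: a 4000-byte tail skb at mss = 1460. */
		int mss = 1460, skb_len = 4000;
		int pcount = (skb_len + mss - 1) / mss; /* 3 segments */

		if (pcount > 1 && skb_len > (pcount - 1) * mss) {
			int split = (pcount - 1) * mss; /* 2920 */
			printf("fragment at %d; probe carries the last %d bytes\n",
			       split, skb_len - split); /* 1080 */
		}
		return 0;
	}

Either way, the function falls through to rearm_timer and resets ICSK_TIME_RETRANS, so a lost probe is still covered by the regular retransmission timer.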