 include/linux/tcp.h   |  6 ++++--
 include/net/tcp.h     | 11 ++++++++---
 net/ipv4/tcp_output.c | 37 +++++++++++++++++++++++--------------
 3 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bc35e4709e8e..a0513210798f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -197,7 +197,8 @@ struct tcp_sock {
 	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit  */
 		syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
-		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
@@ -209,6 +210,8 @@ struct tcp_sock {
 
 	u32	packets_out;	/* Packets which are "in flight" */
 	u32	retrans_out;	/* Retransmitted packets out */
+	u32	max_packets_out;  /* max packets_out in last window */
+	u32	max_packets_seq;  /* right edge of max_packets_out flight */
 
 	u16	urg_data;	/* Saved octet of OOB data and control flags */
 	u8	ecn_flags;	/* ECN status bits. */
@@ -230,7 +233,6 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
-	u32	lsnd_pending;	/* packets inflight or unsent since last xmit */
 	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
 	u32	prr_delivered;	/* Number of newly delivered packets to
 				 * receiver in Recovery. */
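
The three fields added above replace lsnd_pending as the state behind cwnd validation: max_packets_out records the peak number of packets in flight during the current window, max_packets_seq marks the right edge of that flight so the measurement can restart once it is fully acknowledged, and the is_cwnd_limited bit remembers whether the sender was actually blocked by snd_cwnd at that peak. For context, here is a minimal sketch of the kind of congestion-avoidance hook that consumes this state; the reno_cong_avoid name and exact signature are illustrative assumptions, not part of this patch:

    /* Illustrative Reno-style cong_avoid hook: grow cwnd only when the
     * flow was genuinely limited by cwnd (name/signature assumed).
     */
    static void reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
    {
    	struct tcp_sock *tp = tcp_sk(sk);

    	if (!tcp_is_cwnd_limited(sk))	/* app- or rwnd-limited window */
    		return;			/* do not grow cwnd */
    	if (tp->snd_cwnd <= tp->snd_ssthresh)
    		tcp_slow_start(tp, acked);		/* exponential */
    	else
    		tcp_cong_avoid_ai(tp, tp->snd_cwnd);	/* ~1 MSS per RTT */
    }
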
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f5d6ca4a9d28..e80abe4486cb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -971,8 +971,9 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
 
 /* We follow the spirit of RFC2861 to validate cwnd but implement a more
  * flexible approach. The RFC suggests cwnd should not be raised unless
- * it was fully used previously. But we allow cwnd to grow as long as the
- * application has used half the cwnd.
+ * it was fully used previously. And that's exactly what we do in
+ * congestion avoidance mode. But in slow start we allow cwnd to grow
+ * as long as the application has used half the cwnd.
  * Example :
  *    cwnd is 10 (IW10), but application sends 9 frames.
  *    We allow cwnd to reach 18 when all frames are ACKed.
@@ -985,7 +986,11 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	return tp->snd_cwnd < 2 * tp->lsnd_pending;
+	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+	return tp->is_cwnd_limited;
 }
 
 static inline void tcp_check_probe_timer(struct sock *sk)
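
To make the comment's example concrete: in slow start the predicate is snd_cwnd < 2 * max_packets_out, so an application that never puts more than 9 packets in flight lets cwnd climb from 10 only as far as 18. The following self-contained model reproduces that arithmetic outside the kernel; struct sock_model and cwnd_limited() are made-up stand-ins for tcp_sock and tcp_is_cwnd_limited():

    #include <stdbool.h>
    #include <stdio.h>

    /* Standalone model of the new tcp_is_cwnd_limited() decision. */
    struct sock_model {
    	unsigned int snd_cwnd, snd_ssthresh, max_packets_out;
    	bool is_cwnd_limited;
    };

    static bool cwnd_limited(const struct sock_model *tp)
    {
    	if (tp->snd_cwnd <= tp->snd_ssthresh)	/* slow start */
    		return tp->snd_cwnd < 2 * tp->max_packets_out;
    	return tp->is_cwnd_limited;		/* congestion avoidance */
    }

    int main(void)
    {
    	/* IW10, app keeps 9 packets in flight, ssthresh not yet hit. */
    	struct sock_model tp = { 10, 0x7fffffff, 9, false };

    	while (cwnd_limited(&tp))	/* grow while cwnd is the limit */
    		tp.snd_cwnd++;
    	printf("cwnd growth stops at %u\n", tp.snd_cwnd);	/* 18 */
    	return 0;
    }
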
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3d61c52bdf79..d463c35db33d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1402,11 +1402,19 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
+static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	tp->lsnd_pending = tp->packets_out + unsent_segs;
+	/* Track the maximum number of outstanding packets in each
+	 * window, and remember whether we were cwnd-limited then.
+	 */
+	if (!before(tp->snd_una, tp->max_packets_seq) ||
+	    tp->packets_out > tp->max_packets_out) {
+		tp->max_packets_out = tp->packets_out;
+		tp->max_packets_seq = tp->snd_nxt;
+		tp->is_cwnd_limited = is_cwnd_limited;
+	}
 
 	if (tcp_is_cwnd_limited(sk)) {
 		/* Network is feed fully. */
@@ -1660,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  *
  * This algorithm is from John Heffner.
  */
-static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+				 bool *is_cwnd_limited)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1724,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	if (!tp->tso_deferred)
 		tp->tso_deferred = 1 | (jiffies << 1);
 
+	if (cong_win < send_win && cong_win < skb->len)
+		*is_cwnd_limited = true;
+
 	return true;
 
 send_now:
@@ -1881,9 +1893,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, sent_pkts, unsent_segs = 0;
+	unsigned int tso_segs, sent_pkts;
 	int cwnd_quota;
 	int result;
+	bool is_cwnd_limited = false;
 
 	sent_pkts = 0;
 
@@ -1908,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
 		if (!cwnd_quota) {
+			is_cwnd_limited = true;
 			if (push_one == 2)
 				/* Force out a loss probe pkt. */
 				cwnd_quota = 1;
@@ -1924,8 +1938,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (!push_one && tcp_tso_should_defer(sk, skb))
-				goto compute_unsent_segs;
+			if (!push_one &&
+			    tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
+				break;
 		}
 
 		/* TCP Small Queues :
@@ -1950,14 +1965,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		 * there is no smp_mb__after_set_bit() yet
 		 */
 		smp_mb__after_clear_bit();
-		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
-			u32 unsent_bytes;
-
-compute_unsent_segs:
-			unsent_bytes = tp->write_seq - tp->snd_nxt;
-			unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
+		if (atomic_read(&sk->sk_wmem_alloc) > limit)
 			break;
-		}
 	}
 
 	limit = mss_now;
@@ -1997,7 +2006,7 @@ repair:
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk, unsent_segs);
+		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
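
Taken together: tcp_write_xmit() raises is_cwnd_limited when tcp_cwnd_test() returns no quota, or when TSO deferral was bound by the congestion window rather than the send window, and tcp_cwnd_validate() latches that verdict once per window. The latch restarts when snd_una reaches max_packets_seq, i.e. when the measured flight has been fully acknowledged. Below is a compact user-space model of that latch with illustrative names; before() mirrors the kernel's wraparound-safe sequence compare:

    #include <stdbool.h>
    #include <stdint.h>

    /* 32-bit sequence-number compare with wraparound, as in the kernel. */
    static bool before(uint32_t seq1, uint32_t seq2)
    {
    	return (int32_t)(seq1 - seq2) < 0;
    }

    struct latch {
    	uint32_t max_packets_out;	/* peak flight in this window */
    	uint32_t max_packets_seq;	/* right edge of that flight */
    	bool	 is_cwnd_limited;	/* were we cwnd-bound at the peak? */
    };

    /* Called on each transmit: start a new measurement window once the
     * old flight is fully ACKed, or raise the recorded peak in place.
     */
    static void record_flight(struct latch *l, uint32_t snd_una,
    			      uint32_t snd_nxt, uint32_t packets_out,
    			      bool cwnd_limited_now)
    {
    	if (!before(snd_una, l->max_packets_seq) ||
    	    packets_out > l->max_packets_out) {
    		l->max_packets_out = packets_out;
    		l->max_packets_seq = snd_nxt;
    		l->is_cwnd_limited = cwnd_limited_now;
    	}
    }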