author    Eric Dumazet <edumazet@google.com>    2015-02-26 17:10:18 -0500
committer David S. Miller <davem@davemloft.net> 2015-02-28 15:10:39 -0500
commit    5f852eb536ad651b8734559dcf4353514cb0bea3 (patch)
tree      e6661e5fba290360597892ec96a58444db7c78a0
parent    6588af614e7b79294fbcd4a666a7422c0c854e80 (diff)
tcp: tso: remove tp->tso_deferred
TSO relies on the ability to defer sending a small number of packets. The heuristic is to wait for future ACKs in the hope of sending more packets at once. The current algorithm uses a per-socket tso_deferred field as a pseudo timer.

This pseudo timer relies on future ACKs, but there is no guarantee we receive them in time.

A fix would be to use a real timer, but the cost of such a timer is probably too expensive for typical cases.

This patch changes the logic to test the time of the last transmit, because we should not add bursts of more than 1ms for any given flow.

We've used this patch for about two years at Google, before FQ/pacing, as it would reduce a fair amount of bursts.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
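[Editor's note: a minimal sketch of the new deferral gate, assuming 32-bit jiffies-based timestamps with HZ=1000 (one tick = 1ms); defer_allowed(), now and lsndtime are illustrative stand-ins for the kernel's tcp_time_stamp and tp->lsndtime, not part of this patch.]

	#include <stdint.h>
	#include <stdbool.h>

	/* Return true if deferring is still acceptable: the last
	 * transmit happened within the current 1ms tick. Subtracting
	 * in unsigned and casting to signed keeps the test correct
	 * across the 32-bit timestamp wraparound.
	 */
	static bool defer_allowed(uint32_t now, uint32_t lsndtime)
	{
		return (int32_t)(now - lsndtime) <= 0;
	}

As soon as one full tick has elapsed since the last write, the gate forces an immediate send, bounding the burst any flow can accumulate to about 1ms.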
 include/linux/tcp.h   |  1 -
 net/ipv4/tcp_output.c | 14 +++++---------
 2 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1a7adb411647..97dbf16f7d9d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -236,7 +236,6 @@ struct tcp_sock {
 	u32	lost_out;	/* Lost packets			*/
 	u32	sacked_out;	/* SACK'd packets		*/
 	u32	fackets_out;	/* FACK'd packets		*/
-	u32	tso_deferred;
 
 	/* from STCP, retrans queue hinting */
 	struct sk_buff* lost_skb_hint;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c5536b..cb95c7a9d1e7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1763,9 +1763,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	if (icsk->icsk_ca_state != TCP_CA_Open)
 		goto send_now;
 
-	/* Defer for less than two clock ticks. */
-	if (tp->tso_deferred &&
-	    (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
+	/* Avoid bursty behavior by allowing defer
+	 * only if the last write was recent.
+	 */
+	if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1807,11 +1808,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 		goto send_now;
 	}
 
-	/* Ok, it looks like it is advisable to defer.
-	 * Do not rearm the timer if already set to not break TCP ACK clocking.
-	 */
-	if (!tp->tso_deferred)
-		tp->tso_deferred = 1 | (jiffies << 1);
+	/* Ok, it looks like it is advisable to defer. */
 
 	if (cong_win < send_win && cong_win < skb->len)
 		*is_cwnd_limited = true;
@@ -1819,7 +1816,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	return true;
 
 send_now:
-	tp->tso_deferred = 0;
 	return false;
 }
 
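[Editor's note: for contrast, a sketch of the pseudo timer this patch removes, under the same 32-bit jiffies assumption; the helper names are hypothetical. The low bit tagged the timer as armed, keeping the field nonzero even when jiffies is zero, while the upper bits stored the arming time.]

	#include <stdint.h>
	#include <stdbool.h>

	/* Arm the pseudo timer: bit 0 is the "armed" flag, bits 1..31
	 * hold the current time, so the stored value is never zero.
	 */
	static uint32_t arm_pseudo_timer(uint32_t jiffies_now)
	{
		return 1u | (jiffies_now << 1);
	}

	/* The old expiry test: recover the stored 31-bit arming time,
	 * compare it with the current 31-bit time, and give up
	 * deferring after more than one tick. A zero field means the
	 * timer was never armed.
	 */
	static bool pseudo_timer_expired(uint32_t tso_deferred,
					 uint32_t jiffies_now)
	{
		return tso_deferred &&
		       ((jiffies_now << 1) >> 1) - (tso_deferred >> 1) > 1;
	}

Because the timer was never rearmed while set (to avoid breaking TCP ACK clocking, per the removed comment), it depended on a future ACK arriving to fire at all; the new lsndtime test achieves the same burst bound without the extra per-socket field.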