diff options
author | Eric Dumazet <edumazet@google.com> | 2015-02-26 17:10:18 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-02-28 15:10:39 -0500 |
commit | 5f852eb536ad651b8734559dcf4353514cb0bea3 (patch) | |
tree | e6661e5fba290360597892ec96a58444db7c78a0 | |
parent | 6588af614e7b79294fbcd4a666a7422c0c854e80 (diff) |
tcp: tso: remove tp->tso_deferred
TSO relies on the ability to defer sending a small number of packets.
The heuristic is to wait for future ACKs in the hope of sending more packets at once.
The current algorithm uses a per-socket tso_deferred field as a pseudo timer.
This pseudo timer relies on future ACKs, but there is no guarantee
that we receive them in time.
A fix would be to use a real timer, but such a timer is probably too
expensive for typical cases.
This patch changes the logic to test the time of last transmit,
because we should not add bursts of more than 1ms for any given flow.
We've used this patch for about two years at Google, before FQ/pacing,
as it would reduce a fair amount of bursts.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/tcp.h | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 14 |
2 files changed, 5 insertions, 10 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1a7adb411647..97dbf16f7d9d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -236,7 +236,6 @@ struct tcp_sock { | |||
236 | u32 lost_out; /* Lost packets */ | 236 | u32 lost_out; /* Lost packets */ |
237 | u32 sacked_out; /* SACK'd packets */ | 237 | u32 sacked_out; /* SACK'd packets */ |
238 | u32 fackets_out; /* FACK'd packets */ | 238 | u32 fackets_out; /* FACK'd packets */ |
239 | u32 tso_deferred; | ||
240 | 239 | ||
241 | /* from STCP, retrans queue hinting */ | 240 | /* from STCP, retrans queue hinting */ |
242 | struct sk_buff* lost_skb_hint; | 241 | struct sk_buff* lost_skb_hint; |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a2a796c5536b..cb95c7a9d1e7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -1763,9 +1763,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, | |||
1763 | if (icsk->icsk_ca_state != TCP_CA_Open) | 1763 | if (icsk->icsk_ca_state != TCP_CA_Open) |
1764 | goto send_now; | 1764 | goto send_now; |
1765 | 1765 | ||
1766 | /* Defer for less than two clock ticks. */ | 1766 | /* Avoid bursty behavior by allowing defer |
1767 | if (tp->tso_deferred && | 1767 | * only if the last write was recent. |
1768 | (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) | 1768 | */ |
1769 | if ((s32)(tcp_time_stamp - tp->lsndtime) > 0) | ||
1769 | goto send_now; | 1770 | goto send_now; |
1770 | 1771 | ||
1771 | in_flight = tcp_packets_in_flight(tp); | 1772 | in_flight = tcp_packets_in_flight(tp); |
@@ -1807,11 +1808,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, | |||
1807 | goto send_now; | 1808 | goto send_now; |
1808 | } | 1809 | } |
1809 | 1810 | ||
1810 | /* Ok, it looks like it is advisable to defer. | 1811 | /* Ok, it looks like it is advisable to defer. */ |
1811 | * Do not rearm the timer if already set to not break TCP ACK clocking. | ||
1812 | */ | ||
1813 | if (!tp->tso_deferred) | ||
1814 | tp->tso_deferred = 1 | (jiffies << 1); | ||
1815 | 1812 | ||
1816 | if (cong_win < send_win && cong_win < skb->len) | 1813 | if (cong_win < send_win && cong_win < skb->len) |
1817 | *is_cwnd_limited = true; | 1814 | *is_cwnd_limited = true; |
@@ -1819,7 +1816,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, | |||
1819 | return true; | 1816 | return true; |
1820 | 1817 | ||
1821 | send_now: | 1818 | send_now: |
1822 | tp->tso_deferred = 0; | ||
1823 | return false; | 1819 | return false; |
1824 | } | 1820 | } |
1825 | 1821 | ||