aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2018-05-17 17:47:26 -0400
committerDavid S. Miller <davem@davemloft.net>2018-05-18 11:40:27 -0400
commit5d9f4262b7ea41ca9981cc790e37cca6e37c789e (patch)
treecae663c7a66f7a37aeb26d1587012b9d91fb1a2c /net/ipv4/tcp_input.c
parenta3893637e1eb0ef5eb1bbc52b3a8d2dfa317a35d (diff)
tcp: add SACK compression
When TCP receives an out-of-order packet, it immediately sends a SACK packet, generating network load but also forcing the receiver to send 1-MSS pathological packets, increasing its RTX queue length/depth, and thus processing time. Wifi networks suffer from this aggressive behavior, but generally speaking, all these SACK packets add fuel to the fire when networks are under congestion. This patch adds a high resolution timer and tp->compressed_ack counter. Instead of sending a SACK, we program this timer with a small delay, based on RTT and capped to 1 ms : delay = min ( 5 % of RTT, 1 ms) If subsequent SACKs need to be sent while the timer has not yet expired, we simply increment tp->compressed_ack. When timer expires, a SACK is sent with the latest information. Whenever an ACK is sent (if data is sent, or if in-order data is received) timer is canceled. Note that tcp_sack_new_ofo_skb() is able to force a SACK to be sent if the sack blocks need to be shuffled, even if the timer has not expired. A new SNMP counter is added in the following patch. Two other patches add sysctls to allow changing the 1,000,000 and 44 values that this commit hard-coded. Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Acked-by: Yuchung Cheng <ycheng@google.com> Acked-by: Toke Høiland-Jørgensen <toke@toke.dk> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c35
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f5622b250665..cc2ac5346b92 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4249,6 +4249,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	 * If the sack array is full, forget about the last one.
 	 */
 	if (this_sack >= TCP_NUM_SACKS) {
+		if (tp->compressed_ack)
+			tcp_send_ack(sk);
 		this_sack--;
 		tp->rx_opt.num_sacks--;
 		sp--;
@@ -5081,6 +5083,7 @@ static inline void tcp_data_snd_check(struct sock *sk)
 static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned long rtt, delay;
 
 	/* More than one full frame received... */
 	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
@@ -5092,15 +5095,35 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
 	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
 	    /* We ACK each frame or... */
-	    tcp_in_quickack_mode(sk) ||
-	    /* We have out of order data. */
-	    (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
-		/* Then ack it now */
+	    tcp_in_quickack_mode(sk)) {
+send_now:
 		tcp_send_ack(sk);
-	} else {
-		/* Else, send delayed ack. */
+		return;
+	}
+
+	if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
 		tcp_send_delayed_ack(sk);
+		return;
 	}
+
+	if (!tcp_is_sack(tp) || tp->compressed_ack >= 44)
+		goto send_now;
+	tp->compressed_ack++;
+
+	if (hrtimer_is_queued(&tp->compressed_ack_timer))
+		return;
+
+	/* compress ack timer : 5 % of rtt, but no more than 1 ms */
+
+	rtt = tp->rcv_rtt_est.rtt_us;
+	if (tp->srtt_us && tp->srtt_us < rtt)
+		rtt = tp->srtt_us;
+
+	delay = min_t(unsigned long, NSEC_PER_MSEC,
+		      rtt * (NSEC_PER_USEC >> 3)/20);
+	sock_hold(sk);
+	hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
+		      HRTIMER_MODE_REL_PINNED_SOFT);
 }
 
 static inline void tcp_ack_snd_check(struct sock *sk)