diff options
author | David S. Miller <davem@davemloft.net> | 2015-05-09 16:42:32 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-05-09 16:42:32 -0400 |
commit | 82ae9c6060c6dbaf103273a5c51b8f58b951d9a2 (patch) | |
tree | 45729a77a2017ec24e8185e7279b97a2019f87fe /net | |
parent | b063bc5ea77b1c1c0e7798f641f53504d0f64bf8 (diff) | |
parent | e520af48c7e5acae5f17f82a79ba7ab7cf156f3b (diff) |
Merge branch 'tcp-more-reliable-window-probes'
Eric Dumazet says:
====================
tcp: more reliable window probes
This series addresses a problem caused by small rto_min timers in DC,
leading to either timer storms or early flow terminations.
We also add two new SNMP counters for proper monitoring:
TCPWinProbe and TCPKeepAlive
v2: added TCPKeepAlive counter, as suggested by Yuchung & Neal
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/proc.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 15 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 2 |
4 files changed, 12 insertions, 9 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index e1f3b911dd1e..da5d483e236a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
298 | SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), | 298 | SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), |
299 | SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), | 299 | SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), |
300 | SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), | 300 | SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), |
301 | SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE), | ||
302 | SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), | ||
301 | SNMP_MIB_SENTINEL | 303 | SNMP_MIB_SENTINEL |
302 | }; | 304 | }; |
303 | 305 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index df2ca615cd0c..cf8b20ff6658 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -3233,7 +3233,7 @@ static void tcp_ack_probe(struct sock *sk) | |||
3233 | * This function is not for random using! | 3233 | * This function is not for random using! |
3234 | */ | 3234 | */ |
3235 | } else { | 3235 | } else { |
3236 | unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); | 3236 | unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); |
3237 | 3237 | ||
3238 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 3238 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
3239 | when, TCP_RTO_MAX); | 3239 | when, TCP_RTO_MAX); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a369e8a70b2c..7386d32cd670 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -3382,7 +3382,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack); | |||
3382 | * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is | 3382 | * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is |
3383 | * out-of-date with SND.UNA-1 to probe window. | 3383 | * out-of-date with SND.UNA-1 to probe window. |
3384 | */ | 3384 | */ |
3385 | static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | 3385 | static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) |
3386 | { | 3386 | { |
3387 | struct tcp_sock *tp = tcp_sk(sk); | 3387 | struct tcp_sock *tp = tcp_sk(sk); |
3388 | struct sk_buff *skb; | 3388 | struct sk_buff *skb; |
@@ -3400,6 +3400,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
3400 | */ | 3400 | */ |
3401 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); | 3401 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); |
3402 | skb_mstamp_get(&skb->skb_mstamp); | 3402 | skb_mstamp_get(&skb->skb_mstamp); |
3403 | NET_INC_STATS_BH(sock_net(sk), mib); | ||
3403 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 3404 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
3404 | } | 3405 | } |
3405 | 3406 | ||
@@ -3407,12 +3408,12 @@ void tcp_send_window_probe(struct sock *sk) | |||
3407 | { | 3408 | { |
3408 | if (sk->sk_state == TCP_ESTABLISHED) { | 3409 | if (sk->sk_state == TCP_ESTABLISHED) { |
3409 | tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; | 3410 | tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; |
3410 | tcp_xmit_probe_skb(sk, 0); | 3411 | tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE); |
3411 | } | 3412 | } |
3412 | } | 3413 | } |
3413 | 3414 | ||
3414 | /* Initiate keepalive or window probe from timer. */ | 3415 | /* Initiate keepalive or window probe from timer. */ |
3415 | int tcp_write_wakeup(struct sock *sk) | 3416 | int tcp_write_wakeup(struct sock *sk, int mib) |
3416 | { | 3417 | { |
3417 | struct tcp_sock *tp = tcp_sk(sk); | 3418 | struct tcp_sock *tp = tcp_sk(sk); |
3418 | struct sk_buff *skb; | 3419 | struct sk_buff *skb; |
@@ -3449,8 +3450,8 @@ int tcp_write_wakeup(struct sock *sk) | |||
3449 | return err; | 3450 | return err; |
3450 | } else { | 3451 | } else { |
3451 | if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) | 3452 | if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) |
3452 | tcp_xmit_probe_skb(sk, 1); | 3453 | tcp_xmit_probe_skb(sk, 1, mib); |
3453 | return tcp_xmit_probe_skb(sk, 0); | 3454 | return tcp_xmit_probe_skb(sk, 0, mib); |
3454 | } | 3455 | } |
3455 | } | 3456 | } |
3456 | 3457 | ||
@@ -3464,7 +3465,7 @@ void tcp_send_probe0(struct sock *sk) | |||
3464 | unsigned long probe_max; | 3465 | unsigned long probe_max; |
3465 | int err; | 3466 | int err; |
3466 | 3467 | ||
3467 | err = tcp_write_wakeup(sk); | 3468 | err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); |
3468 | 3469 | ||
3469 | if (tp->packets_out || !tcp_send_head(sk)) { | 3470 | if (tp->packets_out || !tcp_send_head(sk)) { |
3470 | /* Cancel probe timer, if it is not required. */ | 3471 | /* Cancel probe timer, if it is not required. */ |
@@ -3490,7 +3491,7 @@ void tcp_send_probe0(struct sock *sk) | |||
3490 | probe_max = TCP_RESOURCE_PROBE_INTERVAL; | 3491 | probe_max = TCP_RESOURCE_PROBE_INTERVAL; |
3491 | } | 3492 | } |
3492 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 3493 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
3493 | inet_csk_rto_backoff(icsk, probe_max), | 3494 | tcp_probe0_when(sk, probe_max), |
3494 | TCP_RTO_MAX); | 3495 | TCP_RTO_MAX); |
3495 | } | 3496 | } |
3496 | 3497 | ||
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8c65dc147d8b..65bf670e8714 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
616 | tcp_write_err(sk); | 616 | tcp_write_err(sk); |
617 | goto out; | 617 | goto out; |
618 | } | 618 | } |
619 | if (tcp_write_wakeup(sk) <= 0) { | 619 | if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { |
620 | icsk->icsk_probes_out++; | 620 | icsk->icsk_probes_out++; |
621 | elapsed = keepalive_intvl_when(tp); | 621 | elapsed = keepalive_intvl_when(tp); |
622 | } else { | 622 | } else { |