aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-05-09 16:42:32 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-09 16:42:32 -0400
commit82ae9c6060c6dbaf103273a5c51b8f58b951d9a2 (patch)
tree45729a77a2017ec24e8185e7279b97a2019f87fe /net
parentb063bc5ea77b1c1c0e7798f641f53504d0f64bf8 (diff)
parente520af48c7e5acae5f17f82a79ba7ab7cf156f3b (diff)
Merge branch 'tcp-more-reliable-window-probes'
Eric Dumazet says: ==================== tcp: more reliable window probes This series addresses a problem caused by small rto_min timers in DC, leading to either timer storms or early flow terminations. We also add two new SNMP counters for proper monitoring: TCPWinProbe and TCPKeepAlive v2: added TCPKeepAlive counter, as suggested by Yuchung & Neal ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_output.c15
-rw-r--r--net/ipv4/tcp_timer.c2
4 files changed, 12 insertions, 9 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e1f3b911dd1e..da5d483e236a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = {
298 SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), 298 SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2),
299 SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), 299 SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT),
300 SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), 300 SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE),
301 SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE),
302 SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
301 SNMP_MIB_SENTINEL 303 SNMP_MIB_SENTINEL
302}; 304};
303 305
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index df2ca615cd0c..cf8b20ff6658 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3233,7 +3233,7 @@ static void tcp_ack_probe(struct sock *sk)
3233 * This function is not for random using! 3233 * This function is not for random using!
3234 */ 3234 */
3235 } else { 3235 } else {
3236 unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); 3236 unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
3237 3237
3238 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 3238 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3239 when, TCP_RTO_MAX); 3239 when, TCP_RTO_MAX);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a369e8a70b2c..7386d32cd670 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3382,7 +3382,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack);
3382 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is 3382 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
3383 * out-of-date with SND.UNA-1 to probe window. 3383 * out-of-date with SND.UNA-1 to probe window.
3384 */ 3384 */
3385static int tcp_xmit_probe_skb(struct sock *sk, int urgent) 3385static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3386{ 3386{
3387 struct tcp_sock *tp = tcp_sk(sk); 3387 struct tcp_sock *tp = tcp_sk(sk);
3388 struct sk_buff *skb; 3388 struct sk_buff *skb;
@@ -3400,6 +3400,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3400 */ 3400 */
3401 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); 3401 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3402 skb_mstamp_get(&skb->skb_mstamp); 3402 skb_mstamp_get(&skb->skb_mstamp);
3403 NET_INC_STATS_BH(sock_net(sk), mib);
3403 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 3404 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3404} 3405}
3405 3406
@@ -3407,12 +3408,12 @@ void tcp_send_window_probe(struct sock *sk)
3407{ 3408{
3408 if (sk->sk_state == TCP_ESTABLISHED) { 3409 if (sk->sk_state == TCP_ESTABLISHED) {
3409 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; 3410 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3410 tcp_xmit_probe_skb(sk, 0); 3411 tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
3411 } 3412 }
3412} 3413}
3413 3414
3414/* Initiate keepalive or window probe from timer. */ 3415/* Initiate keepalive or window probe from timer. */
3415int tcp_write_wakeup(struct sock *sk) 3416int tcp_write_wakeup(struct sock *sk, int mib)
3416{ 3417{
3417 struct tcp_sock *tp = tcp_sk(sk); 3418 struct tcp_sock *tp = tcp_sk(sk);
3418 struct sk_buff *skb; 3419 struct sk_buff *skb;
@@ -3449,8 +3450,8 @@ int tcp_write_wakeup(struct sock *sk)
3449 return err; 3450 return err;
3450 } else { 3451 } else {
3451 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) 3452 if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
3452 tcp_xmit_probe_skb(sk, 1); 3453 tcp_xmit_probe_skb(sk, 1, mib);
3453 return tcp_xmit_probe_skb(sk, 0); 3454 return tcp_xmit_probe_skb(sk, 0, mib);
3454 } 3455 }
3455} 3456}
3456 3457
@@ -3464,7 +3465,7 @@ void tcp_send_probe0(struct sock *sk)
3464 unsigned long probe_max; 3465 unsigned long probe_max;
3465 int err; 3466 int err;
3466 3467
3467 err = tcp_write_wakeup(sk); 3468 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3468 3469
3469 if (tp->packets_out || !tcp_send_head(sk)) { 3470 if (tp->packets_out || !tcp_send_head(sk)) {
3470 /* Cancel probe timer, if it is not required. */ 3471 /* Cancel probe timer, if it is not required. */
@@ -3490,7 +3491,7 @@ void tcp_send_probe0(struct sock *sk)
3490 probe_max = TCP_RESOURCE_PROBE_INTERVAL; 3491 probe_max = TCP_RESOURCE_PROBE_INTERVAL;
3491 } 3492 }
3492 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 3493 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3493 inet_csk_rto_backoff(icsk, probe_max), 3494 tcp_probe0_when(sk, probe_max),
3494 TCP_RTO_MAX); 3495 TCP_RTO_MAX);
3495} 3496}
3496 3497
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8c65dc147d8b..65bf670e8714 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data)
616 tcp_write_err(sk); 616 tcp_write_err(sk);
617 goto out; 617 goto out;
618 } 618 }
619 if (tcp_write_wakeup(sk) <= 0) { 619 if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
620 icsk->icsk_probes_out++; 620 icsk->icsk_probes_out++;
621 elapsed = keepalive_intvl_when(tp); 621 elapsed = keepalive_intvl_when(tp);
622 } else { 622 } else {