diff options
author | Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> | 2007-02-27 13:09:49 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-04-26 01:23:23 -0400 |
commit | 3cfe3baaf07c9e40a75f9a70662de56df1c246a8 (patch) | |
tree | 989b2aff491b5df3aa0215f611a8c7f1360c67e9 | |
parent | c5e7af0df5d7234afd8596560d9f570cfc6c18bf (diff) |
[TCP]: Add two new spurious RTO responses to FRTO
A new sysctl, tcp_frto_response, is added to select among these
responses:
- Rate halving based; reuses CA_CWR state (default, =0)
- Very conservative; used to be the only one available (=1)
- Undo cwr; undoes ssthresh and cwnd reductions (=2)
The rate-halving response requires a new parameter to
tcp_enter_cwr: FRTO has already reduced ssthresh, so a second
reduction there has to be prevented. In addition, a local
variable was added to tcp_enter_cwr to keep its lines within
80 columns.
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/sysctl.h | 1 | ||||
-rw-r--r-- | include/net/tcp.h | 3 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 36 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 2 |
5 files changed, 44 insertions, 6 deletions
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 98e0fd241a25..c9ccb550206f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -439,6 +439,7 @@ enum | |||
439 | NET_TCP_AVAIL_CONG_CONTROL=122, | 439 | NET_TCP_AVAIL_CONG_CONTROL=122, |
440 | NET_TCP_ALLOWED_CONG_CONTROL=123, | 440 | NET_TCP_ALLOWED_CONG_CONTROL=123, |
441 | NET_TCP_MAX_SSTHRESH=124, | 441 | NET_TCP_MAX_SSTHRESH=124, |
442 | NET_TCP_FRTO_RESPONSE=125, | ||
442 | }; | 443 | }; |
443 | 444 | ||
444 | enum { | 445 | enum { |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 6d09f5085f6a..f0c9e3400a09 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -220,6 +220,7 @@ extern int sysctl_tcp_app_win; | |||
220 | extern int sysctl_tcp_adv_win_scale; | 220 | extern int sysctl_tcp_adv_win_scale; |
221 | extern int sysctl_tcp_tw_reuse; | 221 | extern int sysctl_tcp_tw_reuse; |
222 | extern int sysctl_tcp_frto; | 222 | extern int sysctl_tcp_frto; |
223 | extern int sysctl_tcp_frto_response; | ||
223 | extern int sysctl_tcp_low_latency; | 224 | extern int sysctl_tcp_low_latency; |
224 | extern int sysctl_tcp_dma_copybreak; | 225 | extern int sysctl_tcp_dma_copybreak; |
225 | extern int sysctl_tcp_nometrics_save; | 226 | extern int sysctl_tcp_nometrics_save; |
@@ -738,7 +739,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) | |||
738 | tp->left_out = tp->sacked_out + tp->lost_out; | 739 | tp->left_out = tp->sacked_out + tp->lost_out; |
739 | } | 740 | } |
740 | 741 | ||
741 | extern void tcp_enter_cwr(struct sock *sk); | 742 | extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); |
742 | extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); | 743 | extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); |
743 | 744 | ||
744 | /* Slow start with delack produces 3 packets of burst, so that | 745 | /* Slow start with delack produces 3 packets of burst, so that |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d68effe98e8d..6817d6485df5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -647,6 +647,14 @@ ctl_table ipv4_table[] = { | |||
647 | .proc_handler = &proc_dointvec | 647 | .proc_handler = &proc_dointvec |
648 | }, | 648 | }, |
649 | { | 649 | { |
650 | .ctl_name = NET_TCP_FRTO_RESPONSE, | ||
651 | .procname = "tcp_frto_response", | ||
652 | .data = &sysctl_tcp_frto_response, | ||
653 | .maxlen = sizeof(int), | ||
654 | .mode = 0644, | ||
655 | .proc_handler = &proc_dointvec | ||
656 | }, | ||
657 | { | ||
650 | .ctl_name = NET_TCP_LOW_LATENCY, | 658 | .ctl_name = NET_TCP_LOW_LATENCY, |
651 | .procname = "tcp_low_latency", | 659 | .procname = "tcp_low_latency", |
652 | .data = &sysctl_tcp_low_latency, | 660 | .data = &sysctl_tcp_low_latency, |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f6ba07f0d816..322e43c56461 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly; | |||
86 | int sysctl_tcp_rfc1337 __read_mostly; | 86 | int sysctl_tcp_rfc1337 __read_mostly; |
87 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; | 87 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; |
88 | int sysctl_tcp_frto __read_mostly; | 88 | int sysctl_tcp_frto __read_mostly; |
89 | int sysctl_tcp_frto_response __read_mostly; | ||
89 | int sysctl_tcp_nometrics_save __read_mostly; | 90 | int sysctl_tcp_nometrics_save __read_mostly; |
90 | 91 | ||
91 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 92 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
@@ -762,15 +763,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) | |||
762 | } | 763 | } |
763 | 764 | ||
764 | /* Set slow start threshold and cwnd not falling to slow start */ | 765 | /* Set slow start threshold and cwnd not falling to slow start */ |
765 | void tcp_enter_cwr(struct sock *sk) | 766 | void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) |
766 | { | 767 | { |
767 | struct tcp_sock *tp = tcp_sk(sk); | 768 | struct tcp_sock *tp = tcp_sk(sk); |
769 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
768 | 770 | ||
769 | tp->prior_ssthresh = 0; | 771 | tp->prior_ssthresh = 0; |
770 | tp->bytes_acked = 0; | 772 | tp->bytes_acked = 0; |
771 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 773 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
772 | tp->undo_marker = 0; | 774 | tp->undo_marker = 0; |
773 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | 775 | if (set_ssthresh) |
776 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
774 | tp->snd_cwnd = min(tp->snd_cwnd, | 777 | tp->snd_cwnd = min(tp->snd_cwnd, |
775 | tcp_packets_in_flight(tp) + 1U); | 778 | tcp_packets_in_flight(tp) + 1U); |
776 | tp->snd_cwnd_cnt = 0; | 779 | tp->snd_cwnd_cnt = 0; |
@@ -2003,7 +2006,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) | |||
2003 | tp->retrans_stamp = 0; | 2006 | tp->retrans_stamp = 0; |
2004 | 2007 | ||
2005 | if (flag&FLAG_ECE) | 2008 | if (flag&FLAG_ECE) |
2006 | tcp_enter_cwr(sk); | 2009 | tcp_enter_cwr(sk, 1); |
2007 | 2010 | ||
2008 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { | 2011 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { |
2009 | int state = TCP_CA_Open; | 2012 | int state = TCP_CA_Open; |
@@ -2579,6 +2582,21 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) | |||
2579 | tcp_moderate_cwnd(tp); | 2582 | tcp_moderate_cwnd(tp); |
2580 | } | 2583 | } |
2581 | 2584 | ||
2585 | /* A conservative spurious RTO response algorithm: reduce cwnd using | ||
2586 | * rate halving and continue in congestion avoidance. | ||
2587 | */ | ||
2588 | static void tcp_ratehalving_spur_to_response(struct sock *sk) | ||
2589 | { | ||
2590 | struct tcp_sock *tp = tcp_sk(sk); | ||
2591 | tcp_enter_cwr(sk, 0); | ||
2592 | tp->high_seq = tp->frto_highmark; /* Smoother w/o this? - ij */ | ||
2593 | } | ||
2594 | |||
2595 | static void tcp_undo_spur_to_response(struct sock *sk) | ||
2596 | { | ||
2597 | tcp_undo_cwr(sk, 1); | ||
2598 | } | ||
2599 | |||
2582 | /* F-RTO spurious RTO detection algorithm (RFC4138) | 2600 | /* F-RTO spurious RTO detection algorithm (RFC4138) |
2583 | * | 2601 | * |
2584 | * F-RTO affects during two new ACKs following RTO (well, almost, see inline | 2602 | * F-RTO affects during two new ACKs following RTO (well, almost, see inline |
@@ -2661,7 +2679,17 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) | |||
2661 | tp->frto_counter = 2; | 2679 | tp->frto_counter = 2; |
2662 | return 1; | 2680 | return 1; |
2663 | } else /* frto_counter == 2 */ { | 2681 | } else /* frto_counter == 2 */ { |
2664 | tcp_conservative_spur_to_response(tp); | 2682 | switch (sysctl_tcp_frto_response) { |
2683 | case 2: | ||
2684 | tcp_undo_spur_to_response(sk); | ||
2685 | break; | ||
2686 | case 1: | ||
2687 | tcp_conservative_spur_to_response(tp); | ||
2688 | break; | ||
2689 | default: | ||
2690 | tcp_ratehalving_spur_to_response(sk); | ||
2691 | break; | ||
2692 | }; | ||
2665 | tp->frto_counter = 0; | 2693 | tp->frto_counter = 0; |
2666 | } | 2694 | } |
2667 | return 0; | 2695 | return 0; |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3c24881f2a65..d19b2f3b70fd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -545,7 +545,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
545 | if (likely(err <= 0)) | 545 | if (likely(err <= 0)) |
546 | return err; | 546 | return err; |
547 | 547 | ||
548 | tcp_enter_cwr(sk); | 548 | tcp_enter_cwr(sk, 1); |
549 | 549 | ||
550 | return net_xmit_eval(err); | 550 | return net_xmit_eval(err); |
551 | 551 | ||