aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> 2007-02-27 13:09:49 -0500
committer David S. Miller <davem@sunset.davemloft.net> 2007-04-26 01:23:23 -0400
commit 3cfe3baaf07c9e40a75f9a70662de56df1c246a8 (patch)
tree 989b2aff491b5df3aa0215f611a8c7f1360c67e9
parent c5e7af0df5d7234afd8596560d9f570cfc6c18bf (diff)
[TCP]: Add two new spurious RTO responses to FRTO
A new sysctl, tcp_frto_response, is added to select amongst these responses:

- Rate halving based; reuses CA_CWR state (default)
- Very conservative; used to be the only one available (=1)
- Undo cwr; undoes ssthresh and cwnd reductions (=2)

The response with rate halving requires a new parameter to tcp_enter_cwr because FRTO has already reduced ssthresh and doing a second reduction there has to be prevented. In addition, to keep things nice on an 80-column screen, a local variable was added.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/sysctl.h 1
-rw-r--r-- include/net/tcp.h 3
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 8
-rw-r--r-- net/ipv4/tcp_input.c 36
-rw-r--r-- net/ipv4/tcp_output.c 2
5 files changed, 44 insertions, 6 deletions
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 98e0fd241a25..c9ccb550206f 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -439,6 +439,7 @@ enum
439 NET_TCP_AVAIL_CONG_CONTROL=122, 439 NET_TCP_AVAIL_CONG_CONTROL=122,
440 NET_TCP_ALLOWED_CONG_CONTROL=123, 440 NET_TCP_ALLOWED_CONG_CONTROL=123,
441 NET_TCP_MAX_SSTHRESH=124, 441 NET_TCP_MAX_SSTHRESH=124,
442 NET_TCP_FRTO_RESPONSE=125,
442}; 443};
443 444
444enum { 445enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6d09f5085f6a..f0c9e3400a09 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,6 +220,7 @@ extern int sysctl_tcp_app_win;
220extern int sysctl_tcp_adv_win_scale; 220extern int sysctl_tcp_adv_win_scale;
221extern int sysctl_tcp_tw_reuse; 221extern int sysctl_tcp_tw_reuse;
222extern int sysctl_tcp_frto; 222extern int sysctl_tcp_frto;
223extern int sysctl_tcp_frto_response;
223extern int sysctl_tcp_low_latency; 224extern int sysctl_tcp_low_latency;
224extern int sysctl_tcp_dma_copybreak; 225extern int sysctl_tcp_dma_copybreak;
225extern int sysctl_tcp_nometrics_save; 226extern int sysctl_tcp_nometrics_save;
@@ -738,7 +739,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
738 tp->left_out = tp->sacked_out + tp->lost_out; 739 tp->left_out = tp->sacked_out + tp->lost_out;
739} 740}
740 741
741extern void tcp_enter_cwr(struct sock *sk); 742extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
742extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); 743extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
743 744
744/* Slow start with delack produces 3 packets of burst, so that 745/* Slow start with delack produces 3 packets of burst, so that
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d68effe98e8d..6817d6485df5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -647,6 +647,14 @@ ctl_table ipv4_table[] = {
647 .proc_handler = &proc_dointvec 647 .proc_handler = &proc_dointvec
648 }, 648 },
649 { 649 {
650 .ctl_name = NET_TCP_FRTO_RESPONSE,
651 .procname = "tcp_frto_response",
652 .data = &sysctl_tcp_frto_response,
653 .maxlen = sizeof(int),
654 .mode = 0644,
655 .proc_handler = &proc_dointvec
656 },
657 {
650 .ctl_name = NET_TCP_LOW_LATENCY, 658 .ctl_name = NET_TCP_LOW_LATENCY,
651 .procname = "tcp_low_latency", 659 .procname = "tcp_low_latency",
652 .data = &sysctl_tcp_low_latency, 660 .data = &sysctl_tcp_low_latency,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f6ba07f0d816..322e43c56461 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly;
86int sysctl_tcp_rfc1337 __read_mostly; 86int sysctl_tcp_rfc1337 __read_mostly;
87int sysctl_tcp_max_orphans __read_mostly = NR_FILE; 87int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
88int sysctl_tcp_frto __read_mostly; 88int sysctl_tcp_frto __read_mostly;
89int sysctl_tcp_frto_response __read_mostly;
89int sysctl_tcp_nometrics_save __read_mostly; 90int sysctl_tcp_nometrics_save __read_mostly;
90 91
91int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; 92int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
@@ -762,15 +763,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
762} 763}
763 764
764/* Set slow start threshold and cwnd not falling to slow start */ 765/* Set slow start threshold and cwnd not falling to slow start */
765void tcp_enter_cwr(struct sock *sk) 766void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
766{ 767{
767 struct tcp_sock *tp = tcp_sk(sk); 768 struct tcp_sock *tp = tcp_sk(sk);
769 const struct inet_connection_sock *icsk = inet_csk(sk);
768 770
769 tp->prior_ssthresh = 0; 771 tp->prior_ssthresh = 0;
770 tp->bytes_acked = 0; 772 tp->bytes_acked = 0;
771 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { 773 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
772 tp->undo_marker = 0; 774 tp->undo_marker = 0;
773 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); 775 if (set_ssthresh)
776 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
774 tp->snd_cwnd = min(tp->snd_cwnd, 777 tp->snd_cwnd = min(tp->snd_cwnd,
775 tcp_packets_in_flight(tp) + 1U); 778 tcp_packets_in_flight(tp) + 1U);
776 tp->snd_cwnd_cnt = 0; 779 tp->snd_cwnd_cnt = 0;
@@ -2003,7 +2006,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
2003 tp->retrans_stamp = 0; 2006 tp->retrans_stamp = 0;
2004 2007
2005 if (flag&FLAG_ECE) 2008 if (flag&FLAG_ECE)
2006 tcp_enter_cwr(sk); 2009 tcp_enter_cwr(sk, 1);
2007 2010
2008 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2011 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2009 int state = TCP_CA_Open; 2012 int state = TCP_CA_Open;
@@ -2579,6 +2582,21 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
2579 tcp_moderate_cwnd(tp); 2582 tcp_moderate_cwnd(tp);
2580} 2583}
2581 2584
2585/* A conservative spurious RTO response algorithm: reduce cwnd using
2586 * rate halving and continue in congestion avoidance.
2587 */
2588static void tcp_ratehalving_spur_to_response(struct sock *sk)
2589{
2590 struct tcp_sock *tp = tcp_sk(sk);
2591 tcp_enter_cwr(sk, 0);
2592 tp->high_seq = tp->frto_highmark; /* Smoother w/o this? - ij */
2593}
2594
2595static void tcp_undo_spur_to_response(struct sock *sk)
2596{
2597 tcp_undo_cwr(sk, 1);
2598}
2599
2582/* F-RTO spurious RTO detection algorithm (RFC4138) 2600/* F-RTO spurious RTO detection algorithm (RFC4138)
2583 * 2601 *
2584 * F-RTO affects during two new ACKs following RTO (well, almost, see inline 2602 * F-RTO affects during two new ACKs following RTO (well, almost, see inline
@@ -2661,7 +2679,17 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
2661 tp->frto_counter = 2; 2679 tp->frto_counter = 2;
2662 return 1; 2680 return 1;
2663 } else /* frto_counter == 2 */ { 2681 } else /* frto_counter == 2 */ {
2664 tcp_conservative_spur_to_response(tp); 2682 switch (sysctl_tcp_frto_response) {
2683 case 2:
2684 tcp_undo_spur_to_response(sk);
2685 break;
2686 case 1:
2687 tcp_conservative_spur_to_response(tp);
2688 break;
2689 default:
2690 tcp_ratehalving_spur_to_response(sk);
2691 break;
2692 };
2665 tp->frto_counter = 0; 2693 tp->frto_counter = 0;
2666 } 2694 }
2667 return 0; 2695 return 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3c24881f2a65..d19b2f3b70fd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -545,7 +545,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
545 if (likely(err <= 0)) 545 if (likely(err <= 0))
546 return err; 546 return err;
547 547
548 tcp_enter_cwr(sk); 548 tcp_enter_cwr(sk, 1);
549 549
550 return net_xmit_eval(err); 550 return net_xmit_eval(err);
551 551