aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@osdl.org>2005-11-10 20:09:53 -0500
committerDavid S. Miller <davem@davemloft.net>2005-11-10 20:09:53 -0500
commit9772efb970780aeed488c19d8b4afd46c3b484af (patch)
treede016aaa29c8a95e98c7abaa70c8b590160e2886
parent7faffa1c7fb9b8e8917e3225d4e2638270c0a48b (diff)
[TCP]: Appropriate Byte Count support
This is an updated version of the RFC3465 ABC patch originally for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting bytes ack'd rather than packets when updating congestion control. The original ABC described in the RFC applied to a Reno style algorithm. For advanced congestion control there is little change after leaving slow start. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt5
-rw-r--r--include/linux/sysctl.h1
-rw-r--r--include/linux/tcp.h1
-rw-r--r--include/net/tcp.h19
-rw-r--r--net/ipv4/sysctl_net_ipv4.c8
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_cong.c31
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/tcp_minisocks.c1
9 files changed, 63 insertions, 11 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 65895bb51414..ebc09a159f62 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
78 78
79TCP variables: 79TCP variables:
80 80
81tcp_abc - INTEGER
82 Controls Appropriate Byte Count defined in RFC3465. If set to
83 0 then congestion avoidance is done once per ack. 1 is the conservative
84 value, and 2 is more aggressive.
85
81tcp_syn_retries - INTEGER 86tcp_syn_retries - INTEGER
82 Number of times initial SYNs for an active TCP connection attempt 87 Number of times initial SYNs for an active TCP connection attempt
83 will be retransmitted. Should not be higher than 255. Default value 88 will be retransmitted. Should not be higher than 255. Default value
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 22cf5e1ac987..ab2791b3189d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -390,6 +390,7 @@ enum
390 NET_TCP_BIC_BETA=108, 390 NET_TCP_BIC_BETA=108,
391 NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, 391 NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
392 NET_TCP_CONG_CONTROL=110, 392 NET_TCP_CONG_CONTROL=110,
393 NET_TCP_ABC=111,
393}; 394};
394 395
395enum { 396enum {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index ac4ca44c75ca..737b32e52956 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -326,6 +326,7 @@ struct tcp_sock {
326 __u32 snd_up; /* Urgent pointer */ 326 __u32 snd_up; /* Urgent pointer */
327 327
328 __u32 total_retrans; /* Total retransmits for entire connection */ 328 __u32 total_retrans; /* Total retransmits for entire connection */
329 __u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */
329 330
330 unsigned int keepalive_time; /* time before keep alive takes place */ 331 unsigned int keepalive_time; /* time before keep alive takes place */
331 unsigned int keepalive_intvl; /* time interval between keep alive probes */ 332 unsigned int keepalive_intvl; /* time interval between keep alive probes */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 54c399886275..44ba4a21cbdc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
218extern int sysctl_tcp_nometrics_save; 218extern int sysctl_tcp_nometrics_save;
219extern int sysctl_tcp_moderate_rcvbuf; 219extern int sysctl_tcp_moderate_rcvbuf;
220extern int sysctl_tcp_tso_win_divisor; 220extern int sysctl_tcp_tso_win_divisor;
221extern int sysctl_tcp_abc;
221 222
222extern atomic_t tcp_memory_allocated; 223extern atomic_t tcp_memory_allocated;
223extern atomic_t tcp_sockets_allocated; 224extern atomic_t tcp_sockets_allocated;
@@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
770 */ 771 */
771static inline void tcp_slow_start(struct tcp_sock *tp) 772static inline void tcp_slow_start(struct tcp_sock *tp)
772{ 773{
774 if (sysctl_tcp_abc) {
775 /* RFC3465: Slow Start
776 * TCP sender SHOULD increase cwnd by the number of
777 * previously unacknowledged bytes ACKed by each incoming
778 * acknowledgment, provided the increase is not more than L
779 */
780 if (tp->bytes_acked < tp->mss_cache)
781 return;
782
783 /* We MAY increase by 2 if discovered delayed ack */
784 if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
785 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
786 tp->snd_cwnd++;
787 }
788 }
789 tp->bytes_acked = 0;
790
773 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 791 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
774 tp->snd_cwnd++; 792 tp->snd_cwnd++;
775} 793}
@@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
804 struct tcp_sock *tp = tcp_sk(sk); 822 struct tcp_sock *tp = tcp_sk(sk);
805 823
806 tp->prior_ssthresh = 0; 824 tp->prior_ssthresh = 0;
825 tp->bytes_acked = 0;
807 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { 826 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
808 __tcp_enter_cwr(sk); 827 __tcp_enter_cwr(sk);
809 tcp_set_ca_state(sk, TCP_CA_CWR); 828 tcp_set_ca_state(sk, TCP_CA_CWR);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 652685623519..01444a02b48b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
645 .proc_handler = &proc_tcp_congestion_control, 645 .proc_handler = &proc_tcp_congestion_control,
646 .strategy = &sysctl_tcp_congestion_control, 646 .strategy = &sysctl_tcp_congestion_control,
647 }, 647 },
648 {
649 .ctl_name = NET_TCP_ABC,
650 .procname = "tcp_abc",
651 .data = &sysctl_tcp_abc,
652 .maxlen = sizeof(int),
653 .mode = 0644,
654 .proc_handler = &proc_dointvec,
655 },
648 656
649 { .ctl_name = 0 } 657 { .ctl_name = 0 }
650}; 658};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 72b7c22e1ea5..cfaf76133759 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
1669 tp->packets_out = 0; 1669 tp->packets_out = 0;
1670 tp->snd_ssthresh = 0x7fffffff; 1670 tp->snd_ssthresh = 0x7fffffff;
1671 tp->snd_cwnd_cnt = 0; 1671 tp->snd_cwnd_cnt = 0;
1672 tp->bytes_acked = 0;
1672 tcp_set_ca_state(sk, TCP_CA_Open); 1673 tcp_set_ca_state(sk, TCP_CA_Open);
1673 tcp_clear_retrans(tp); 1674 tcp_clear_retrans(tp);
1674 inet_csk_delack_init(sk); 1675 inet_csk_delack_init(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 6d3e883b48f6..c7cc62c8dc12 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -192,17 +192,26 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
192 /* In "safe" area, increase. */ 192 /* In "safe" area, increase. */
193 if (tp->snd_cwnd <= tp->snd_ssthresh) 193 if (tp->snd_cwnd <= tp->snd_ssthresh)
194 tcp_slow_start(tp); 194 tcp_slow_start(tp);
195 else { 195
196 /* In dangerous area, increase slowly. 196 /* In dangerous area, increase slowly. */
197 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd 197 else if (sysctl_tcp_abc) {
198 */ 198 /* RFC3465: Appropriate Byte Count
199 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { 199 * increase once for each full cwnd acked
200 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 200 */
201 tp->snd_cwnd++; 201 if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
202 tp->snd_cwnd_cnt = 0; 202 tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
203 } else 203 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
204 tp->snd_cwnd_cnt++; 204 tp->snd_cwnd++;
205 } 205 }
206 } else {
207 /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
208 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
209 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
210 tp->snd_cwnd++;
211 tp->snd_cwnd_cnt = 0;
212 } else
213 tp->snd_cwnd_cnt++;
214 }
206} 215}
207EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 216EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
208 217
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e43065654930..4cb5e6f408dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
89int sysctl_tcp_nometrics_save; 89int sysctl_tcp_nometrics_save;
90 90
91int sysctl_tcp_moderate_rcvbuf = 1; 91int sysctl_tcp_moderate_rcvbuf = 1;
92int sysctl_tcp_abc = 1;
92 93
93#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 94#define FLAG_DATA 0x01 /* Incoming frame contained data. */
94#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 95#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how)
1247 tp->snd_cwnd_cnt = 0; 1248 tp->snd_cwnd_cnt = 0;
1248 tp->snd_cwnd_stamp = tcp_time_stamp; 1249 tp->snd_cwnd_stamp = tcp_time_stamp;
1249 1250
1251 tp->bytes_acked = 0;
1250 tcp_clear_retrans(tp); 1252 tcp_clear_retrans(tp);
1251 1253
1252 /* Push undo marker, if it was plain RTO and nothing 1254 /* Push undo marker, if it was plain RTO and nothing
@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1904 TCP_ECN_queue_cwr(tp); 1906 TCP_ECN_queue_cwr(tp);
1905 } 1907 }
1906 1908
1909 tp->bytes_acked = 0;
1907 tp->snd_cwnd_cnt = 0; 1910 tp->snd_cwnd_cnt = 0;
1908 tcp_set_ca_state(sk, TCP_CA_Recovery); 1911 tcp_set_ca_state(sk, TCP_CA_Recovery);
1909 } 1912 }
@@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2310 if (before(ack, prior_snd_una)) 2313 if (before(ack, prior_snd_una))
2311 goto old_ack; 2314 goto old_ack;
2312 2315
2316 if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
2317 tp->bytes_acked += ack - prior_snd_una;
2318
2313 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { 2319 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
2314 /* Window is constant, pure forward advance. 2320 /* Window is constant, pure forward advance.
2315 * No more checks are required. 2321 * No more checks are required.
@@ -4370,6 +4376,7 @@ discard:
4370 4376
4371EXPORT_SYMBOL(sysctl_tcp_ecn); 4377EXPORT_SYMBOL(sysctl_tcp_ecn);
4372EXPORT_SYMBOL(sysctl_tcp_reordering); 4378EXPORT_SYMBOL(sysctl_tcp_reordering);
4379EXPORT_SYMBOL(sysctl_tcp_abc);
4373EXPORT_SYMBOL(tcp_parse_options); 4380EXPORT_SYMBOL(tcp_parse_options);
4374EXPORT_SYMBOL(tcp_rcv_established); 4381EXPORT_SYMBOL(tcp_rcv_established);
4375EXPORT_SYMBOL(tcp_rcv_state_process); 4382EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b1a63b2c6b4a..9203a21e299f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
380 */ 380 */
381 newtp->snd_cwnd = 2; 381 newtp->snd_cwnd = 2;
382 newtp->snd_cwnd_cnt = 0; 382 newtp->snd_cwnd_cnt = 0;
383 newtp->bytes_acked = 0;
383 384
384 newtp->frto_counter = 0; 385 newtp->frto_counter = 0;
385 newtp->frto_highmark = 0; 386 newtp->frto_highmark = 0;