diff options
author | Stephen Hemminger <shemminger@osdl.org> | 2005-11-10 20:09:53 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-11-10 20:09:53 -0500 |
commit | 9772efb970780aeed488c19d8b4afd46c3b484af (patch) | |
tree | de016aaa29c8a95e98c7abaa70c8b590160e2886 | |
parent | 7faffa1c7fb9b8e8917e3225d4e2638270c0a48b (diff) |
[TCP]: Appropriate Byte Count support
This is an updated version of the RFC3465 ABC patch originally
for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting
bytes ack'd rather than packets when updating congestion control.
The original ABC described in the RFC applied to a Reno style
algorithm. For advanced congestion control there is little
change after leaving slow start.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 5 | ||||
-rw-r--r-- | include/linux/sysctl.h | 1 | ||||
-rw-r--r-- | include/linux/tcp.h | 1 | ||||
-rw-r--r-- | include/net/tcp.h | 19 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 1 |
9 files changed, 63 insertions, 11 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 65895bb51414..ebc09a159f62 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER | |||
78 | 78 | ||
79 | TCP variables: | 79 | TCP variables: |
80 | 80 | ||
81 | tcp_abc - INTEGER | ||
82 | Controls Appropriate Byte Count defined in RFC3465. If set to | ||
83 | 0 then does congestion avoid once per ack. 1 is conservative | ||
84 | value, and 2 is more aggressive. | ||
85 | |||
81 | tcp_syn_retries - INTEGER | 86 | tcp_syn_retries - INTEGER |
82 | Number of times initial SYNs for an active TCP connection attempt | 87 | Number of times initial SYNs for an active TCP connection attempt |
83 | will be retransmitted. Should not be higher than 255. Default value | 88 | will be retransmitted. Should not be higher than 255. Default value |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 22cf5e1ac987..ab2791b3189d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -390,6 +390,7 @@ enum | |||
390 | NET_TCP_BIC_BETA=108, | 390 | NET_TCP_BIC_BETA=108, |
391 | NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, | 391 | NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, |
392 | NET_TCP_CONG_CONTROL=110, | 392 | NET_TCP_CONG_CONTROL=110, |
393 | NET_TCP_ABC=111, | ||
393 | }; | 394 | }; |
394 | 395 | ||
395 | enum { | 396 | enum { |
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ac4ca44c75ca..737b32e52956 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -326,6 +326,7 @@ struct tcp_sock { | |||
326 | __u32 snd_up; /* Urgent pointer */ | 326 | __u32 snd_up; /* Urgent pointer */ |
327 | 327 | ||
328 | __u32 total_retrans; /* Total retransmits for entire connection */ | 328 | __u32 total_retrans; /* Total retransmits for entire connection */ |
329 | __u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */ | ||
329 | 330 | ||
330 | unsigned int keepalive_time; /* time before keep alive takes place */ | 331 | unsigned int keepalive_time; /* time before keep alive takes place */ |
331 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ | 332 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 54c399886275..44ba4a21cbdc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency; | |||
218 | extern int sysctl_tcp_nometrics_save; | 218 | extern int sysctl_tcp_nometrics_save; |
219 | extern int sysctl_tcp_moderate_rcvbuf; | 219 | extern int sysctl_tcp_moderate_rcvbuf; |
220 | extern int sysctl_tcp_tso_win_divisor; | 220 | extern int sysctl_tcp_tso_win_divisor; |
221 | extern int sysctl_tcp_abc; | ||
221 | 222 | ||
222 | extern atomic_t tcp_memory_allocated; | 223 | extern atomic_t tcp_memory_allocated; |
223 | extern atomic_t tcp_sockets_allocated; | 224 | extern atomic_t tcp_sockets_allocated; |
@@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) | |||
770 | */ | 771 | */ |
771 | static inline void tcp_slow_start(struct tcp_sock *tp) | 772 | static inline void tcp_slow_start(struct tcp_sock *tp) |
772 | { | 773 | { |
774 | if (sysctl_tcp_abc) { | ||
775 | /* RFC3465: Slow Start | ||
776 | * TCP sender SHOULD increase cwnd by the number of | ||
777 | * previously unacknowledged bytes ACKed by each incoming | ||
778 | * acknowledgment, provided the increase is not more than L | ||
779 | */ | ||
780 | if (tp->bytes_acked < tp->mss_cache) | ||
781 | return; | ||
782 | |||
783 | /* We MAY increase by 2 if discovered delayed ack */ | ||
784 | if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { | ||
785 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
786 | tp->snd_cwnd++; | ||
787 | } | ||
788 | } | ||
789 | tp->bytes_acked = 0; | ||
790 | |||
773 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | 791 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) |
774 | tp->snd_cwnd++; | 792 | tp->snd_cwnd++; |
775 | } | 793 | } |
@@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk) | |||
804 | struct tcp_sock *tp = tcp_sk(sk); | 822 | struct tcp_sock *tp = tcp_sk(sk); |
805 | 823 | ||
806 | tp->prior_ssthresh = 0; | 824 | tp->prior_ssthresh = 0; |
825 | tp->bytes_acked = 0; | ||
807 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 826 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
808 | __tcp_enter_cwr(sk); | 827 | __tcp_enter_cwr(sk); |
809 | tcp_set_ca_state(sk, TCP_CA_CWR); | 828 | tcp_set_ca_state(sk, TCP_CA_CWR); |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 652685623519..01444a02b48b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = { | |||
645 | .proc_handler = &proc_tcp_congestion_control, | 645 | .proc_handler = &proc_tcp_congestion_control, |
646 | .strategy = &sysctl_tcp_congestion_control, | 646 | .strategy = &sysctl_tcp_congestion_control, |
647 | }, | 647 | }, |
648 | { | ||
649 | .ctl_name = NET_TCP_ABC, | ||
650 | .procname = "tcp_abc", | ||
651 | .data = &sysctl_tcp_abc, | ||
652 | .maxlen = sizeof(int), | ||
653 | .mode = 0644, | ||
654 | .proc_handler = &proc_dointvec, | ||
655 | }, | ||
648 | 656 | ||
649 | { .ctl_name = 0 } | 657 | { .ctl_name = 0 } |
650 | }; | 658 | }; |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 72b7c22e1ea5..cfaf76133759 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
1669 | tp->packets_out = 0; | 1669 | tp->packets_out = 0; |
1670 | tp->snd_ssthresh = 0x7fffffff; | 1670 | tp->snd_ssthresh = 0x7fffffff; |
1671 | tp->snd_cwnd_cnt = 0; | 1671 | tp->snd_cwnd_cnt = 0; |
1672 | tp->bytes_acked = 0; | ||
1672 | tcp_set_ca_state(sk, TCP_CA_Open); | 1673 | tcp_set_ca_state(sk, TCP_CA_Open); |
1673 | tcp_clear_retrans(tp); | 1674 | tcp_clear_retrans(tp); |
1674 | inet_csk_delack_init(sk); | 1675 | inet_csk_delack_init(sk); |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6d3e883b48f6..c7cc62c8dc12 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -192,17 +192,26 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, | |||
192 | /* In "safe" area, increase. */ | 192 | /* In "safe" area, increase. */ |
193 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 193 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
194 | tcp_slow_start(tp); | 194 | tcp_slow_start(tp); |
195 | else { | 195 | |
196 | /* In dangerous area, increase slowly. | 196 | /* In dangerous area, increase slowly. */ |
197 | * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd | 197 | else if (sysctl_tcp_abc) { |
198 | */ | 198 | /* RFC3465: Appropriate Byte Count |
199 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | 199 | * increase once for each full cwnd acked |
200 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | 200 | */ |
201 | tp->snd_cwnd++; | 201 | if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { |
202 | tp->snd_cwnd_cnt = 0; | 202 | tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; |
203 | } else | 203 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) |
204 | tp->snd_cwnd_cnt++; | 204 | tp->snd_cwnd++; |
205 | } | 205 | } |
206 | } else { | ||
207 | /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ | ||
208 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | ||
209 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
210 | tp->snd_cwnd++; | ||
211 | tp->snd_cwnd_cnt = 0; | ||
212 | } else | ||
213 | tp->snd_cwnd_cnt++; | ||
214 | } | ||
206 | } | 215 | } |
207 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); | 216 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); |
208 | 217 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e43065654930..4cb5e6f408dc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -89,6 +89,7 @@ int sysctl_tcp_frto; | |||
89 | int sysctl_tcp_nometrics_save; | 89 | int sysctl_tcp_nometrics_save; |
90 | 90 | ||
91 | int sysctl_tcp_moderate_rcvbuf = 1; | 91 | int sysctl_tcp_moderate_rcvbuf = 1; |
92 | int sysctl_tcp_abc = 1; | ||
92 | 93 | ||
93 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 94 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
94 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 95 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1247 | tp->snd_cwnd_cnt = 0; | 1248 | tp->snd_cwnd_cnt = 0; |
1248 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1249 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1249 | 1250 | ||
1251 | tp->bytes_acked = 0; | ||
1250 | tcp_clear_retrans(tp); | 1252 | tcp_clear_retrans(tp); |
1251 | 1253 | ||
1252 | /* Push undo marker, if it was plain RTO and nothing | 1254 | /* Push undo marker, if it was plain RTO and nothing |
@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1904 | TCP_ECN_queue_cwr(tp); | 1906 | TCP_ECN_queue_cwr(tp); |
1905 | } | 1907 | } |
1906 | 1908 | ||
1909 | tp->bytes_acked = 0; | ||
1907 | tp->snd_cwnd_cnt = 0; | 1910 | tp->snd_cwnd_cnt = 0; |
1908 | tcp_set_ca_state(sk, TCP_CA_Recovery); | 1911 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
1909 | } | 1912 | } |
@@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2310 | if (before(ack, prior_snd_una)) | 2313 | if (before(ack, prior_snd_una)) |
2311 | goto old_ack; | 2314 | goto old_ack; |
2312 | 2315 | ||
2316 | if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) | ||
2317 | tp->bytes_acked += ack - prior_snd_una; | ||
2318 | |||
2313 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { | 2319 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { |
2314 | /* Window is constant, pure forward advance. | 2320 | /* Window is constant, pure forward advance. |
2315 | * No more checks are required. | 2321 | * No more checks are required. |
@@ -4370,6 +4376,7 @@ discard: | |||
4370 | 4376 | ||
4371 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 4377 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
4372 | EXPORT_SYMBOL(sysctl_tcp_reordering); | 4378 | EXPORT_SYMBOL(sysctl_tcp_reordering); |
4379 | EXPORT_SYMBOL(sysctl_tcp_abc); | ||
4373 | EXPORT_SYMBOL(tcp_parse_options); | 4380 | EXPORT_SYMBOL(tcp_parse_options); |
4374 | EXPORT_SYMBOL(tcp_rcv_established); | 4381 | EXPORT_SYMBOL(tcp_rcv_established); |
4375 | EXPORT_SYMBOL(tcp_rcv_state_process); | 4382 | EXPORT_SYMBOL(tcp_rcv_state_process); |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b1a63b2c6b4a..9203a21e299f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
380 | */ | 380 | */ |
381 | newtp->snd_cwnd = 2; | 381 | newtp->snd_cwnd = 2; |
382 | newtp->snd_cwnd_cnt = 0; | 382 | newtp->snd_cwnd_cnt = 0; |
383 | newtp->bytes_acked = 0; | ||
383 | 384 | ||
384 | newtp->frto_counter = 0; | 385 | newtp->frto_counter = 0; |
385 | newtp->frto_highmark = 0; | 386 | newtp->frto_highmark = 0; |