diff options
| -rw-r--r-- | Documentation/networking/ip-sysctl.txt | 5 | ||||
| -rw-r--r-- | include/linux/sysctl.h | 1 | ||||
| -rw-r--r-- | include/linux/tcp.h | 1 | ||||
| -rw-r--r-- | include/net/tcp.h | 19 | ||||
| -rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 8 | ||||
| -rw-r--r-- | net/ipv4/tcp.c | 1 | ||||
| -rw-r--r-- | net/ipv4/tcp_cong.c | 31 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 7 | ||||
| -rw-r--r-- | net/ipv4/tcp_minisocks.c | 1 |
9 files changed, 63 insertions, 11 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 65895bb51414..ebc09a159f62 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
| @@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER | |||
| 78 | 78 | ||
| 79 | TCP variables: | 79 | TCP variables: |
| 80 | 80 | ||
| 81 | tcp_abc - INTEGER | ||
| 82 | Controls Appropriate Byte Count defined in RFC3465. If set to | ||
| 83 | 0 then does congestion avoid once per ack. 1 is conservative | ||
| 84 | value, and 2 is more aggressive. | ||
| 85 | |||
| 81 | tcp_syn_retries - INTEGER | 86 | tcp_syn_retries - INTEGER |
| 82 | Number of times initial SYNs for an active TCP connection attempt | 87 | Number of times initial SYNs for an active TCP connection attempt |
| 83 | will be retransmitted. Should not be higher than 255. Default value | 88 | will be retransmitted. Should not be higher than 255. Default value |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 22cf5e1ac987..ab2791b3189d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
| @@ -390,6 +390,7 @@ enum | |||
| 390 | NET_TCP_BIC_BETA=108, | 390 | NET_TCP_BIC_BETA=108, |
| 391 | NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, | 391 | NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, |
| 392 | NET_TCP_CONG_CONTROL=110, | 392 | NET_TCP_CONG_CONTROL=110, |
| 393 | NET_TCP_ABC=111, | ||
| 393 | }; | 394 | }; |
| 394 | 395 | ||
| 395 | enum { | 396 | enum { |
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ac4ca44c75ca..737b32e52956 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
| @@ -326,6 +326,7 @@ struct tcp_sock { | |||
| 326 | __u32 snd_up; /* Urgent pointer */ | 326 | __u32 snd_up; /* Urgent pointer */ |
| 327 | 327 | ||
| 328 | __u32 total_retrans; /* Total retransmits for entire connection */ | 328 | __u32 total_retrans; /* Total retransmits for entire connection */ |
| 329 | __u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */ | ||
| 329 | 330 | ||
| 330 | unsigned int keepalive_time; /* time before keep alive takes place */ | 331 | unsigned int keepalive_time; /* time before keep alive takes place */ |
| 331 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ | 332 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 54c399886275..44ba4a21cbdc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
| @@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency; | |||
| 218 | extern int sysctl_tcp_nometrics_save; | 218 | extern int sysctl_tcp_nometrics_save; |
| 219 | extern int sysctl_tcp_moderate_rcvbuf; | 219 | extern int sysctl_tcp_moderate_rcvbuf; |
| 220 | extern int sysctl_tcp_tso_win_divisor; | 220 | extern int sysctl_tcp_tso_win_divisor; |
| 221 | extern int sysctl_tcp_abc; | ||
| 221 | 222 | ||
| 222 | extern atomic_t tcp_memory_allocated; | 223 | extern atomic_t tcp_memory_allocated; |
| 223 | extern atomic_t tcp_sockets_allocated; | 224 | extern atomic_t tcp_sockets_allocated; |
| @@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) | |||
| 770 | */ | 771 | */ |
| 771 | static inline void tcp_slow_start(struct tcp_sock *tp) | 772 | static inline void tcp_slow_start(struct tcp_sock *tp) |
| 772 | { | 773 | { |
| 774 | if (sysctl_tcp_abc) { | ||
| 775 | /* RFC3465: Slow Start | ||
| 776 | * TCP sender SHOULD increase cwnd by the number of | ||
| 777 | * previously unacknowledged bytes ACKed by each incoming | ||
| 778 | * acknowledgment, provided the increase is not more than L | ||
| 779 | */ | ||
| 780 | if (tp->bytes_acked < tp->mss_cache) | ||
| 781 | return; | ||
| 782 | |||
| 783 | /* We MAY increase by 2 if discovered delayed ack */ | ||
| 784 | if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { | ||
| 785 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
| 786 | tp->snd_cwnd++; | ||
| 787 | } | ||
| 788 | } | ||
| 789 | tp->bytes_acked = 0; | ||
| 790 | |||
| 773 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | 791 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) |
| 774 | tp->snd_cwnd++; | 792 | tp->snd_cwnd++; |
| 775 | } | 793 | } |
| @@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk) | |||
| 804 | struct tcp_sock *tp = tcp_sk(sk); | 822 | struct tcp_sock *tp = tcp_sk(sk); |
| 805 | 823 | ||
| 806 | tp->prior_ssthresh = 0; | 824 | tp->prior_ssthresh = 0; |
| 825 | tp->bytes_acked = 0; | ||
| 807 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 826 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
| 808 | __tcp_enter_cwr(sk); | 827 | __tcp_enter_cwr(sk); |
| 809 | tcp_set_ca_state(sk, TCP_CA_CWR); | 828 | tcp_set_ca_state(sk, TCP_CA_CWR); |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 652685623519..01444a02b48b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
| @@ -645,6 +645,14 @@ ctl_table ipv4_table[] = { | |||
| 645 | .proc_handler = &proc_tcp_congestion_control, | 645 | .proc_handler = &proc_tcp_congestion_control, |
| 646 | .strategy = &sysctl_tcp_congestion_control, | 646 | .strategy = &sysctl_tcp_congestion_control, |
| 647 | }, | 647 | }, |
| 648 | { | ||
| 649 | .ctl_name = NET_TCP_ABC, | ||
| 650 | .procname = "tcp_abc", | ||
| 651 | .data = &sysctl_tcp_abc, | ||
| 652 | .maxlen = sizeof(int), | ||
| 653 | .mode = 0644, | ||
| 654 | .proc_handler = &proc_dointvec, | ||
| 655 | }, | ||
| 648 | 656 | ||
| 649 | { .ctl_name = 0 } | 657 | { .ctl_name = 0 } |
| 650 | }; | 658 | }; |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 72b7c22e1ea5..cfaf76133759 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
| @@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
| 1669 | tp->packets_out = 0; | 1669 | tp->packets_out = 0; |
| 1670 | tp->snd_ssthresh = 0x7fffffff; | 1670 | tp->snd_ssthresh = 0x7fffffff; |
| 1671 | tp->snd_cwnd_cnt = 0; | 1671 | tp->snd_cwnd_cnt = 0; |
| 1672 | tp->bytes_acked = 0; | ||
| 1672 | tcp_set_ca_state(sk, TCP_CA_Open); | 1673 | tcp_set_ca_state(sk, TCP_CA_Open); |
| 1673 | tcp_clear_retrans(tp); | 1674 | tcp_clear_retrans(tp); |
| 1674 | inet_csk_delack_init(sk); | 1675 | inet_csk_delack_init(sk); |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6d3e883b48f6..c7cc62c8dc12 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
| @@ -192,17 +192,26 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, | |||
| 192 | /* In "safe" area, increase. */ | 192 | /* In "safe" area, increase. */ |
| 193 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 193 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
| 194 | tcp_slow_start(tp); | 194 | tcp_slow_start(tp); |
| 195 | else { | 195 | |
| 196 | /* In dangerous area, increase slowly. | 196 | /* In dangerous area, increase slowly. */ |
| 197 | * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd | 197 | else if (sysctl_tcp_abc) { |
| 198 | */ | 198 | /* RFC3465: Appropriate Byte Count |
| 199 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | 199 | * increase once for each full cwnd acked |
| 200 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | 200 | */ |
| 201 | tp->snd_cwnd++; | 201 | if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { |
| 202 | tp->snd_cwnd_cnt = 0; | 202 | tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; |
| 203 | } else | 203 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) |
| 204 | tp->snd_cwnd_cnt++; | 204 | tp->snd_cwnd++; |
| 205 | } | 205 | } |
| 206 | } else { | ||
| 207 | /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ | ||
| 208 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | ||
| 209 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
| 210 | tp->snd_cwnd++; | ||
| 211 | tp->snd_cwnd_cnt = 0; | ||
| 212 | } else | ||
| 213 | tp->snd_cwnd_cnt++; | ||
| 214 | } | ||
| 206 | } | 215 | } |
| 207 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); | 216 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); |
| 208 | 217 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e43065654930..4cb5e6f408dc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -89,6 +89,7 @@ int sysctl_tcp_frto; | |||
| 89 | int sysctl_tcp_nometrics_save; | 89 | int sysctl_tcp_nometrics_save; |
| 90 | 90 | ||
| 91 | int sysctl_tcp_moderate_rcvbuf = 1; | 91 | int sysctl_tcp_moderate_rcvbuf = 1; |
| 92 | int sysctl_tcp_abc = 1; | ||
| 92 | 93 | ||
| 93 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 94 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
| 94 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 95 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
| @@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
| 1247 | tp->snd_cwnd_cnt = 0; | 1248 | tp->snd_cwnd_cnt = 0; |
| 1248 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1249 | tp->snd_cwnd_stamp = tcp_time_stamp; |
| 1249 | 1250 | ||
| 1251 | tp->bytes_acked = 0; | ||
| 1250 | tcp_clear_retrans(tp); | 1252 | tcp_clear_retrans(tp); |
| 1251 | 1253 | ||
| 1252 | /* Push undo marker, if it was plain RTO and nothing | 1254 | /* Push undo marker, if it was plain RTO and nothing |
| @@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
| 1904 | TCP_ECN_queue_cwr(tp); | 1906 | TCP_ECN_queue_cwr(tp); |
| 1905 | } | 1907 | } |
| 1906 | 1908 | ||
| 1909 | tp->bytes_acked = 0; | ||
| 1907 | tp->snd_cwnd_cnt = 0; | 1910 | tp->snd_cwnd_cnt = 0; |
| 1908 | tcp_set_ca_state(sk, TCP_CA_Recovery); | 1911 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
| 1909 | } | 1912 | } |
| @@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
| 2310 | if (before(ack, prior_snd_una)) | 2313 | if (before(ack, prior_snd_una)) |
| 2311 | goto old_ack; | 2314 | goto old_ack; |
| 2312 | 2315 | ||
| 2316 | if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) | ||
| 2317 | tp->bytes_acked += ack - prior_snd_una; | ||
| 2318 | |||
| 2313 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { | 2319 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { |
| 2314 | /* Window is constant, pure forward advance. | 2320 | /* Window is constant, pure forward advance. |
| 2315 | * No more checks are required. | 2321 | * No more checks are required. |
| @@ -4370,6 +4376,7 @@ discard: | |||
| 4370 | 4376 | ||
| 4371 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 4377 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
| 4372 | EXPORT_SYMBOL(sysctl_tcp_reordering); | 4378 | EXPORT_SYMBOL(sysctl_tcp_reordering); |
| 4379 | EXPORT_SYMBOL(sysctl_tcp_abc); | ||
| 4373 | EXPORT_SYMBOL(tcp_parse_options); | 4380 | EXPORT_SYMBOL(tcp_parse_options); |
| 4374 | EXPORT_SYMBOL(tcp_rcv_established); | 4381 | EXPORT_SYMBOL(tcp_rcv_established); |
| 4375 | EXPORT_SYMBOL(tcp_rcv_state_process); | 4382 | EXPORT_SYMBOL(tcp_rcv_state_process); |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b1a63b2c6b4a..9203a21e299f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
| @@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
| 380 | */ | 380 | */ |
| 381 | newtp->snd_cwnd = 2; | 381 | newtp->snd_cwnd = 2; |
| 382 | newtp->snd_cwnd_cnt = 0; | 382 | newtp->snd_cwnd_cnt = 0; |
| 383 | newtp->bytes_acked = 0; | ||
| 383 | 384 | ||
| 384 | newtp->frto_counter = 0; | 385 | newtp->frto_counter = 0; |
| 385 | newtp->frto_highmark = 0; | 386 | newtp->frto_highmark = 0; |
