From 22b71c8f4f3db8df92f5e7b081c265bc56c0bd2f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:23:12 +0000 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates initial-window code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..a64022199b62 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,6 +779,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); -- cgit v1.2.2 From 3d5b99ae82f8742e3bb1f8634fd11ac36ea19ee1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:27:34 +0000 Subject: TCP: update initial windows according to RFC 5681 This updates the use of larger initial windows, as originally specified in RFC 3390, to use the newer IW values specified in RFC 5681, section 3.1. The changes made in RFC 5681 are: a) the setting now is more clearly specified in units of segments (as the comments by John Heffner emphasized, this was not very clear in RFC 3390); b) for connections with 1095 < SMSS <= 2190 there is now a change: - RFC 3390 says that IW <= 4380, - RFC 5681 says that IW = 3 * SMSS <= 6570. Since RFC 3390 is older and "only" proposed standard, whereas the newer RFC 5681 is already draft standard, it seems preferable to use the newer IW variant. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index a64022199b62..11dbacc886c0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -781,17 +781,11 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* * Convert RFC 3390 larger initial window into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. + * This is based on the numbers specified in RFC 5681, 3.1. */ static inline u32 rfc3390_bytes_to_packets(const u32 smss) { - return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); } extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -- cgit v1.2.2 From 1b9f409293529da4630bfc5d6d8e7d7451a6ccb5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 28 Sep 2010 19:30:14 +0000 Subject: tcp: tcp_enter_quickack_mode can be static Function only used in tcp_input.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 914a60c7ad62..4fee0424af7e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -346,8 +346,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, } } -extern void tcp_enter_quickack_mode(struct sock *sk); - #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 -- cgit v1.2.2 From 8d987e5c75107ca7515fa19e857cfa24aab6ec8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Nov 2010 23:24:26 +0000 Subject: net: avoid limits overflow Robin Holt tried to boot a 16TB machine and found some limits were reached : sysctl_tcp_mem[2], sysctl_udp_mem[2] We can switch infrastructure to use long "instead" of "int", now atomic_long_t primitives are available for free. Signed-off-by: Eric Dumazet Reported-by: Robin Holt Reviewed-by: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4fee0424af7e..e36c874c7fb1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,7 +224,7 @@ extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; extern int sysctl_tcp_dsack; -extern int sysctl_tcp_mem[3]; +extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; @@ -247,7 +247,7 @@ extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern atomic_t tcp_memory_allocated; +extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; @@ -280,7 +280,7 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) } if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + atomic_long_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) return true; return false; } -- cgit v1.2.2 From 3f419d2d487821093ee46e898b5f8747f9edc9cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Nov 2010 13:37:14 -0800 Subject: inet: Turn ->remember_stamp into ->get_peer in connection AF ops. Then we can make a completely generic tcp_remember_stamp() that uses ->get_peer() as a helper, minimizing the AF specific code and minimizing the eventual code duplication when we implement the ipv6 side of TW recycling. Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index e36c874c7fb1..3e239641d4ee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -312,7 +312,7 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); -extern int tcp_v4_remember_stamp(struct sock *sk); +extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); -- cgit v1.2.2 From ccb7c410ddc054b8c1ae780319bc98ae092d3854 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Dec 2010 18:09:13 -0800 Subject: timewait_sock: Create and use getpeer op. The only thing AF-specific about remembering the timestamp for a time-wait TCP socket is getting the peer. Abstract that behind a new timewait_sock_ops vector. Support for real IPV6 sockets is not filled in yet, but curiously this makes timewait recycling start to work for v4-mapped ipv6 sockets. Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3e239641d4ee..4097320caa25 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -313,6 +313,7 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); +extern void *tcp_v4_tw_get_peer(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); -- cgit v1.2.2 From dca9b2404a6d6579828da2425c051462701efd3f Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 1 Dec 2010 18:05:17 +0000 Subject: net: kill unused macros from head file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These macros have been defined for several years since v2.6.12-rc2(tracing by git), but never be used. So remove them. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4097320caa25..3f227baee4be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -100,12 +100,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a * connection: ~180sec is RFC minimum */ - -#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned - * socket. 7 is ~50sec-16min. - */ - - #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN -- cgit v1.2.2 From bc2ce894e113ed95b92541134b002fdc641e8080 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Dec 2010 14:08:34 -0800 Subject: tcp: relax tcp_paws_check() Some windows versions have wrong RFC1323 implementations, with SYN and SYNACKS messages containing zero tcp timestamps. We relaxed in commit fc1ad92dfc4e363 the passive connection case (Windows connects to a linux machine), but the reverse case (linux connects to a Windows machine) has an analogue problem when tsvals from windows machine are 'negative' (high order bit set) : PAWS triggers and we drops incoming messages. Fix this by making zero ts_recent value special, allowing frame to be processed. Based on a report and initial patch from Dmitiy Balakin Bugzilla reference : https://bugzilla.kernel.org/show_bug.cgi?id=24842 Reported-by: dmitriy.balakin@nicneiron.ru Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3f227baee4be..2ab6c9c1c53a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1038,7 +1038,13 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, return 1; if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)) return 1; - + /* + * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, + * then following tcp messages have valid values. Ignore 0 value, + * or else 'negative' tsval might forbid us to accept their packets. + */ + if (!rx_opt->ts_recent) + return 1; return 0; } -- cgit v1.2.2 From 4c306a9291a077879fc3e933326caac3bc319caa Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sun, 19 Dec 2010 21:59:35 -0800 Subject: net: kill unused macros These macros never be used, so remove them. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 2ab6c9c1c53a..b4480300cadf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1158,8 +1158,6 @@ struct tcp_md5sig_pool { union tcp_md5sum_block md5_blk; }; -#define TCP_MD5SIG_MAXKEYS (~(u32)0) /* really?! */ - /* - functions */ extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, struct sock *sk, struct request_sock *req, -- cgit v1.2.2 From 356f039822b8d802138f7121c80d2a9286976dbd Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Mon, 20 Dec 2010 14:15:56 +0000 Subject: TCP: increase default initial receive window. This patch changes the default initial receive window to 10 mss (defined constant). The default window is limited to the maximum of 10*1460 and 2*mss (when mss > 1460). draft-ietf-tcpm-initcwnd-00 is a proposal to the IETF that recommends increasing TCP's initial congestion window to 10 mss or about 15KB. Leading up to this proposal were several large-scale live Internet experiments with an initial congestion window of 10 mss (IW10), where we showed that the average latency of HTTP responses improved by approximately 10%. This was accompanied by a slight increase in retransmission rate (0.5%), most of which is coming from applications opening multiple simultaneous connections. To understand the extreme worst case scenarios, and fairness issues (IW10 versus IW3), we further conducted controlled testbed experiments. We came away finding minimal negative impact even under low link bandwidths (dial-ups) and small buffers. These results are extremely encouraging to adopting IW10. However, an initial congestion window of 10 mss is useless unless a TCP receiver advertises an initial receive window of at least 10 mss. Fortunately, in the large-scale Internet experiments we found that most widely used operating systems advertised large initial receive windows of 64KB, allowing us to experiment with a wide range of initial congestion windows. Linux systems were among the few exceptions that advertised a small receive window of 6KB. The purpose of this patch is to fix this shortcoming. References: 1. A comprehensive list of all IW10 references to date. http://code.google.com/speed/protocols/tcpm-IW10.html 2. Paper describing results from large-scale Internet experiments with IW10. http://ccr.sigcomm.org/drupal/?q=node/621 3. Controlled testbed experiments under worst case scenarios and a fairness study. http://www.ietf.org/proceedings/79/slides/tcpm-0.pdf 4. Raw test data from testbed experiments (Linux senders/receivers) with initial congestion and receive windows of both 10 mss. http://research.csc.ncsu.edu/netsrv/?q=content/iw10 5. Internet-Draft. Increasing TCP's Initial Window. https://datatracker.ietf.org/doc/draft-ietf-tcpm-initcwnd/ Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index b4480300cadf..38509f047382 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -60,6 +60,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define MAX_TCP_WINDOW 32767U +/* Offer an initial receive window of 10 mss. */ +#define TCP_DEFAULT_INIT_RCVWND 10 + /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U -- cgit v1.2.2 From 04ed3e741d0f133e02bed7fa5c98edba128f90e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 24 Jan 2011 15:32:47 -0800 Subject: net: change netdev->features to u32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting Ben Hutchings: we presumably won't be defining features that can only be enabled on 64-bit architectures. Occurences found by `grep -r` on net/, drivers/net, include/ [ Move features and vlan_features next to each other in struct netdev, as per Eric Dumazet's suggestion -DaveM ] Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 38509f047382..917911165e3b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1404,7 +1404,7 @@ extern struct request_sock_ops tcp6_request_sock_ops; extern void tcp_v4_destroy_sock(struct sock *sk); extern int tcp_v4_gso_send_check(struct sk_buff *skb); -extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); +extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features); extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb); extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, -- cgit v1.2.2 From 442b9635c569fef038d5367a7acd906db4677ae1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Feb 2011 17:05:11 -0800 Subject: tcp: Increase the initial congestion window to 10. Signed-off-by: David S. Miller Acked-by: Nandita Dukkipati --- include/net/tcp.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 917911165e3b..7118668ad534 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* TCP thin-stream limits */ #define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */ +/* TCP initial congestion window */ +#define TCP_INIT_CWND 10 + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ @@ -799,15 +802,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) -/* - * Convert RFC 3390 larger initial window into an equivalent number of packets. - * This is based on the numbers specified in RFC 5681, 3.1. - */ -static inline u32 rfc3390_bytes_to_packets(const u32 smss) -{ - return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); -} - extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); -- cgit v1.2.2 From 7eb38527c4e485923fa3f87d11ce11b4e6ebf807 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 5 Feb 2011 18:13:45 -0800 Subject: tcp: Add reference to initial CWND ietf draft. Suggested by Alexander Zimmermann Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7118668ad534..adfe6dbe9053 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -196,7 +196,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* TCP thin-stream limits */ #define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */ -/* TCP initial congestion window */ +/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */ #define TCP_INIT_CWND 10 extern struct inet_timewait_death_row tcp_death_row; -- cgit v1.2.2 From 089c34827e52346f0303d1e6a7b744c1f4da3095 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sat, 19 Feb 2011 21:55:45 +0000 Subject: tcp: Remove debug macro of TCP_CHECK_TIMER Now, TCP_CHECK_TIMER is not used for debuging, it does nothing. And, it has been there for several years, maybe 6 years. Remove it to keep code clearer. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index adfe6dbe9053..cda30ea354a2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1068,8 +1068,6 @@ static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt, return 1; } -#define TCP_CHECK_TIMER(sk) do { } while (0) - static inline void tcp_mib_init(struct net *net) { /* See RFC 2012 */ -- cgit v1.2.2