From 8d987e5c75107ca7515fa19e857cfa24aab6ec8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Nov 2010 23:24:26 +0000 Subject: net: avoid limits overflow Robin Holt tried to boot a 16TB machine and found some limits were reached : sysctl_tcp_mem[2], sysctl_udp_mem[2] We can switch infrastructure to use long "instead" of "int", now atomic_long_t primitives are available for free. Signed-off-by: Eric Dumazet Reported-by: Robin Holt Reviewed-by: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4fee0424af7e..e36c874c7fb1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,7 +224,7 @@ extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; extern int sysctl_tcp_dsack; -extern int sysctl_tcp_mem[3]; +extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; @@ -247,7 +247,7 @@ extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern atomic_t tcp_memory_allocated; +extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; @@ -280,7 +280,7 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) } if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + atomic_long_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) return true; return false; } -- cgit v1.2.2 From 3f419d2d487821093ee46e898b5f8747f9edc9cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Nov 2010 13:37:14 -0800 Subject: inet: Turn ->remember_stamp into ->get_peer in connection AF ops. Then we can make a completely generic tcp_remember_stamp() that uses ->get_peer() as a helper, minimizing the AF specific code and minimizing the eventual code duplication when we implement the ipv6 side of TW recycling. Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index e36c874c7fb1..3e239641d4ee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -312,7 +312,7 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); -extern int tcp_v4_remember_stamp(struct sock *sk); +extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); -- cgit v1.2.2 From ccb7c410ddc054b8c1ae780319bc98ae092d3854 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Dec 2010 18:09:13 -0800 Subject: timewait_sock: Create and use getpeer op. The only thing AF-specific about remembering the timestamp for a time-wait TCP socket is getting the peer. Abstract that behind a new timewait_sock_ops vector. Support for real IPV6 sockets is not filled in yet, but curiously this makes timewait recycling start to work for v4-mapped ipv6 sockets. Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3e239641d4ee..4097320caa25 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -313,6 +313,7 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); +extern void *tcp_v4_tw_get_peer(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); -- cgit v1.2.2 From dca9b2404a6d6579828da2425c051462701efd3f Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 1 Dec 2010 18:05:17 +0000 Subject: net: kill unused macros from head file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These macros have been defined for several years since v2.6.12-rc2(tracing by git), but never be used. So remove them. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4097320caa25..3f227baee4be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -100,12 +100,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a * connection: ~180sec is RFC minimum */ - -#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned - * socket. 7 is ~50sec-16min. - */ - - #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN -- cgit v1.2.2 From bc2ce894e113ed95b92541134b002fdc641e8080 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Dec 2010 14:08:34 -0800 Subject: tcp: relax tcp_paws_check() Some windows versions have wrong RFC1323 implementations, with SYN and SYNACKS messages containing zero tcp timestamps. We relaxed in commit fc1ad92dfc4e363 the passive connection case (Windows connects to a linux machine), but the reverse case (linux connects to a Windows machine) has an analogue problem when tsvals from windows machine are 'negative' (high order bit set) : PAWS triggers and we drops incoming messages. Fix this by making zero ts_recent value special, allowing frame to be processed. Based on a report and initial patch from Dmitiy Balakin Bugzilla reference : https://bugzilla.kernel.org/show_bug.cgi?id=24842 Reported-by: dmitriy.balakin@nicneiron.ru Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3f227baee4be..2ab6c9c1c53a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1038,7 +1038,13 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, return 1; if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)) return 1; - + /* + * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, + * then following tcp messages have valid values. Ignore 0 value, + * or else 'negative' tsval might forbid us to accept their packets. + */ + if (!rx_opt->ts_recent) + return 1; return 0; } -- cgit v1.2.2 From 4c306a9291a077879fc3e933326caac3bc319caa Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sun, 19 Dec 2010 21:59:35 -0800 Subject: net: kill unused macros These macros never be used, so remove them. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 2ab6c9c1c53a..b4480300cadf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1158,8 +1158,6 @@ struct tcp_md5sig_pool { union tcp_md5sum_block md5_blk; }; -#define TCP_MD5SIG_MAXKEYS (~(u32)0) /* really?! */ - /* - functions */ extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, struct sock *sk, struct request_sock *req, -- cgit v1.2.2 From 356f039822b8d802138f7121c80d2a9286976dbd Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Mon, 20 Dec 2010 14:15:56 +0000 Subject: TCP: increase default initial receive window. This patch changes the default initial receive window to 10 mss (defined constant). The default window is limited to the maximum of 10*1460 and 2*mss (when mss > 1460). draft-ietf-tcpm-initcwnd-00 is a proposal to the IETF that recommends increasing TCP's initial congestion window to 10 mss or about 15KB. Leading up to this proposal were several large-scale live Internet experiments with an initial congestion window of 10 mss (IW10), where we showed that the average latency of HTTP responses improved by approximately 10%. This was accompanied by a slight increase in retransmission rate (0.5%), most of which is coming from applications opening multiple simultaneous connections. To understand the extreme worst case scenarios, and fairness issues (IW10 versus IW3), we further conducted controlled testbed experiments. We came away finding minimal negative impact even under low link bandwidths (dial-ups) and small buffers. These results are extremely encouraging to adopting IW10. However, an initial congestion window of 10 mss is useless unless a TCP receiver advertises an initial receive window of at least 10 mss. Fortunately, in the large-scale Internet experiments we found that most widely used operating systems advertised large initial receive windows of 64KB, allowing us to experiment with a wide range of initial congestion windows. Linux systems were among the few exceptions that advertised a small receive window of 6KB. The purpose of this patch is to fix this shortcoming. References: 1. A comprehensive list of all IW10 references to date. http://code.google.com/speed/protocols/tcpm-IW10.html 2. Paper describing results from large-scale Internet experiments with IW10. http://ccr.sigcomm.org/drupal/?q=node/621 3. Controlled testbed experiments under worst case scenarios and a fairness study. http://www.ietf.org/proceedings/79/slides/tcpm-0.pdf 4. Raw test data from testbed experiments (Linux senders/receivers) with initial congestion and receive windows of both 10 mss. http://research.csc.ncsu.edu/netsrv/?q=content/iw10 5. Internet-Draft. Increasing TCP's Initial Window. https://datatracker.ietf.org/doc/draft-ietf-tcpm-initcwnd/ Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index b4480300cadf..38509f047382 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -60,6 +60,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define MAX_TCP_WINDOW 32767U +/* Offer an initial receive window of 10 mss. */ +#define TCP_DEFAULT_INIT_RCVWND 10 + /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U -- cgit v1.2.2