From 4c3a76abd379d9a4668b2d417baa991de9757dc2 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 19:03:26 +0000 Subject: bridge: netfilter: fix a memory leak nf_bridge_alloc() always reset the skb->nf_bridge, so we should always put the old one. Signed-off-by: Changli Gao Signed-off-by: Bart De Schuymer Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2c911c0759c2..5ed00bd7009f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -162,8 +162,8 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) if (tmp) { memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); atomic_set(&tmp->use, 1); - nf_bridge_put(nf_bridge); } + nf_bridge_put(nf_bridge); nf_bridge = tmp; } return nf_bridge; -- cgit v1.2.2 From ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 25 Aug 2010 02:27:49 -0700 Subject: tcp: Combat per-cpu skew in orphan tests. As reported by Anton Blanchard when we use percpu_counter_read_positive() to make our orphan socket limit checks, the check can be off by up to num_cpus_online() * batch (which is 32 by default) which on a 128 cpu machine can be as large as the default orphan limit itself. Fix this by doing the full expensive sum check if the optimized check triggers. Reported-by: Anton Blanchard Signed-off-by: David S. 
Miller Acked-by: Eric Dumazet --- net/ipv4/tcp.c | 5 +---- net/ipv4/tcp_timer.c | 8 ++++---- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..197b9b77fa3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2011,11 +2011,8 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - int orphan_count = percpu_counter_read_positive( - sk->sk_prot->orphan_count); - sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, orphan_count)) { + if (tcp_too_many_orphans(sk, 0)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..c35b469e851c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = percpu_counter_read_positive(&tcp_orphan_count); + int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) - orphans <<= 1; + shift++; /* If some dubious ICMP arrived, penalize even more. */ if (sk->sk_err_soft) - orphans <<= 1; + shift++; - if (tcp_too_many_orphans(sk, orphans)) { + if (tcp_too_many_orphans(sk, shift)) { if (net_ratelimit()) printk(KERN_INFO "Out of socket memory\n"); -- cgit v1.2.2 From c5ed63d66f24fd4f7089b5a6e087b0ce7202aa8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Aug 2010 23:02:17 -0700 Subject: tcp: fix three tcp sysctls tuning As discovered by Anton Blanchard, current code to autotune tcp_death_row.sysctl_max_tw_buckets, sysctl_tcp_max_orphans and sysctl_max_syn_backlog makes little sense. The bigger a page is, the less tcp_max_orphans is : 4096 on a 512GB machine in Anton's case. 
(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)) is much bigger if spinlock debugging is on. It's wrong to select bigger limits in this case (where kernel structures are also bigger). bhash_size max is 65536, and we get this value even for small machines. A better ground is to use the size of the ehash table; this also makes the code shorter and more obvious. Based on a patch from Anton, and another from David. Reported-and-tested-by: Anton Blanchard Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 197b9b77fa3e..e2add5ff9cb1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3209,7 +3209,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long nr_pages, limit; - int order, i, max_share; + int i, max_share, cnt; unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3258,22 +3258,12 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); } - /* Try to be a bit smarter and adjust defaults depending - * on available memory.
- */ - for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); - order++) - ; - if (order >= 4) { - tcp_death_row.sysctl_max_tw_buckets = 180000; - sysctl_tcp_max_orphans = 4096 << (order - 4); - sysctl_max_syn_backlog = 1024; - } else if (order < 3) { - tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); - sysctl_tcp_max_orphans >>= (3 - order); - sysctl_max_syn_backlog = 128; - } + + cnt = tcp_hashinfo.ehash_mask + 1; + + tcp_death_row.sysctl_max_tw_buckets = cnt / 2; + sysctl_tcp_max_orphans = cnt / 2; + sysctl_max_syn_backlog = max(128, cnt / 256); /* Set the pressure threshold to be a fraction of global memory that * is up to 1/2 at 256 MB, decreasing toward zero with the amount of -- cgit v1.2.2 From d84ba638e4ba3c40023ff997aa5e8d3ed002af36 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 Aug 2010 16:05:48 +0000 Subject: tcp: select(writefds) don't hang up when a peer close connection This issue comes from the Ruby language community. The test program below hangs only when run on Linux. % uname -mrsv Linux 2.6.26-2-486 #1 Sat Dec 26 08:37:39 UTC 2009 i686 % ruby -rsocket -ve ' BasicSocket.do_not_reverse_lookup = true serv = TCPServer.open("127.0.0.1", 0) s1 = TCPSocket.open("127.0.0.1", serv.addr[1]) s2 = serv.accept s2.close s1.write("a") rescue p $! s1.write("a") rescue p $! Thread.new { s1.write("a") }.join' ruby 1.9.3dev (2010-07-06 trunk 28554) [i686-linux] # [Hang Here] FreeBSD, Solaris and Mac don't hang. Linux hangs because Ruby's write() method calls select() internally, and tcp_poll has a bug. SUS defines 'ready for writing' for select() as follows. | A descriptor shall be considered ready for writing when a call to an output | function with O_NONBLOCK clear would not block, whether or not the function | would transfer data successfully. That said, the EPIPE situation is clearly one of 'ready for writing'. We don't have a read-side issue because tcp_poll() already takes care of read-side shutdown.
| if (sk->sk_shutdown & RCV_SHUTDOWN) | mask |= POLLIN | POLLRDNORM | POLLRDHUP; So, let's insert the same logic on the write side. - reference url http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31065 http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31068 Signed-off-by: KOSAKI Motohiro Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e2add5ff9cb1..3fb1428e526e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -451,7 +451,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) mask |= POLLOUT | POLLWRNORM; } - } + } else + mask |= POLLOUT | POLLWRNORM; if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; -- cgit v1.2.2 From bfc960a8eec023a170a80697fe65157cd4f44f81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Aug 2010 23:44:35 +0000 Subject: l2tp: test for ethernet header in l2tp_eth_dev_recv() close https://bugzilla.kernel.org/show_bug.cgi?id=16529 Before calling dev_forward_skb(), we should make sure skb head contains at least an ethernet header, even if length included in upper layer said so. Use pskb_may_pull() to make sure this ethernet header is present in skb head. Reported-by: Thomas Heil Reported-by: Ian Campbell Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 58c6c4cda73b..1ae697681bc7 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, printk("\n"); } - if (data_len < ETH_HLEN) + if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) goto error; secpath_reset(skb); -- cgit v1.2.2 From d71b0e9c0028f3af910226f995e0074873e16979 Mon Sep 17 00:00:00 2001 From: Bernard Pidoux F6BVP Date: Thu, 26 Aug 2010 11:40:00 +0000 Subject: ax25: missplaced sock_put(sk) This patch moves a misplaced sock_put(sk) after bh_unlock_sock(sk), as in other parts of the AX25 driver. Signed-off-by: Bernard Pidoux Signed-off-by: David S. Miller --- net/ax25/ax25_ds_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 2ce79df00680..c7d81436213d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -112,8 +112,8 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) if (sk) { sock_hold(sk); ax25_destroy_socket(ax25); - sock_put(sk); bh_unlock_sock(sk); + sock_put(sk); } else ax25_destroy_socket(ax25); return; -- cgit v1.2.2 From 7e368739e3b3f1d7944794c178a15f05829b56bc Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 26 Aug 2010 16:11:08 -0700 Subject: net/caif/cfrfml.c: use asm/unaligned.h caif does not build on ia64 starting with 2.6.32-rc1. Using asm/unaligned.h instead of linux/unaligned/le_byteshift.h fixes the issue.
include/linux/unaligned/le_byteshift.h:40:50: error: redefinition of 'get_unaligned_le16' include/linux/unaligned/le_byteshift.h:45:50: error: redefinition of 'get_unaligned_le32' include/linux/unaligned/le_byteshift.h:50:50: error: redefinition of 'get_unaligned_le64' include/linux/unaligned/le_byteshift.h:55:51: error: redefinition of 'put_unaligned_le16' include/linux/unaligned/le_byteshift.h:60:51: error: redefinition of 'put_unaligned_le32' include/linux/unaligned/le_byteshift.h:65:51: error: redefinition of 'put_unaligned_le64' include/linux/unaligned/le_struct.h:31:51: note: previous definition of 'put_unaligned_le64' was here Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/caif/cfrfml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index eb1602022ac0..9a699242d104 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -7,7 +7,7 @@ #include #include #include -#include <linux/unaligned/le_byteshift.h> +#include <asm/unaligned.h> #include #include #include -- cgit v1.2.2 From c34186ed008229e7f7e3f1de8e6acf6374995358 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 27 Aug 2010 19:31:56 -0700 Subject: net/ipv4: Eliminate kstrdup memory leak The string clone is only used as a temporary copy of the argument val within the while loop, and so it should be freed before leaving the function. The call to strsep, however, modifies clone, so a pointer to the front of the string is kept in saved_clone, to make it possible to free it. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ local idexpression x; expression E; identifier l; statement S; @@ *x= \(kasprintf\|kstrdup\)(...); ... if (x == NULL) S ... when != kfree(x) when != E = x if (...) { <... when != kfree(x) * goto l; ...> * return ...; } // Signed-off-by: Julia Lawall Signed-off-by: David S.
Miller --- net/ipv4/tcp_cong.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 0ec9bd0ae94f..850c737e08e2 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -196,10 +196,10 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) int tcp_set_allowed_congestion_control(char *val) { struct tcp_congestion_ops *ca; - char *clone, *name; + char *saved_clone, *clone, *name; int ret = 0; - clone = kstrdup(val, GFP_USER); + saved_clone = clone = kstrdup(val, GFP_USER); if (!clone) return -ENOMEM; @@ -226,6 +226,7 @@ int tcp_set_allowed_congestion_control(char *val) } out: spin_unlock(&tcp_cong_list_lock); + kfree(saved_clone); return ret; } -- cgit v1.2.2