From 99709372736a216f99eb32b76fba835a2bfc93a8 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 12 Feb 2009 16:43:17 -0800
Subject: net: don't use in_atomic() in gfp_any()

The problem is that in_atomic() will return false inside spinlocks if
CONFIG_PREEMPT=n.  This will lead to deadlockable GFP_KERNEL allocations
from spinlocked regions.

Secondly, if CONFIG_PREEMPT=y, this bug solves itself because networking
will instead use GFP_ATOMIC from this callsite.  Hence we won't get the
might_sleep() debugging warnings which would have informed us of the buggy
callsites.

Solve both these problems by switching to in_interrupt().  Now, if someone
runs a gfp_any() allocation from inside spinlock we will get the warning
if CONFIG_PREEMPT=y.

I reviewed all callsites and most of them were too complex for my little
brain and none of them documented their interface requirements.  I have no
idea what this patch will do.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 5a3a151bd730..ce3b5b622683 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1308,7 +1308,7 @@ static inline int sock_writeable(const struct sock *sk)
 
 static inline gfp_t gfp_any(void)
 {
-	return in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
+	return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
 }
 
 static inline long sock_rcvtimeo(const struct sock *sk, int noblock)
-- 
cgit v1.2.2


From 92a0acce186cde8ead56c6915d9479773673ea1a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Feb 2009 21:24:05 -0800
Subject: net: Kill skb_truesize_check(), it only catches false-positives.

A long time ago we had bugs, primarily in TCP, where we would modify
skb->truesize (for TSO queue collapsing) in ways which would corrupt
the socket memory accounting.

skb_truesize_check() was added in order to try and catch this error
more systematically.

However this debugging check has morphed into a Frankenstein of sorts
and these days it does nothing other than catch false-positives.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index ce3b5b622683..eefeeaf7fc46 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -860,7 +860,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
 
 static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
-	skb_truesize_check(skb);
 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 	sk->sk_wmem_queued -= skb->truesize;
 	sk_mem_uncharge(sk, skb->truesize);
-- 
cgit v1.2.2


From 5962fc6d5fff09c8e6fb8cadcb18327a0f4277f7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 18 Feb 2009 15:30:34 +0100
Subject: netfilter: nf_conntrack: don't try to deliver events for untracked
 connections

The untracked conntrack actually does usually have events marked for
delivery as its not special-cased in that part of the code. Skip the
actual delivery since it impacts performance noticeably.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index e78afe7f28e3..c25068e38516 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -59,7 +59,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
 	int ret = NF_ACCEPT;
 
-	if (ct) {
+	if (ct && ct != &nf_conntrack_untracked) {
 		if (!nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
 			ret = __nf_conntrack_confirm(skb);
 		nf_ct_deliver_cached_events(ct);
-- 
cgit v1.2.2


From 17edde520927070a6bf14a6a75027c0b843443e5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Sun, 22 Feb 2009 00:11:09 -0800
Subject: netns: Remove net_alive

It turns out that net_alive is unnecessary, and the original problem
that led to it being added was simply that the icmp code thought
it was a network device and wound up being unable to handle packets
while there were still packets in the network namespace.

Now that icmp and tcp have been fixed to properly register themselves
this problem is no longer present and we have a stronger guarantee
that packets will not arrive in a network namespace then that provided
by net_alive in netif_receive_skb.  So remove net_alive allowing
packet reception run a little faster.

Additionally document the strong reason why network namespace cleanup
is safe so that if something happens again someone else will have
a chance of figuring it out.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 6fc13d905c5f..ded434b032a4 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -109,11 +109,6 @@ extern struct list_head net_namespace_list;
 #ifdef CONFIG_NET_NS
 extern void __put_net(struct net *net);
 
-static inline int net_alive(struct net *net)
-{
-	return net && atomic_read(&net->count);
-}
-
 static inline struct net *get_net(struct net *net)
 {
 	atomic_inc(&net->count);
@@ -145,11 +140,6 @@ int net_eq(const struct net *net1, const struct net *net2)
 }
 #else
 
-static inline int net_alive(struct net *net)
-{
-	return 1;
-}
-
 static inline struct net *get_net(struct net *net)
 {
 	return net;
@@ -234,6 +224,23 @@ struct pernet_operations {
 	void (*exit)(struct net *net);
 };
 
+/*
+ * Use these carefully.  If you implement a network device and it
+ * needs per network namespace operations use device pernet operations,
+ * otherwise use pernet subsys operations.
+ *
+ * This is critically important.  Most of the network code cleanup
+ * runs with the assumption that dev_remove_pack has been called so no
+ * new packets will arrive during and after the cleanup functions have
+ * been called.  dev_remove_pack is not per namespace so instead the
+ * guarantee of no more packets arriving in a network namespace is
+ * provided by ensuring that all network devices and all sockets have
+ * left the network namespace before the cleanup methods are called.
+ *
+ * For the longest time the ipv4 icmp code was registered as a pernet
+ * device which caused kernel oops, and panics during network
+ * namespace cleanup.   So please don't get this wrong.
+ */
 extern int register_pernet_subsys(struct pernet_operations *);
 extern void unregister_pernet_subsys(struct pernet_operations *);
 extern int register_pernet_gen_subsys(int *id, struct pernet_operations *);
-- 
cgit v1.2.2