From 6a57b2ee4506bb8a3d2f7ba2f62ff65ec56ba150 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <patrick@tykepenguin.com>
Date: Wed, 29 Mar 2006 13:57:31 -0800
Subject: [DECNET]: Fix refcount

From: Patrick Caulfield <patrick@tykepenguin.com>

This patch fixes a bug in the reference counting for the default
DECnet device.

If the device is changed, then the new device had its refcount
decremented rather than the old one!

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index d2ae9893ca17..a26ff9f44576 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -620,7 +620,7 @@ int dn_dev_set_default(struct net_device *dev, int force)
 	}
 	write_unlock(&dndev_lock);
 	if (old)
-		dev_put(dev);
+		dev_put(old);
 	return rv;
 }
 
-- 
cgit v1.2.2


From 68907dad58cd7ef11536e1db6baeb98b20af91b2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 29 Mar 2006 13:58:25 -0800
Subject: [DCCP]: Use NULL for pointers, comfort sparse.

From: Randy Dunlap <rdunlap@xenotime.net>

Use NULL instead of 0 for pointers.
Fix these sparse warnings:
net/dccp/feat.c:207:20: warning: Using plain integer as NULL pointer
net/dccp/feat.c:325:21: warning: Using plain integer as NULL pointer
net/dccp/feat.c:526:20: warning: Using plain integer as NULL pointer

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/feat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index e3dd30d36c8a..b39e2a597889 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -204,7 +204,7 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
 	if (rc) {
 		kfree(opt->dccpop_sc->dccpoc_val);
 		kfree(opt->dccpop_sc);
-		opt->dccpop_sc = 0;
+		opt->dccpop_sc = NULL;
 		return rc;
 	}
 
@@ -322,7 +322,7 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
 	opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R :
 						    DCCPO_CONFIRM_L;
 	opt->dccpop_feat = feature;
-	opt->dccpop_val	 = 0;
+	opt->dccpop_val	 = NULL;
 	opt->dccpop_len	 = 0;
 
 	/* change feature */
@@ -523,7 +523,7 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
 		 * once...
 		 */
 		/* the master socket no longer needs to worry about confirms */
-		opt->dccpop_sc = 0; /* it's not a memleak---new socket has it */
+		opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */
 
 		/* reset state for a new socket */
 		opt->dccpop_conf = 0;
-- 
cgit v1.2.2


From 56079431b6ba163df8ba26b3eccc82379f0c0ce4 Mon Sep 17 00:00:00 2001
From: Denis Vlasenko <vda@ilport.com.ua>
Date: Wed, 29 Mar 2006 15:57:29 -0800
Subject: [NET]: Deinline some larger functions from netdevice.h

On a allyesconfig'ured kernel:

Size  Uses Wasted Name and definition
===== ==== ====== ================================================
   95  162  12075 netif_wake_queue      include/linux/netdevice.h
  129   86   9265 dev_kfree_skb_any     include/linux/netdevice.h
  127   56   5885 netif_device_attach   include/linux/netdevice.h
   73   86   4505 dev_kfree_skb_irq     include/linux/netdevice.h
   46   60   1534 netif_device_detach   include/linux/netdevice.h
  119   16   1485 __netif_rx_schedule   include/linux/netdevice.h
  143    5    492 netif_rx_schedule     include/linux/netdevice.h
   81    7    366 netif_schedule        include/linux/netdevice.h

netif_wake_queue is big because __netif_schedule is a big inline:

static inline void __netif_schedule(struct net_device *dev)
{
        if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
                unsigned long flags;
                struct softnet_data *sd;

                local_irq_save(flags);
                sd = &__get_cpu_var(softnet_data);
                dev->next_sched = sd->output_queue;
                sd->output_queue = dev;
                raise_softirq_irqoff(NET_TX_SOFTIRQ);
                local_irq_restore(flags);
        }
}

static inline void netif_wake_queue(struct net_device *dev)
{
#ifdef CONFIG_NETPOLL_TRAP
        if (netpoll_trap())
                return;
#endif
        if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
                __netif_schedule(dev);
}

By de-inlining __netif_schedule we are saving a lot of text
at each callsite of netif_wake_queue and netif_schedule.
__netif_rx_schedule is also big, and it makes more sense to keep
both of them out of line.

Patch also deinlines dev_kfree_skb_any. We can deinline dev_kfree_skb_irq
instead... oh well.

netif_device_attach/detach are not hot paths, we can deinline them too.

Signed-off-by: Denis Vlasenko <vda@ilport.com.ua>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 55 ++++------------------------------------
 net/core/dev.c            | 64 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 50 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 950dc55e5192..40ccf8cc4239 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -598,20 +598,7 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data);
 
 #define HAVE_NETIF_QUEUE
 
-static inline void __netif_schedule(struct net_device *dev)
-{
-	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
-		unsigned long flags;
-		struct softnet_data *sd;
-
-		local_irq_save(flags);
-		sd = &__get_cpu_var(softnet_data);
-		dev->next_sched = sd->output_queue;
-		sd->output_queue = dev;
-		raise_softirq_irqoff(NET_TX_SOFTIRQ);
-		local_irq_restore(flags);
-	}
-}
+extern void __netif_schedule(struct net_device *dev);
 
 static inline void netif_schedule(struct net_device *dev)
 {
@@ -675,13 +662,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff *skb)
 /* Use this variant in places where it could be invoked
  * either from interrupt or non-interrupt context.
  */
-static inline void dev_kfree_skb_any(struct sk_buff *skb)
-{
-	if (in_irq() || irqs_disabled())
-		dev_kfree_skb_irq(skb);
-	else
-		dev_kfree_skb(skb);
-}
+extern void dev_kfree_skb_any(struct sk_buff *skb);
 
 #define HAVE_NETIF_RX 1
 extern int		netif_rx(struct sk_buff *skb);
@@ -768,22 +749,9 @@ static inline int netif_device_present(struct net_device *dev)
 	return test_bit(__LINK_STATE_PRESENT, &dev->state);
 }
 
-static inline void netif_device_detach(struct net_device *dev)
-{
-	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
-	    netif_running(dev)) {
-		netif_stop_queue(dev);
-	}
-}
+extern void netif_device_detach(struct net_device *dev);
 
-static inline void netif_device_attach(struct net_device *dev)
-{
-	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
-	    netif_running(dev)) {
-		netif_wake_queue(dev);
- 		__netdev_watchdog_up(dev);
-	}
-}
+extern void netif_device_attach(struct net_device *dev);
 
 /*
  * Network interface message level settings
@@ -851,20 +819,7 @@ static inline int netif_rx_schedule_prep(struct net_device *dev)
  * already been called and returned 1.
  */
 
-static inline void __netif_rx_schedule(struct net_device *dev)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	dev_hold(dev);
-	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	if (dev->quota < 0)
-		dev->quota += dev->weight;
-	else
-		dev->quota = dev->weight;
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-	local_irq_restore(flags);
-}
+extern void __netif_rx_schedule(struct net_device *dev);
 
 /* Try to reschedule poll. Called by irq handler. */
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a3ab11f34153..434220d093aa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1080,6 +1080,70 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+
+void __netif_schedule(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+		unsigned long flags;
+		struct softnet_data *sd;
+
+		local_irq_save(flags);
+		sd = &__get_cpu_var(softnet_data);
+		dev->next_sched = sd->output_queue;
+		sd->output_queue = dev;
+		raise_softirq_irqoff(NET_TX_SOFTIRQ);
+		local_irq_restore(flags);
+	}
+}
+EXPORT_SYMBOL(__netif_schedule);
+
+void __netif_rx_schedule(struct net_device *dev)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	dev_hold(dev);
+	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
+	if (dev->quota < 0)
+		dev->quota += dev->weight;
+	else
+		dev->quota = dev->weight;
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__netif_rx_schedule);
+
+void dev_kfree_skb_any(struct sk_buff *skb)
+{
+	if (in_irq() || irqs_disabled())
+		dev_kfree_skb_irq(skb);
+	else
+		dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL(dev_kfree_skb_any);
+
+
+/* Hot-plugging. */
+void netif_device_detach(struct net_device *dev)
+{
+	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	    netif_running(dev)) {
+		netif_stop_queue(dev);
+	}
+}
+EXPORT_SYMBOL(netif_device_detach);
+
+void netif_device_attach(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	    netif_running(dev)) {
+		netif_wake_queue(dev);
+ 		__netdev_watchdog_up(dev);
+	}
+}
+EXPORT_SYMBOL(netif_device_attach);
+
+
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
-- 
cgit v1.2.2


From c08e49611a8b4e38a75bf217e1029a48faf10b82 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 31 Mar 2006 02:09:36 -0800
Subject: [NET]: add SO_RCVBUF comment

Put a comment in there explaining why we double the setsockopt()
caller's SO_RCVBUF.  People keep wondering.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index a96ea7dd0fc1..ed2afdb9ea2d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -385,7 +385,21 @@ set_sndbuf:
 				val = sysctl_rmem_max;
 set_rcvbuf:
 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-			/* FIXME: is this lower bound the right one? */
+			/*
+			 * We double it on the way in to account for
+			 * "struct sk_buff" etc. overhead.   Applications
+			 * assume that the SO_RCVBUF setting they make will
+			 * allow that much actual data to be received on that
+			 * socket.
+			 *
+			 * Applications are unaware that "struct sk_buff" and
+			 * other overheads allocate from the receive buffer
+			 * during socket buffer allocation.
+			 *
+			 * And after considering the possible alternatives,
+			 * returning the value we actually used in getsockopt
+			 * is the most desirable behavior.
+			 */
 			if ((val * 2) < SOCK_MIN_RCVBUF)
 				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
 			else
-- 
cgit v1.2.2


From 0803dbed7a23721d091639c9e173c0389dcd524a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 31 Mar 2006 02:25:46 -0800
Subject: [TCP]: Kill unused extern decl for tcp_v4_hash_connecting()

Noticed by Alan Menegotto.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9418f4d1afbb..3c989db8a7aa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -405,9 +405,6 @@ extern int			tcp_disconnect(struct sock *sk, int flags);
 
 extern void			tcp_unhash(struct sock *sk);
 
-extern int			tcp_v4_hash_connecting(struct sock *sk);
-
-
 /* From syncookies.c */
 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 
 				    struct ip_options *opt);
-- 
cgit v1.2.2


From 025be81e83043f20538dcced1e12c5f8d152fbdb Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 31 Mar 2006 02:27:06 -0800
Subject: [NET]: Allow skb headroom to be overridden

Previously we added NET_IP_ALIGN so an architecture can override the
padding done to align headers. The next step is to allow the skb
headroom to be overridden.

We currently always reserve 16 bytes to grow into, meaning all DMAs
start 16 bytes into a cacheline. On ppc64 we really want DMA writes to
start on a cacheline boundary, so we increase that headroom to one
cacheline.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/asm-powerpc/system.h |  5 ++++-
 include/linux/skbuff.h       | 29 +++++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index 65f5a7b2646b..d075725bf444 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -365,8 +365,11 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
  * powers of 2 writes until it reaches sufficient alignment).
  *
  * Based on this we disable the IP header alignment in network drivers.
+ * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining
+ * cacheline alignment of buffers.
  */
-#define NET_IP_ALIGN   0
+#define NET_IP_ALIGN	0
+#define NET_SKB_PAD	L1_CACHE_BYTES
 #endif
 
 #define arch_align_stack(x) (x)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 613b9513f8b9..c4619a428d9b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -941,6 +941,25 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 #define NET_IP_ALIGN	2
 #endif
 
+/*
+ * The networking layer reserves some headroom in skb data (via
+ * dev_alloc_skb). This is used to avoid having to reallocate skb data when
+ * the header has to grow. In the default case, if the header has to grow
+ * 16 bytes or less we avoid the reallocation.
+ *
+ * Unfortunately this headroom changes the DMA alignment of the resulting
+ * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
+ * on some architectures. An architecture can override this value,
+ * perhaps setting it to a cacheline in size (since that will maintain
+ * cacheline alignment of the DMA). It must be a power of 2.
+ *
+ * Various parts of the networking layer expect at least 16 bytes of
+ * headroom, you should not reduce this.
+ */
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD	16
+#endif
+
 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
 
 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
@@ -1030,9 +1049,9 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 					      gfp_t gfp_mask)
 {
-	struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);
+	struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
 	if (likely(skb))
-		skb_reserve(skb, 16);
+		skb_reserve(skb, NET_SKB_PAD);
 	return skb;
 }
 #else
@@ -1070,13 +1089,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length)
  */
 static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
 {
-	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+	int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) -
+			skb_headroom(skb);
 
 	if (delta < 0)
 		delta = 0;
 
 	if (delta || skb_cloned(skb))
-		return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
+		return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) &
+				~(NET_SKB_PAD-1), 0, GFP_ATOMIC);
 	return 0;
 }
 
-- 
cgit v1.2.2