From 6b1686a71e3158d3c5f125260effce171cc7852b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 28 Oct 2010 12:34:21 +0200
Subject: netfilter: nf_conntrack: allow nf_ct_alloc_hashtable() to get highmem
 pages

commit ea781f197d6a8 (use SLAB_DESTROY_BY_RCU and get rid of call_rcu())
did a mistake in __vmalloc() call in nf_ct_alloc_hashtable().

I forgot to add __GFP_HIGHMEM, so pages were taken from LOWMEM only.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1eacf8d9966..27a5ea6b6a0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1312,7 +1312,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
 	if (!hash) {
 		*vmalloced = 1;
 		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
-		hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+		hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+				 PAGE_KERNEL);
 	}
 
 	if (hash && nulls)
-- 
cgit v1.2.2


From 64e46749224aa658d8fc0d37ea83ab20b1d7955d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 29 Oct 2010 16:28:07 +0200
Subject: netfilter: nf_nat: fix compiler warning with CONFIG_NF_CT_NETLINK=n

net/ipv4/netfilter/nf_nat_core.c:52: warning: 'nf_nat_proto_find_get' defined but not used
net/ipv4/netfilter/nf_nat_core.c:66: warning: 'nf_nat_proto_put' defined but not used

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_nat_core.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 295c97431e4..c04787ce1a7 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,26 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
 	return rcu_dereference(nf_nat_protos[protonum]);
 }
 
-static const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
-	const struct nf_nat_protocol *p;
-
-	rcu_read_lock();
-	p = __nf_nat_proto_find(protonum);
-	if (!try_module_get(p->me))
-		p = &nf_nat_unknown_protocol;
-	rcu_read_unlock();
-
-	return p;
-}
-
-static void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
-	module_put(p->me);
-}
-
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
 hash_by_src(const struct net *net, u16 zone,
@@ -588,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
+static const struct nf_nat_protocol *
+nf_nat_proto_find_get(u_int8_t protonum)
+{
+	const struct nf_nat_protocol *p;
+
+	rcu_read_lock();
+	p = __nf_nat_proto_find(protonum);
+	if (!try_module_get(p->me))
+		p = &nf_nat_unknown_protocol;
+	rcu_read_unlock();
+
+	return p;
+}
+
+static void
+nf_nat_proto_put(const struct nf_nat_protocol *p)
+{
+	module_put(p->me);
+}
+
 static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
 	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
 	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
-- 
cgit v1.2.2


From d817d29d0b37290d90b3a9e2a61162f1dbf2be4f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Oct 2010 19:59:40 +0200
Subject: netfilter: fix nf_conntrack_l4proto_register()

While doing __rcu annotations work on net/netfilter I found following
bug. On some arches, it is possible we publish a table while its content
is not yet committed to memory, and lockless reader can dereference wild
pointer.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_proto.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ed6d9295802..dc7bb74110d 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -292,6 +292,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 
 		for (i = 0; i < MAX_NF_CT_PROTO; i++)
 			proto_array[i] = &nf_conntrack_l4proto_generic;
+
+		/* Before making proto_array visible to lockless readers,
+		 * we must make sure its content is committed to memory.
+		 */
+		smp_wmb();
+
 		nf_ct_protos[l4proto->l3proto] = proto_array;
 	} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
 					&nf_conntrack_l4proto_generic) {
-- 
cgit v1.2.2


From 870be39258cf84b65accf629f5f9e816b1b8512e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 30 Oct 2010 16:17:23 -0700
Subject: ipv6/udp: report SndbufErrors and RcvbufErrors

commit a18135eb9389 (Add UDP_MIB_{SND,RCV}BUFERRORS handling.)
forgot to make the necessary changes in net/ipv6/proc.c to report
additional counters in /proc/net/snmp6

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index d082eaeefa2..24b3558b8e6 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -126,6 +126,8 @@ static const struct snmp_mib snmp6_udp6_list[] = {
 	SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
 	SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
 	SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+	SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -134,6 +136,8 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
 	SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
 	SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
 	SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+	SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_SENTINEL
 };
 
-- 
cgit v1.2.2


From 3285ee3bb2e158299ff19b947e41da735980d954 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 30 Oct 2010 16:21:28 -0700
Subject: ip_gre: fix fallback tunnel setup

Before making the fallback tunnel visible to lookups, we should make
sure it is completely setup, once ipgre_tunnel_init() had been called
and tstats per_cpu pointer allocated.

move rcu_assign_pointer(ign->tunnels_wc[0], tunnel); from
ipgre_fb_tunnel_init() to ipgre_init_net()

Based on a patch from Pavel Emelyanov

Reported-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 01087e035b7..70ff77f02ee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1325,7 +1325,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
-	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
 
 	tunnel->dev = dev;
 	strcpy(tunnel->parms.name, dev->name);
@@ -1336,7 +1335,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
 	tunnel->hlen		= sizeof(struct iphdr) + 4;
 
 	dev_hold(dev);
-	rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
 }
 
 
@@ -1383,10 +1381,12 @@ static int __net_init ipgre_init_net(struct net *net)
 	if ((err = register_netdev(ign->fb_tunnel_dev)))
 		goto err_reg_dev;
 
+	rcu_assign_pointer(ign->tunnels_wc[0],
+			   netdev_priv(ign->fb_tunnel_dev));
 	return 0;
 
 err_reg_dev:
-	free_netdev(ign->fb_tunnel_dev);
+	ipgre_dev_free(ign->fb_tunnel_dev);
 err_alloc_dev:
 	return err;
 }
-- 
cgit v1.2.2


From 1b1f693d7ad6d193862dcb1118540a030c5e761f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 28 Oct 2010 15:40:55 +0000
Subject: net: fix rds_iovec page count overflow

As reported by Thomas Pollet, the rdma page counting can overflow.  We
get the rdma sizes in 64-bit unsigned entities, but then limit it to
UINT_MAX bytes and shift them down to pages (so with a possible "+1" for
an unaligned address).

So each individual page count fits comfortably in an 'unsigned int' (not
even close to overflowing into signed), but as they are added up, they
might end up resulting in a signed return value. Which would be wrong.

Catch the case of tot_pages turning negative, and return the appropriate
error code.

Reported-by: Thomas Pollet <thomas.pollet@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/rdma.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 1a41debca1c..0df02c8a853 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -502,6 +502,13 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
 			return -EINVAL;
 
 		tot_pages += nr_pages;
+
+		/*
+		 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
+		 * so tot_pages cannot overflow without first going negative.
+		 */
+		if ((int)tot_pages < 0)
+			return -EINVAL;
 	}
 
 	return tot_pages;
-- 
cgit v1.2.2


From a09f69c49b84b161ebd4dd09d3cce1b68297f1d3 Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 28 Oct 2010 15:40:56 +0000
Subject: RDS: Return -EINVAL if rds_rdma_pages returns an error

rds_cmsg_rdma_args would still return success even if rds_rdma_pages
returned an error (or overflowed).

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/rdma.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 0df02c8a853..d0ba2ca3b71 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -554,8 +554,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 	}
 
 	nr_pages = rds_rdma_pages(args);
-	if (nr_pages < 0)
+	if (nr_pages < 0) {
+		ret = -EINVAL;
 		goto out;
+	}
 
 	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
 	if (!pages) {
-- 
cgit v1.2.2


From f4a3fc03c1d73753879fb655b8cd628b29f6706b Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 28 Oct 2010 15:40:57 +0000
Subject: RDS: Clean up error handling in rds_cmsg_rdma_args

We don't need to set ret = 0 at the end -- it's initialized to 0.

Also, don't increment s_send_rdma stat if we're exiting with an
error.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/rdma.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index d0ba2ca3b71..334acdd32ab 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -664,13 +664,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 	}
 	op->op_bytes = nr_bytes;
 
-	ret = 0;
 out:
 	kfree(pages);
 	if (ret)
 		rds_rdma_free_op(op);
-
-	rds_stats_inc(s_send_rdma);
+	else
+		rds_stats_inc(s_send_rdma);
 
 	return ret;
 }
-- 
cgit v1.2.2


From fc8162e3c034af743d8def435fda6396603d321f Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 28 Oct 2010 15:40:58 +0000
Subject: RDS: Copy rds_iovecs into kernel memory instead of rereading from
 userspace

Change rds_rdma_pages to take a passed-in rds_iovec array instead
of doing copy_from_user itself.

Change rds_cmsg_rdma_args to copy rds_iovec array once only. This
eliminates the possibility of userspace changing it after our
sanity checks.

Implement stack-based storage for small numbers of iovecs, based
on net/socket.c, to save an alloc in the extremely common case.

Although this patch reduces iovec copies in cmsg_rdma_args to 1,
we still do another one in rds_rdma_extra_size. Getting rid of
that one will be trickier, so it'll be a separate patch.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/rdma.c | 104 +++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 65 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 334acdd32ab..caa4d9866d9 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
 
 
 /*
- * Count the number of pages needed to describe an incoming iovec.
+ * Count the number of pages needed to describe an incoming iovec array.
  */
-static int rds_rdma_pages(struct rds_rdma_args *args)
+static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
+{
+	int tot_pages = 0;
+	unsigned int nr_pages;
+	unsigned int i;
+
+	/* figure out the number of pages in the vector */
+	for (i = 0; i < nr_iovecs; i++) {
+		nr_pages = rds_pages_in_vec(&iov[i]);
+		if (nr_pages == 0)
+			return -EINVAL;
+
+		tot_pages += nr_pages;
+
+		/*
+		 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
+		 * so tot_pages cannot overflow without first going negative.
+		 */
+		if (tot_pages < 0)
+			return -EINVAL;
+	}
+
+	return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
 {
 	struct rds_iovec vec;
 	struct rds_iovec __user *local_vec;
-	unsigned int tot_pages = 0;
+	int tot_pages = 0;
 	unsigned int nr_pages;
 	unsigned int i;
 
@@ -507,16 +532,11 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
 		 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
 		 * so tot_pages cannot overflow without first going negative.
 		 */
-		if ((int)tot_pages < 0)
+		if (tot_pages < 0)
 			return -EINVAL;
 	}
 
-	return tot_pages;
-}
-
-int rds_rdma_extra_size(struct rds_rdma_args *args)
-{
-	return rds_rdma_pages(args) * sizeof(struct scatterlist);
+	return tot_pages * sizeof(struct scatterlist);
 }
 
 /*
@@ -527,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 			  struct cmsghdr *cmsg)
 {
 	struct rds_rdma_args *args;
-	struct rds_iovec vec;
 	struct rm_rdma_op *op = &rm->rdma;
 	int nr_pages;
 	unsigned int nr_bytes;
 	struct page **pages = NULL;
-	struct rds_iovec __user *local_vec;
-	unsigned int nr;
+	struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
+	int iov_size;
 	unsigned int i, j;
 	int ret = 0;
 
@@ -553,7 +572,22 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 		goto out;
 	}
 
-	nr_pages = rds_rdma_pages(args);
+	/* Check whether to allocate the iovec area */
+	iov_size = args->nr_local * sizeof(struct rds_iovec);
+	if (args->nr_local > UIO_FASTIOV) {
+		iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
+		if (!iovs) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	nr_pages = rds_rdma_pages(iovs, args->nr_local);
 	if (nr_pages < 0) {
 		ret = -EINVAL;
 		goto out;
@@ -606,50 +640,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 	       (unsigned long long)args->remote_vec.addr,
 	       op->op_rkey);
 
-	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
 	for (i = 0; i < args->nr_local; i++) {
-		if (copy_from_user(&vec, &local_vec[i],
-				   sizeof(struct rds_iovec))) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		nr = rds_pages_in_vec(&vec);
-		if (nr == 0) {
-			ret = -EINVAL;
-			goto out;
-		}
+		struct rds_iovec *iov = &iovs[i];
+		/* don't need to check, rds_rdma_pages() verified nr will be +nonzero */
+		unsigned int nr = rds_pages_in_vec(iov);
 
-		rs->rs_user_addr = vec.addr;
-		rs->rs_user_bytes = vec.bytes;
+		rs->rs_user_addr = iov->addr;
+		rs->rs_user_bytes = iov->bytes;
 
 		/* If it's a WRITE operation, we want to pin the pages for reading.
 		 * If it's a READ operation, we need to pin the pages for writing.
 		 */
-		ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
+		ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
 		if (ret < 0)
 			goto out;
 
-		rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n",
-		       nr_bytes, nr, vec.bytes, vec.addr);
+		rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
+			 nr_bytes, nr, iov->bytes, iov->addr);
 
-		nr_bytes += vec.bytes;
+		nr_bytes += iov->bytes;
 
 		for (j = 0; j < nr; j++) {
-			unsigned int offset = vec.addr & ~PAGE_MASK;
+			unsigned int offset = iov->addr & ~PAGE_MASK;
 			struct scatterlist *sg;
 
 			sg = &op->op_sg[op->op_nents + j];
 			sg_set_page(sg, pages[j],
-					min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
+					min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
 					offset);
 
-			rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n",
-			       sg->offset, sg->length, vec.addr, vec.bytes);
+			rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
+			       sg->offset, sg->length, iov->addr, iov->bytes);
 
-			vec.addr += sg->length;
-			vec.bytes -= sg->length;
+			iov->addr += sg->length;
+			iov->bytes -= sg->length;
 		}
 
 		op->op_nents += nr;
@@ -665,6 +689,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 	op->op_bytes = nr_bytes;
 
 out:
+	if (iovs != iovstack)
+		sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
 	kfree(pages);
 	if (ret)
 		rds_rdma_free_op(op);
-- 
cgit v1.2.2


From d139ff0907dac9ef72fb2cf301e345bac3aec42f Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 28 Oct 2010 15:40:59 +0000
Subject: RDS: Let rds_message_alloc_sgs() return NULL

Even with the previous fix, we still are reading the iovecs once
to determine SGs needed, and then again later on. Preallocating
space for sg lists as part of rds_message seemed like a good idea
but it might be better to not do this. While working to redo that
code, this patch attempts to protect against userspace rewriting
the rds_iovec array between the first and second accesses.

The consequences of this would be either a too-small or too-large
sg list array. Too large is not an issue. This patch changes all
callers of message_alloc_sgs to handle running out of preallocated
sgs, and fail gracefully.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/message.c | 5 +++++
 net/rds/rdma.c    | 8 ++++++++
 net/rds/send.c    | 4 ++++
 3 files changed, 17 insertions(+)

(limited to 'net')

diff --git a/net/rds/message.c b/net/rds/message.c
index a84545dae37..848cff45183 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -224,6 +224,9 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
 	WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
 	WARN_ON(!nents);
 
+	if (rm->m_used_sgs + nents > rm->m_total_sgs)
+		return NULL;
+
 	sg_ret = &sg_first[rm->m_used_sgs];
 	sg_init_table(sg_ret, nents);
 	rm->m_used_sgs += nents;
@@ -246,6 +249,8 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
 	rm->data.op_nents = ceil(total_len, PAGE_SIZE);
 	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
+	if (!rm->data.op_sg)
+		return ERR_PTR(-ENOMEM);
 
 	for (i = 0; i < rm->data.op_nents; ++i) {
 		sg_set_page(&rm->data.op_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index caa4d9866d9..8920f2a8332 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -607,6 +607,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 	op->op_recverr = rs->rs_recverr;
 	WARN_ON(!nr_pages);
 	op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
+	if (!op->op_sg) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	if (op->op_notify || op->op_recverr) {
 		/* We allocate an uninitialized notifier here, because
@@ -807,6 +811,10 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
 	rm->atomic.op_active = 1;
 	rm->atomic.op_recverr = rs->rs_recverr;
 	rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+	if (!rm->atomic.op_sg) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	/* verify 8 byte-aligned */
 	if (args->local_addr & 0x7) {
diff --git a/net/rds/send.c b/net/rds/send.c
index 0bc9db17a87..35b9c2e9caf 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -973,6 +973,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	/* Attach data to the rm */
 	if (payload_len) {
 		rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+		if (!rm->data.op_sg) {
+			ret = -ENOMEM;
+			goto out;
+		}
 		ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
 		if (ret)
 			goto out;
-- 
cgit v1.2.2


From 253eacc070b114c2ec1f81b067d2fed7305467b0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 30 Oct 2010 16:43:10 -0700
Subject: net: Truncate recvfrom and sendto length to INT_MAX.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index abf3e256152..2808b4db46e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1652,6 +1652,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
 	struct iovec iov;
 	int fput_needed;
 
+	if (len > INT_MAX)
+		len = INT_MAX;
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1709,6 +1711,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
 	int err, err2;
 	int fput_needed;
 
+	if (size > INT_MAX)
+		size = INT_MAX;
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
-- 
cgit v1.2.2


From 5ec1cea057495b8f10bab0c1396a9d8e46b7b0a8 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Sun, 31 Oct 2010 09:37:38 -0700
Subject: text ematch: check for NULL pointer before destroying textsearch
 config

While validating the configuration em_ops is already set, thus the
individual destroy functions are called, but the ematch data has
not been allocated and associated with the ematch yet.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/em_text.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 76325325741..ea8f566e720 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -103,7 +103,8 @@ retry:
 
 static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
 {
-	textsearch_destroy(EM_TEXT_PRIV(m)->config);
+	if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
+		textsearch_destroy(EM_TEXT_PRIV(m)->config);
 }
 
 static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
-- 
cgit v1.2.2


From 6f9b901823aafd14a84ae27f61ff28bafed01260 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sun, 31 Oct 2010 07:26:03 +0000
Subject: l2tp: kzalloc with swapped params in l2tp_dfs_seq_open

  'sparse' spotted that the parameters to kzalloc in l2tp_dfs_seq_open
were swapped.

Tested on current git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
at 1792f17b7210280a3d7ff29da9614ba779cfcedb  build, boots and I can see that directory,
but there again I could see /sys/kernel/debug/l2tp with it swapped; I don't have
any l2tp in use.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 104ec3b283d..b8dbae82fab 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -249,7 +249,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
 	struct seq_file *seq;
 	int rc = -ENOMEM;
 
-	pd = kzalloc(GFP_KERNEL, sizeof(*pd));
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
 	if (pd == NULL)
 		goto out;
 
-- 
cgit v1.2.2


From df32cc193ad88f7b1326b90af799c927b27f7654 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 1 Nov 2010 12:55:52 -0700
Subject: net: check queue_index from sock is valid for device

In dev_pick_tx recompute the queue index if the value stored in the
socket is greater than or equal to the number of real queues for the
device.  The saved index in the sock structure is not guaranteed to
be appropriate for the egress device (this could happen on a route
change or in presence of tunnelling).  The result of the queue index
being bad would be to return a bogus queue (crash could prersumably
follow).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 35dfb831848..0dd54a69dac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2131,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	} else {
 		struct sock *sk = skb->sk;
 		queue_index = sk_tx_queue_get(sk);
-		if (queue_index < 0) {
+		if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
 
 			queue_index = 0;
 			if (dev->real_num_tx_queues > 1)
-- 
cgit v1.2.2


From df9f86faf3ee610527ed02031fe7dd3c8b752e44 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 1 Nov 2010 15:49:23 -0700
Subject: ceph: fix small seq message skipping

If the client gets out of sync with the server message sequence number, we
normally skip low seq messages (ones we already received).  The skip code
was also incrementing the expected seq, such that all subsequent messages
also appeared old and got skipped, and an eventual timeout on the osd
connection.  This resulted in some lagging requests and console messages
like

[233480.882885] ceph: skipping osd22 10.138.138.13:6804 seq 2016, expected 2017
[233480.882919] ceph: skipping osd22 10.138.138.13:6804 seq 2017, expected 2018
[233480.882963] ceph: skipping osd22 10.138.138.13:6804 seq 2018, expected 2019
[233480.883488] ceph: skipping osd22 10.138.138.13:6804 seq 2019, expected 2020
[233485.219558] ceph: skipping osd22 10.138.138.13:6804 seq 2020, expected 2021
[233485.906595] ceph: skipping osd22 10.138.138.13:6804 seq 2021, expected 2022
[233490.379536] ceph: skipping osd22 10.138.138.13:6804 seq 2022, expected 2023
[233495.523260] ceph: skipping osd22 10.138.138.13:6804 seq 2023, expected 2024
[233495.923194] ceph: skipping osd22 10.138.138.13:6804 seq 2024, expected 2025
[233500.534614] ceph:  tid 6023602 timed out on osd22, will reset osd

Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/messenger.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0e8157ee5d4..d379abf873b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1532,14 +1532,13 @@ static int read_partial_message(struct ceph_connection *con)
 	/* verify seq# */
 	seq = le64_to_cpu(con->in_hdr.seq);
 	if ((s64)seq - (s64)con->in_seq < 1) {
-		pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+		pr_info("skipping %s%lld %s seq %lld expected %lld\n",
 			ENTITY_NAME(con->peer_name),
 			ceph_pr_addr(&con->peer_addr.in_addr),
 			seq, con->in_seq + 1);
 		con->in_base_pos = -front_len - middle_len - data_len -
 			sizeof(m->footer);
 		con->in_tag = CEPH_MSGR_TAG_READY;
-		con->in_seq++;
 		return 0;
 	} else if ((s64)seq - (s64)con->in_seq > 1) {
 		pr_err("read_partial_message bad seq %lld expected %lld\n",
-- 
cgit v1.2.2


From 1a8b7a67224eb0c9dbd883b9bfc4938278bad370 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Wed, 3 Nov 2010 08:44:12 +0100
Subject: ipv4: netfilter: arp_tables: fix information leak to userland

Structure arpt_getinfo is copied to userland with the field "name"
that has the last elements unitialized.  It leads to leaking of
contents of kernel stack memory.

Signed-off-by: Vasiliy Kulikov <segooon@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/arp_tables.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3cad2591ace..3fac340a28d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
 			private = &tmp;
 		}
 #endif
+		memset(&info, 0, sizeof(info));
 		info.valid_hooks = t->valid_hooks;
 		memcpy(info.hook_entry, private->hook_entry,
 		       sizeof(info.hook_entry));
-- 
cgit v1.2.2


From b5f15ac4f89f84853544c934fc7a744289e95e34 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Wed, 3 Nov 2010 08:45:06 +0100
Subject: ipv4: netfilter: ip_tables: fix information leak to userland

Structure ipt_getinfo is copied to userland with the field "name"
that has the last elements unitialized.  It leads to leaking of
contents of kernel stack memory.

Signed-off-by: Vasiliy Kulikov <segooon@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_tables.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d31b007a6d8..a846d633b3b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
 			private = &tmp;
 		}
 #endif
+		memset(&info, 0, sizeof(info));
 		info.valid_hooks = t->valid_hooks;
 		memcpy(info.hook_entry, private->hook_entry,
 		       sizeof(info.hook_entry));
-- 
cgit v1.2.2


From f2527ec436fd675f08a8e7434f6e940688cb96d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Carvalho=20de=20Matos?=
 <andre.carvalho.matos@stericsson.com>
Date: Mon, 1 Nov 2010 11:52:47 +0000
Subject: caif: Bugfix for socket priority, bindtodev and dbg channel.

Changes:
o Bugfix: SO_PRIORITY for SOL_SOCKET could not be handled
  in caif's setsockopt,  using the struct sock attribute priority instead.

o Bugfix: SO_BINDTODEVICE for SOL_SOCKET could not be handled
  in caif's setsockopt,  using the struct sock attribute ifindex instead.

o Wrong assert statement for RFM layer segmentation.

o CAIF Debug channels was not working over SPI, caif_payload_info
  containing padding info must be initialized.

o Check on pointer before dereferencing when unregister dev in caif_dev.c

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_config_util.c | 13 ++++++++++---
 net/caif/caif_dev.c         |  2 ++
 net/caif/caif_socket.c      | 45 +++++++++++++++------------------------------
 net/caif/cfcnfg.c           | 17 +++++++----------
 net/caif/cfdbgl.c           | 14 ++++++++++++++
 net/caif/cfrfml.c           |  2 +-
 6 files changed, 49 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 76ae68303d3..d522d8c1703 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -16,11 +16,18 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
 {
 	struct dev_info *dev_info;
 	enum cfcnfg_phy_preference pref;
+	int res;
+
 	memset(l, 0, sizeof(*l));
-	l->priority = s->priority;
+	/* In caif protocol low value is high priority */
+	l->priority = CAIF_PRIO_MAX - s->priority + 1;
 
-	if (s->link_name[0] != '\0')
-		l->phyid = cfcnfg_get_named(cnfg, s->link_name);
+	if (s->ifindex != 0){
+		res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
+		if (res < 0)
+			return res;
+		l->phyid = res;
+	}
 	else {
 		switch (s->link_selector) {
 		case CAIF_LINK_HIGH_BANDW:
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index b99369a055d..a42a408306e 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -307,6 +307,8 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 
 	case NETDEV_UNREGISTER:
 		caifd = caif_get(dev);
+		if (caifd == NULL)
+			break;
 		netdev_info(dev, "unregister\n");
 		atomic_set(&caifd->state, what);
 		caif_device_destroy(dev);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2eca2dd0000..1bf0cf50379 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -716,8 +716,7 @@ static int setsockopt(struct socket *sock,
 {
 	struct sock *sk = sock->sk;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	int prio, linksel;
-	struct ifreq ifreq;
+	int linksel;
 
 	if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
 		return -ENOPROTOOPT;
@@ -735,33 +734,6 @@ static int setsockopt(struct socket *sock,
 		release_sock(&cf_sk->sk);
 		return 0;
 
-	case SO_PRIORITY:
-		if (lvl != SOL_SOCKET)
-			goto bad_sol;
-		if (ol < sizeof(int))
-			return -EINVAL;
-		if (copy_from_user(&prio, ov, sizeof(int)))
-			return -EINVAL;
-		lock_sock(&(cf_sk->sk));
-		cf_sk->conn_req.priority = prio;
-		release_sock(&cf_sk->sk);
-		return 0;
-
-	case SO_BINDTODEVICE:
-		if (lvl != SOL_SOCKET)
-			goto bad_sol;
-		if (ol < sizeof(struct ifreq))
-			return -EINVAL;
-		if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
-			return -EFAULT;
-		lock_sock(&(cf_sk->sk));
-		strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
-			sizeof(cf_sk->conn_req.link_name));
-		cf_sk->conn_req.link_name
-			[sizeof(cf_sk->conn_req.link_name)-1] = 0;
-		release_sock(&cf_sk->sk);
-		return 0;
-
 	case CAIFSO_REQ_PARAM:
 		if (lvl != SOL_CAIF)
 			goto bad_sol;
@@ -880,6 +852,18 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	sock->state = SS_CONNECTING;
 	sk->sk_state = CAIF_CONNECTING;
 
+	/* Check priority value comming from socket */
+	/* if priority value is out of range it will be ajusted */
+	if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
+		cf_sk->conn_req.priority = CAIF_PRIO_MAX;
+	else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN)
+		cf_sk->conn_req.priority = CAIF_PRIO_MIN;
+	else
+		cf_sk->conn_req.priority = cf_sk->sk.sk_priority;
+
+	/*ifindex = id of the interface.*/
+	cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if;
+
 	dbfs_atomic_inc(&cnt.num_connect_req);
 	cf_sk->layer.receive = caif_sktrecv_cb;
 	err = caif_connect_client(&cf_sk->conn_req,
@@ -905,6 +889,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	cf_sk->maxframe = mtu - (headroom + tailroom);
 	if (cf_sk->maxframe < 1) {
 		pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
+		err = -ENODEV;
 		goto out;
 	}
 
@@ -1142,7 +1127,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 	set_rx_flow_on(cf_sk);
 
 	/* Set default options on configuration */
-	cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
+	cf_sk->sk.sk_priority= CAIF_PRIO_NORMAL;
 	cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
 	cf_sk->conn_req.protocol = protocol;
 	/* Increase the number of sockets created. */
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 41adafd1891..21ede141018 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -173,18 +173,15 @@ static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
 	return NULL;
 }
 
-int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
+
+int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
 {
 	int i;
-
-	/* Try to match with specified name */
-	for (i = 0; i < MAX_PHY_LAYERS; i++) {
-		if (cnfg->phy_layers[i].frm_layer != NULL
-		    && strcmp(cnfg->phy_layers[i].phy_layer->name,
-			      name) == 0)
-			return cnfg->phy_layers[i].frm_layer->id;
-	}
-	return 0;
+	for (i = 0; i < MAX_PHY_LAYERS; i++)
+		if (cnfg->phy_layers[i].frm_layer != NULL &&
+				cnfg->phy_layers[i].ifindex == ifi)
+			return i;
+	return -ENODEV;
 }
 
 int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 496fda9ac66..11a2af4c162 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -12,6 +12,8 @@
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>
 
+#define container_obj(layr) ((struct cfsrvl *) layr)
+
 static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
@@ -38,5 +40,17 @@ static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
 
 static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
 {
+	struct cfsrvl *service = container_obj(layr);
+	struct caif_payload_info *info;
+	int ret;
+
+	if (!cfsrvl_ready(service, &ret))
+		return ret;
+
+	/* Add info for MUX-layer to route the packet out */
+	info = cfpkt_info(pkt);
+	info->channel_id = service->layer.id;
+	info->dev_info = &service->dev_info;
+
 	return layr->dn->transmit(layr->dn, pkt);
 }
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index bde8481e8d2..e2fb5fa7579 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -193,7 +193,7 @@ out:
 
 static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
 {
-	caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size);
+	caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size);
 
 	/* Add info for MUX-layer to route the packet out. */
 	cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
-- 
cgit v1.2.2


From 47d1ff176553fec3cb17854a7ca85036d3b0c4e7 Mon Sep 17 00:00:00 2001
From: "sjur.brandeland@stericsson.com" <sjur.brandeland@stericsson.com>
Date: Wed, 3 Nov 2010 10:19:25 +0000
Subject: caif: Remove noisy printout when disconnecting caif socket
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfctrl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 08f267a109a..3cd8f978e30 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -361,11 +361,10 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
 	struct cfctrl_request_info *p, *tmp;
 	struct cfctrl *ctrl = container_obj(layr);
 	spin_lock(&ctrl->info_list_lock);
-	pr_warn("enter\n");
 
 	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
 		if (p->client_layer == adap_layer) {
-			pr_warn("cancel req :%d\n", p->sequence_no);
+			pr_debug("cancel req :%d\n", p->sequence_no);
 			list_del(&p->list);
 			kfree(p);
 		}
-- 
cgit v1.2.2


From 58c490babd4b425310363cbd1f406d7e508f77a5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 2 Nov 2010 01:52:05 +0000
Subject: rds: Lost locking in loop connection freeing

The conn is removed from list in there and this requires
proper lock protection.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/loop.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/rds/loop.c b/net/rds/loop.c
index c390156b426..aeec1d483b1 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -134,8 +134,12 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 static void rds_loop_conn_free(void *arg)
 {
 	struct rds_loop_connection *lc = arg;
+	unsigned long flags;
+
 	rdsdebug("lc %p\n", lc);
+	spin_lock_irqsave(&loop_conns_lock, flags);
 	list_del(&lc->loop_node);
+	spin_unlock_irqrestore(&loop_conns_lock, flags);
 	kfree(lc);
 }
 
-- 
cgit v1.2.2


From 8200a59f24aeca379660f80658a8c0c343ca5c31 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 2 Nov 2010 01:54:01 +0000
Subject: rds: Remove kfreed tcp conn from list

All the rds_tcp_connection objects are stored list, but when
being freed it should be removed from there.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08a8c6cf2d1..8e0a32001c9 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -221,7 +221,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 static void rds_tcp_conn_free(void *arg)
 {
 	struct rds_tcp_connection *tc = arg;
+	unsigned long flags;
 	rdsdebug("freeing tc %p\n", tc);
+
+	spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+	list_del(&tc->t_tcp_node);
+	spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+
 	kmem_cache_free(rds_tcp_conn_slab, tc);
 }
 
-- 
cgit v1.2.2


From 41bb78b4b9adb21cf2c395b6b880aaae99c788b7 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Tue, 2 Nov 2010 16:11:05 +0000
Subject: net dst: fix percpu_counter list corruption and poison overwritten

There're some percpu_counter list corruption and poison overwritten warnings
in recent kernel, which is resulted by fc66f95c.

commit fc66f95c switches to use percpu_counter, in ip6_route_net_init, kernel
init the percpu_counter for dst entries, but, the percpu_counter is never destroyed
in ip6_route_net_exit. So if the related data is freed by kernel, the freed percpu_counter
is still on the list, then if we insert/remove other percpu_counter, list corruption
resulted. Also, if the insert/remove option modifies the ->prev,->next pointer of
the freed value, the poison overwritten is resulted then.

With the following patch, the percpu_counter list corruption and poison overwritten
warnings disappeared.

Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: "Pekka Savola (ipv6)" <pekkas@netcore.fi>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 25661f968f3..fc328339be9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2741,6 +2741,7 @@ static void __net_exit ip6_route_net_exit(struct net *net)
 	kfree(net->ipv6.ip6_prohibit_entry);
 	kfree(net->ipv6.ip6_blk_hole_entry);
 #endif
+	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
 }
 
 static struct pernet_operations ip6_route_net_ops = {
@@ -2832,5 +2833,6 @@ void ip6_route_cleanup(void)
 	xfrm6_fini();
 	fib6_gc_cleanup();
 	unregister_pernet_subsys(&ip6_route_net_ops);
+	dst_entries_destroy(&ip6_dst_blackhole_ops);
 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
 }
-- 
cgit v1.2.2


From a6331d6f9a4298173b413cf99a40cc86a9d92c37 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Wed, 3 Nov 2010 12:54:53 +0000
Subject: memory corruption in X.25 facilities parsing

Signed-of-by: Andrew Hendry <andrew.hendry@gmail.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_facilities.c | 8 ++++----
 net/x25/x25_in.c         | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 771bab00754..3a8c4c419cd 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -134,15 +134,15 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 		case X25_FAC_CLASS_D:
 			switch (*p) {
 			case X25_FAC_CALLING_AE:
-				if (p[1] > X25_MAX_DTE_FACIL_LEN)
-					break;
+				if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+					return 0;
 				dte_facs->calling_len = p[2];
 				memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
 				*vc_fac_mask |= X25_MASK_CALLING_AE;
 				break;
 			case X25_FAC_CALLED_AE:
-				if (p[1] > X25_MAX_DTE_FACIL_LEN)
-					break;
+				if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+					return 0;
 				dte_facs->called_len = p[2];
 				memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
 				*vc_fac_mask |= X25_MASK_CALLED_AE;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 63178961efa..f729f022be6 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -119,6 +119,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 						&x25->vc_facil_mask);
 			if (len > 0)
 				skb_pull(skb, len);
+			else
+				return -1;
 			/*
 			 *	Copy any Call User Data.
 			 */
-- 
cgit v1.2.2


From c00b2c9e79466d61979cd21af526cc6d5d0ee04f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 3 Nov 2010 13:31:05 +0000
Subject: cls_cgroup: Fix crash on module unload

Somewhere along the lines net_cls_subsys_id became a macro when
cls_cgroup is built as a module.  Not only did it make cls_cgroup
completely useless, it also causes it to crash on module unload.

This patch fixes this by removing that macro.

Thanks to Eric Dumazet for diagnosing this problem.

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_cgroup.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 37dff78e9cb..d49c40fb7e0 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
 	.populate	= cgrp_populate,
 #ifdef CONFIG_NET_CLS_CGROUP
 	.subsys_id	= net_cls_subsys_id,
-#else
-#define net_cls_subsys_id net_cls_subsys.subsys_id
 #endif
 	.module		= THIS_MODULE,
 };
-- 
cgit v1.2.2


From cccbe5ef85284621d19e5b2b1c61cc0506bc9dee Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 3 Nov 2010 18:55:39 -0700
Subject: netfilter: ip6_tables: fix information leak to userspace

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6_tables.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 51df035897e..455582384ec 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1137,6 +1137,7 @@ static int get_info(struct net *net, void __user *user,
 			private = &tmp;
 		}
 #endif
+		memset(&info, 0, sizeof(info));
 		info.valid_hooks = t->valid_hooks;
 		memcpy(info.hook_entry, private->hook_entry,
 		       sizeof(info.hook_entry));
-- 
cgit v1.2.2


From 1f1b9c9990205759aae31b7734b0ede41a867f32 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 4 Nov 2010 01:21:39 +0000
Subject: fib: fib_result_assign() should not change fib refcounts

After commit ebc0ffae5 (RCU conversion of fib_lookup()),
fib_result_assign()  should not change fib refcounts anymore.

Thanks to Michael who did the bisection and bug report.

Reported-by: Michael Ellerman <michael@ellerman.id.au>
Tested-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_lookup.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a29edf2219c..c079cc0ec65 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -47,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
 static inline void fib_result_assign(struct fib_result *res,
 				     struct fib_info *fi)
 {
-	if (res->fi != NULL)
-		fib_info_put(res->fi);
+	/* we used to play games with refcounts, but we now use RCU */
 	res->fi = fi;
-	if (fi != NULL)
-		atomic_inc(&fi->fib_clntref);
 }
 
 #endif /* _FIB_LOOKUP_H */
-- 
cgit v1.2.2


From 22e76c849d505d87c5ecf3d3e6742a65f0ff4860 Mon Sep 17 00:00:00 2001
From: Nelson Elhage <nelhage@ksplice.com>
Date: Wed, 3 Nov 2010 16:35:41 +0000
Subject: inet_diag: Make sure we actually run the same bytecode we audited.

We were using nlmsg_find_attr() to look up the bytecode by attribute when
auditing, but then just using the first attribute when actually running
bytecode. So, if we received a message with two attribute elements, where only
the second had type INET_DIAG_REQ_BYTECODE, we would validate and run different
bytecode strings.

Fix this by consistently using nlmsg_find_attr everywhere.

Signed-off-by: Nelson Elhage <nelhage@ksplice.com>
Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ba804266584..2ada17129fc 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
 {
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
 
-	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+	if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
 		struct inet_diag_entry entry;
-		struct rtattr *bc = (struct rtattr *)(r + 1);
+		const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+							  sizeof(*r),
+							  INET_DIAG_REQ_BYTECODE);
 		struct inet_sock *inet = inet_sk(sk);
 
 		entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
 		entry.dport = ntohs(inet->inet_dport);
 		entry.userlocks = sk->sk_userlocks;
 
-		if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+		if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
 			return 0;
 	}
 
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 {
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
 
-	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+	if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
 		struct inet_diag_entry entry;
-		struct rtattr *bc = (struct rtattr *)(r + 1);
+		const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+							  sizeof(*r),
+							  INET_DIAG_REQ_BYTECODE);
 
 		entry.family = tw->tw_family;
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 		entry.dport = ntohs(tw->tw_dport);
 		entry.userlocks = 0;
 
-		if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+		if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
 			return 0;
 	}
 
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt;
-	struct rtattr *bc = NULL;
+	const struct nlattr *bc = NULL;
 	struct inet_sock *inet = inet_sk(sk);
 	int j, s_j;
 	int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	if (!lopt || !lopt->qlen)
 		goto out;
 
-	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
-		bc = (struct rtattr *)(r + 1);
+	if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+		bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
+				     INET_DIAG_REQ_BYTECODE);
 		entry.sport = inet->inet_num;
 		entry.userlocks = sk->sk_userlocks;
 	}
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 					&ireq->rmt_addr;
 				entry.dport = ntohs(ireq->rmt_port);
 
-				if (!inet_diag_bc_run(RTA_DATA(bc),
-						    RTA_PAYLOAD(bc), &entry))
+				if (!inet_diag_bc_run(nla_data(bc),
+						      nla_len(bc), &entry))
 					continue;
 			}
 
-- 
cgit v1.2.2


From 86c2c0a8a4965ae5cbc0ff97ed39a4472e8e9b23 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 6 Nov 2010 20:11:38 +0000
Subject: NET: pktgen - fix compile warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should fix the following warning:

net/core/pktgen.c: In function ‘pktgen_if_write’:
net/core/pktgen.c:890: warning: comparison of distinct pointer types lacks a cast

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Reviewed-by: Nelson Elhage <nelhage@ksplice.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fbce4b05a53..1992cd050e2 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -887,7 +887,7 @@ static ssize_t pktgen_if_write(struct file *file,
 	i += len;
 
 	if (debug) {
-		size_t copy = min(count, 1023);
+		size_t copy = min_t(size_t, count, 1023);
 		char tb[copy + 1];
 		if (copy_from_user(tb, user_buffer, copy))
 			return -EFAULT;
-- 
cgit v1.2.2


From 4c46ee52589a4edd67447214eb489b10fed5c53a Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Thu, 4 Nov 2010 11:47:04 +0000
Subject: classifier: report statistics for basic classifier

The basic classifier keeps statistics but does not report it to user space.
This showed up when using basic classifier (with police) as a default catch
all on ingress; no statistics were reported.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_basic.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd05..f23d9155b1e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -268,6 +268,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
+		goto nla_put_failure;
+
 	return skb->len;
 
 nla_put_failure:
-- 
cgit v1.2.2


From f46421416fb6b91513fb687d6503142cd99034a5 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Fri, 5 Nov 2010 01:56:34 +0000
Subject: ipv6: fix overlap check for fragments

The type of FRAG6_CB(prev)->offset is int, skb->len is *unsigned* int,
and offset is int.

Without this patch, type conversion occurred to this expression, when
(FRAG6_CB(prev)->offset + prev->len) is less than offset.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index c7ba3149633..0f276645375 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -349,7 +349,7 @@ found:
 
 	/* Check for overlap with preceding fragment. */
 	if (prev &&
-	    (FRAG6_CB(prev)->offset + prev->len) - offset > 0)
+	    (FRAG6_CB(prev)->offset + prev->len) > offset)
 		goto discard_fq;
 
 	/* Look for overlap with succeeding segment. */
-- 
cgit v1.2.2


From eb589063ed482f5592b1378e4136d6998419af6e Mon Sep 17 00:00:00 2001
From: Junchang Wang <junchangwang@gmail.com>
Date: Sun, 7 Nov 2010 23:19:43 +0000
Subject: pktgen: correct uninitialized queue_map

This fix a bug reported by backyes.
Right the first time pktgen's using queue_map that's not been initialized
by set_cur_queue_map(pkt_dev);

Signed-off-by: Junchang Wang <junchangwang@gmail.com>
Signed-off-by: Backyes <backyes@mail.ustc.edu.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1992cd050e2..33bc3823ac6 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2612,8 +2612,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
-	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
 
 	datalen = (odev->hard_header_len + 16) & ~0xf;
 
@@ -2976,8 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
-	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
 
 	skb = __netdev_alloc_skb(odev,
 				 pkt_dev->cur_pkt_size + 64
-- 
cgit v1.2.2


From aa58163a76a3aef33c7220931543d45d0fe43753 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 8 Nov 2010 06:20:50 +0000
Subject: rds: Fix rds message leak in rds_message_map_pages

The sgs allocation error path leaks the allocated message.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/message.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/message.c b/net/rds/message.c
index 848cff45183..1fd3d29023d 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -249,8 +249,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
 	rm->data.op_nents = ceil(total_len, PAGE_SIZE);
 	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
-	if (!rm->data.op_sg)
+	if (!rm->data.op_sg) {
+		rds_message_put(rm);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	for (i = 0; i < rm->data.op_nents; ++i) {
 		sg_set_page(&rm->data.op_sg[i],
-- 
cgit v1.2.2


From 3cc25e510dfc36dc62ee0aa87344b36ed7c1742a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 31 Oct 2010 15:31:54 +0100
Subject: cfg80211: fix a crash in dev lookup on dump commands

IS_ERR and PTR_ERR were called with the wrong pointer, leading to a
crash when cfg80211_get_dev_from_ifindex fails.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/nl80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c506241f863..4e78e3f2679 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -224,8 +224,8 @@ static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
 	}
 
 	*rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
-	if (IS_ERR(dev)) {
-		err = PTR_ERR(dev);
+	if (IS_ERR(*rdev)) {
+		err = PTR_ERR(*rdev);
 		goto out_rtnl;
 	}
 
-- 
cgit v1.2.2


From 352ffad646c0e0c5cf9ae8cea99710ee0d66ee27 Mon Sep 17 00:00:00 2001
From: Brian Cavagnolo <brian@cozybit.com>
Date: Thu, 4 Nov 2010 16:59:28 -0700
Subject: mac80211: unset SDATA_STATE_OFFCHANNEL when cancelling a scan

For client STA interfaces, ieee80211_do_stop unsets the relevant
interface's SDATA_STATE_RUNNING state bit prior to cancelling an
interrupted scan.  When ieee80211_offchannel_return is invoked as
part of cancelling the scan, it doesn't bother unsetting the
SDATA_STATE_OFFCHANNEL bit because it sees that the interface is
down.  Normally this doesn't matter because when the client STA
interface is brought back up, it will probably issue a scan.  But
in some cases (e.g., the user changes the interface type while it
is down), the SDATA_STATE_OFFCHANNEL bit will remain set.  This
prevents the interface queues from being started.  So we
cancel the scan before unsetting the SDATA_STATE_RUNNING bit.

Signed-off-by: Brian Cavagnolo <brian@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f9163b12c7f..7aa85591dbe 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -391,6 +391,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	u32 hw_reconf_flags = 0;
 	int i;
 
+	if (local->scan_sdata == sdata)
+		ieee80211_scan_cancel(local);
+
 	clear_bit(SDATA_STATE_RUNNING, &sdata->state);
 
 	/*
@@ -523,9 +526,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		synchronize_rcu();
 		skb_queue_purge(&sdata->skb_queue);
 
-		if (local->scan_sdata == sdata)
-			ieee80211_scan_cancel(local);
-
 		/*
 		 * Disable beaconing here for mesh only, AP and IBSS
 		 * are already taken care of.
-- 
cgit v1.2.2


From 96c99b473a8531188e2f6106c6ef0e33bb4500f2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 13 Oct 2010 18:16:52 -0700
Subject: Bluetooth: fix hidp kconfig dependency warning

Fix kconfig dependency warning to satisfy dependencies:

warning: (BT_HIDP && NET && BT && BT_L2CAP && INPUT || USB_HID && HID_SUPPORT && USB && INPUT) selects HID which has unmet direct dependencies (HID_SUPPORT && INPUT)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hidp/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig
index 98fdfa1fbdd..86a91543172 100644
--- a/net/bluetooth/hidp/Kconfig
+++ b/net/bluetooth/hidp/Kconfig
@@ -1,6 +1,6 @@
 config BT_HIDP
 	tristate "HIDP protocol support"
-	depends on BT && BT_L2CAP && INPUT
+	depends on BT && BT_L2CAP && INPUT && HID_SUPPORT
 	select HID
 	help
 	  HIDP (Human Interface Device Protocol) is a transport layer
-- 
cgit v1.2.2


From bdb7524a75e4716535a29abb314a82619301e068 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Fri, 15 Oct 2010 10:46:09 +0300
Subject: Bluetooth: Fix non-SSP auth request for HIGH security level sockets

When initiating dedicated bonding a L2CAP raw socket with HIGH security
level is used. The kernel is supposed to trigger the authentication
request in this case but this doesn't happen currently for non-SSP
(pre-2.1) devices. The reason is that the authentication request happens
in the remote extended features callback which never gets called for
non-SSP devices. This patch fixes the issue by requesting also
authentiation in the (normal) remote features callback in the case of
non-SSP devices.

This rule is applied only for HIGH security level which might at first
seem unintuitive since on the server socket side MEDIUM is already
enough for authentication. However, for the clients we really want to
prefer the server side to decide the authentication requrement in most
cases, and since most client sockets use MEDIUM it's better to be
avoided on the kernel side for these sockets. The important socket to
request it for is the dedicated bonding one and that socket uses HIGH
security level.

The patch is based on the initial investigation and patch proposal from
Andrei Emeltchenko <endrei.emeltchenko@nokia.com>.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_event.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index bfef5bae0b3..84093b0000b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1175,6 +1175,12 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
 				hci_send_cmd(hdev,
 					HCI_OP_READ_REMOTE_EXT_FEATURES,
 							sizeof(cp), &cp);
+			} else if (!ev->status && conn->out &&
+					conn->sec_level == BT_SECURITY_HIGH) {
+				struct hci_cp_auth_requested cp;
+				cp.handle = ev->handle;
+				hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
+							sizeof(cp), &cp);
 			} else {
 				conn->state = BT_CONNECTED;
 				hci_proto_connect_cfm(conn, ev->status);
-- 
cgit v1.2.2


From bfaaeb3ed5533a2dd38e3aa9ea43efd619690aed Mon Sep 17 00:00:00 2001
From: steven miao <realmz6@gmail.com>
Date: Sat, 16 Oct 2010 18:29:47 -0400
Subject: Bluetooth: fix unaligned access to l2cap conf data

In function l2cap_get_conf_opt() and l2cap_add_conf_opt() the address of
opt->val sometimes is not at the edge of 2-bytes/4-bytes, so 2-bytes/4 bytes
access will cause data misalignment exeception.  Use get_unaligned_le16/32
and put_unaligned_le16/32 function to avoid data misalignment execption.

Signed-off-by: steven miao <realmz6@gmail.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index daa7a988d9a..b3fb02ab229 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2421,11 +2421,11 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned
 		break;
 
 	case 2:
-		*val = __le16_to_cpu(*((__le16 *) opt->val));
+		*val = get_unaligned_le16(opt->val);
 		break;
 
 	case 4:
-		*val = __le32_to_cpu(*((__le32 *) opt->val));
+		*val = get_unaligned_le32(opt->val);
 		break;
 
 	default:
@@ -2452,11 +2452,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 		break;
 
 	case 2:
-		*((__le16 *) opt->val) = cpu_to_le16(val);
+		put_unaligned_le16(cpu_to_le16(val), opt->val);
 		break;
 
 	case 4:
-		*((__le32 *) opt->val) = cpu_to_le32(val);
+		put_unaligned_le32(cpu_to_le32(val), opt->val);
 		break;
 
 	default:
-- 
cgit v1.2.2


From 4f8b691c9fb02e72359e71592098c1de3b8ec712 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 18 Oct 2010 14:25:53 -0200
Subject: Bluetooth: fix endianness conversion in L2CAP

Last commit added a wrong endianness conversion. Fixing that.

Reported-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index b3fb02ab229..cd8f6ea0384 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2452,11 +2452,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 		break;
 
 	case 2:
-		put_unaligned_le16(cpu_to_le16(val), opt->val);
+		put_unaligned_le16(val, opt->val);
 		break;
 
 	case 4:
-		put_unaligned_le32(cpu_to_le32(val), opt->val);
+		put_unaligned_le32(val, opt->val);
 		break;
 
 	default:
-- 
cgit v1.2.2


From 63ce0900d79645c714de6c8b66d8040670068c9e Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.dentz-von@nokia.com>
Date: Thu, 19 Aug 2010 14:06:10 +0300
Subject: Bluetooth: fix not setting security level when creating a rfcomm
 session

This cause 'No Bonding' to be used if userspace has not yet been paired
with remote device since the l2cap socket used to create the rfcomm
session does not have any security level set.

Signed-off-by: Luiz Augusto von Dentz <luiz.dentz-von@nokia.com>
Acked-by: Ville Tervo <ville.tervo@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/rfcomm/core.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 39a5d87e33b..fa642aa652b 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -79,7 +79,10 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr);
 
 static void rfcomm_process_connect(struct rfcomm_session *s);
 
-static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err);
+static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
+							bdaddr_t *dst,
+							u8 sec_level,
+							int *err);
 static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst);
 static void rfcomm_session_del(struct rfcomm_session *s);
 
@@ -401,7 +404,7 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst,
 
 	s = rfcomm_session_get(src, dst);
 	if (!s) {
-		s = rfcomm_session_create(src, dst, &err);
+		s = rfcomm_session_create(src, dst, d->sec_level, &err);
 		if (!s)
 			return err;
 	}
@@ -679,7 +682,10 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err)
 	rfcomm_session_put(s);
 }
 
-static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err)
+static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
+							bdaddr_t *dst,
+							u8 sec_level,
+							int *err)
 {
 	struct rfcomm_session *s = NULL;
 	struct sockaddr_l2 addr;
@@ -704,6 +710,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
 	sk = sock->sk;
 	lock_sock(sk);
 	l2cap_pi(sk)->imtu = l2cap_mtu;
+	l2cap_pi(sk)->sec_level = sec_level;
 	if (l2cap_ertm)
 		l2cap_pi(sk)->mode = L2CAP_MODE_ERTM;
 	release_sock(sk);
-- 
cgit v1.2.2


From 18943d292facbc70e6a36fc62399ae833f64671b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 8 Nov 2010 11:15:54 +0000
Subject: inet: fix ip_mc_drop_socket()

commit 8723e1b4ad9be4444 (inet: RCU changes in inetdev_by_index())
forgot one call site in ip_mc_drop_socket()

We should not decrease idev refcount after inetdev_by_index() call,
since refcount is not increased anymore.

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Reported-by: Miles Lane <miles.lane@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c721..3c53c2d89e3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2306,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk)
 
 		in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
 		(void) ip_mc_leave_src(sk, iml, in_dev);
-		if (in_dev != NULL) {
+		if (in_dev != NULL)
 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
-			in_dev_put(in_dev);
-		}
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
 		call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
-- 
cgit v1.2.2


From 88f8a5e3e7defccd3925cabb1ee4d3994e5cdb52 Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy <segooon@gmail.com>
Date: Sun, 31 Oct 2010 07:10:32 +0000
Subject: net: tipc: fix information leak to userland

Structure sockaddr_tipc is copied to userland with padding bytes after
"id" field in union field "name" unitialized.  It leads to leaking of
contents of kernel stack memory.  We have to initialize them to zero.

Signed-off-by: Vasiliy Kulikov <segooon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 33217fc3d69..e9f0d500448 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -396,6 +396,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
 	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 	struct tipc_sock *tsock = tipc_sk(sock->sk);
 
+	memset(addr, 0, sizeof(*addr));
 	if (peer) {
 		if ((sock->state != SS_CONNECTED) &&
 			((peer != 2) || (sock->state != SS_DISCONNECTING)))
-- 
cgit v1.2.2


From 332dd96f7ac15e937088fe11f15cfe0210e8edd1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Nov 2010 11:46:33 -0800
Subject: net/dst: dst_dev_event() called after other notifiers

Followup of commit ef885afbf8a37689 (net: use rcu_barrier() in
rollback_registered_many)

dst_dev_event() scans a garbage dst list that might be feeded by various
network notifiers at device dismantle time.

Its important to call dst_dev_event() after other notifiers, or we might
enter the infamous msleep(250) in netdev_wait_allrefs(), and wait one
second before calling again call_netdevice_notifiers(NETDEV_UNREGISTER,
dev) to properly remove last device references.

Use priority -10 to let dst_dev_notifier be called after other network
notifiers (they have the default 0 priority)

Reported-by: Ben Greear <greearb@candelatech.com>
Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reported-by: Octavian Purdila <opurdila@ixiacom.com>
Reported-by: Benjamin LaHaise <bcrl@kvack.org>
Tested-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f..b99c7c7ffce 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
 
 static struct notifier_block dst_dev_notifier = {
 	.notifier_call	= dst_dev_event,
+	.priority = -10, /* must be called after other network notifiers */
 };
 
 void __init dst_init(void)
-- 
cgit v1.2.2


From e98b6fed84d0f0155d7b398e0dfeac74c792f2d0 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 9 Nov 2010 12:24:53 -0800
Subject: ceph: fix comment, remove extraneous args

The offset/length arguments aren't used.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/pagevec.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 54caf068715..ac34feeb2b3 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -13,8 +13,7 @@
  * build a vector of user pages
  */
 struct page **ceph_get_direct_page_vector(const char __user *data,
-						 int num_pages,
-						 loff_t off, size_t len)
+					  int num_pages)
 {
 	struct page **pages;
 	int rc;
-- 
cgit v1.2.2


From b7495fc2ff941db6a118a93ab8d61149e3f4cef8 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 9 Nov 2010 12:43:12 -0800
Subject: ceph: make page alignment explicit in osd interface

We used to infer alignment of IOs within a page based on the file offset,
which assumed they matched.  This broke with direct IO that was not aligned
to pages (e.g., 512-byte aligned IO).  We were also trusting the alignment
specified in the OSD reply, which could have been adjusted by the server.

Explicitly specify the page alignment when setting up OSD IO requests.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osd_client.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 79391994b3e..6c096239660 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -71,6 +71,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
 		op->extent.length = objlen;
 	}
 	req->r_num_pages = calc_pages_for(off, *plen);
+	req->r_page_alignment = off & ~PAGE_MASK;
 	if (op->op == CEPH_OSD_OP_WRITE)
 		op->payload_len = *plen;
 
@@ -419,7 +420,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					       u32 truncate_seq,
 					       u64 truncate_size,
 					       struct timespec *mtime,
-					       bool use_mempool, int num_reply)
+					       bool use_mempool, int num_reply,
+					       int page_align)
 {
 	struct ceph_osd_req_op ops[3];
 	struct ceph_osd_request *req;
@@ -447,6 +449,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	calc_layout(osdc, vino, layout, off, plen, req, ops);
 	req->r_file_layout = *layout;  /* keep a copy */
 
+	/* in case it differs from natural alignment that calc_layout
+	   filled in for us */
+	req->r_page_alignment = page_align;
+
 	ceph_osdc_build_request(req, off, plen, ops,
 				snapc,
 				mtime,
@@ -1489,7 +1495,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 			struct ceph_vino vino, struct ceph_file_layout *layout,
 			u64 off, u64 *plen,
 			u32 truncate_seq, u64 truncate_size,
-			struct page **pages, int num_pages)
+			struct page **pages, int num_pages, int page_align)
 {
 	struct ceph_osd_request *req;
 	int rc = 0;
@@ -1499,15 +1505,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 	req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
 				    NULL, 0, truncate_seq, truncate_size, NULL,
-				    false, 1);
+				    false, 1, page_align);
 	if (!req)
 		return -ENOMEM;
 
 	/* it may be a short read due to an object boundary */
 	req->r_pages = pages;
 
-	dout("readpages  final extent is %llu~%llu (%d pages)\n",
-	     off, *plen, req->r_num_pages);
+	dout("readpages  final extent is %llu~%llu (%d pages align %d)\n",
+	     off, *plen, req->r_num_pages, page_align);
 
 	rc = ceph_osdc_start_request(osdc, req, false);
 	if (!rc)
@@ -1533,6 +1539,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 {
 	struct ceph_osd_request *req;
 	int rc = 0;
+	int page_align = off & ~PAGE_MASK;
 
 	BUG_ON(vino.snap != CEPH_NOSNAP);
 	req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
@@ -1541,7 +1548,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 					    CEPH_OSD_FLAG_WRITE,
 				    snapc, do_sync,
 				    truncate_seq, truncate_size, mtime,
-				    nofail, 1);
+				    nofail, 1, page_align);
 	if (!req)
 		return -ENOMEM;
 
@@ -1638,8 +1645,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 	m = ceph_msg_get(req->r_reply);
 
 	if (data_len > 0) {
-		unsigned data_off = le16_to_cpu(hdr->data_off);
-		int want = calc_pages_for(data_off & ~PAGE_MASK, data_len);
+		int want = calc_pages_for(req->r_page_alignment, data_len);
 
 		if (unlikely(req->r_num_pages < want)) {
 			pr_warning("tid %lld reply %d > expected %d pages\n",
-- 
cgit v1.2.2


From c5c6b19d4b8f5431fca05f28ae9e141045022149 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 9 Nov 2010 12:40:00 -0800
Subject: ceph: explicitly specify page alignment in network messages

The alignment used for reading data into or out of pages used to be taken
from the data_off field in the message header.  This only worked as long
as the page alignment matched the object offset, breaking direct io to
non-page aligned offsets.

Instead, explicitly specify the page alignment next to the page vector
in the ceph_msg struct, and use that instead of the message header (which
probably shouldn't be trusted).  The alloc_msg callback is responsible for
filling in this field properly when it sets up the page vector.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/messenger.c  | 10 +++++-----
 net/ceph/osd_client.c |  3 +++
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index d379abf873b..1c7a2ec4f3c 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -540,8 +540,7 @@ static void prepare_write_message(struct ceph_connection *con)
 		/* initialize page iterator */
 		con->out_msg_pos.page = 0;
 		if (m->pages)
-			con->out_msg_pos.page_pos =
-				le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
+			con->out_msg_pos.page_pos = m->page_alignment;
 		else
 			con->out_msg_pos.page_pos = 0;
 		con->out_msg_pos.data_pos = 0;
@@ -1491,7 +1490,7 @@ static int read_partial_message(struct ceph_connection *con)
 	struct ceph_msg *m = con->in_msg;
 	int ret;
 	int to, left;
-	unsigned front_len, middle_len, data_len, data_off;
+	unsigned front_len, middle_len, data_len;
 	int datacrc = con->msgr->nocrc;
 	int skip;
 	u64 seq;
@@ -1527,7 +1526,6 @@ static int read_partial_message(struct ceph_connection *con)
 	data_len = le32_to_cpu(con->in_hdr.data_len);
 	if (data_len > CEPH_MSG_MAX_DATA_LEN)
 		return -EIO;
-	data_off = le16_to_cpu(con->in_hdr.data_off);
 
 	/* verify seq# */
 	seq = le64_to_cpu(con->in_hdr.seq);
@@ -1575,7 +1573,7 @@ static int read_partial_message(struct ceph_connection *con)
 
 		con->in_msg_pos.page = 0;
 		if (m->pages)
-			con->in_msg_pos.page_pos = data_off & ~PAGE_MASK;
+			con->in_msg_pos.page_pos = m->page_alignment;
 		else
 			con->in_msg_pos.page_pos = 0;
 		con->in_msg_pos.data_pos = 0;
@@ -2300,6 +2298,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 
 	/* data */
 	m->nr_pages = 0;
+	m->page_alignment = 0;
 	m->pages = NULL;
 	m->pagelist = NULL;
 	m->bio = NULL;
@@ -2369,6 +2368,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
 			       type, front_len);
 			return NULL;
 		}
+		msg->page_alignment = le16_to_cpu(hdr->data_off);
 	}
 	memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6c096239660..3e20a122ffa 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -391,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 		req->r_request->hdr.data_len = cpu_to_le32(data_len);
 	}
 
+	req->r_request->page_alignment = req->r_page_alignment;
+
 	BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
 	msg_size = p - msg->front.iov_base;
 	msg->front.iov_len = msg_size;
@@ -1657,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 		}
 		m->pages = req->r_pages;
 		m->nr_pages = req->r_num_pages;
+		m->page_alignment = req->r_page_alignment;
 #ifdef CONFIG_BLOCK
 		m->bio = req->r_bio;
 #endif
-- 
cgit v1.2.2


From fe10ae53384e48c51996941b7720ee16995cbcb7 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Wed, 10 Nov 2010 10:14:33 -0800
Subject: net: ax25: fix information leak to userland

Sometimes ax25_getname() doesn't initialize all members of fsa_digipeater
field of fsa struct, also the struct has padding bytes between
sax25_call and sax25_ndigis fields.  This structure is then copied to
userland.  It leads to leaking of contents of kernel stack memory.

Signed-off-by: Vasiliy Kulikov <segooon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 26eaebf4aaa..bb86d293239 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1392,6 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 	ax25_cb *ax25;
 	int err = 0;
 
+	memset(fsa, 0, sizeof(fsa));
 	lock_sock(sk);
 	ax25 = ax25_sk(sk);
 
@@ -1403,7 +1404,6 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 
 		fsa->fsa_ax25.sax25_family = AF_AX25;
 		fsa->fsa_ax25.sax25_call   = ax25->dest_addr;
-		fsa->fsa_ax25.sax25_ndigis = 0;
 
 		if (ax25->digipeat != NULL) {
 			ndigi = ax25->digipeat->ndigi;
-- 
cgit v1.2.2


From 57fe93b374a6b8711995c2d466c502af9f3a08bb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 Nov 2010 10:38:24 -0800
Subject: filter: make sure filters dont read uninitialized memory

There is a possibility malicious users can get limited information about
uninitialized stack mem array. Even if sk_run_filter() result is bound
to packet length (0 .. 65535), we could imagine this can be used by
hostile user.

Initializing mem[] array, like Dan Rosenberg suggested in his patch is
expensive since most filters dont even use this array.

Its hard to make the filter validation in sk_chk_filter(), because of
the jumps. This might be done later.

In this patch, I use a bitmap (a single long var) so that only filters
using mem[] loads/stores pay the price of added security checks.

For other filters, additional cost is a single instruction.

[ Since we access fentry->k a lot now, cache it in a local variable
  and mark filter entry pointer as const. -DaveM ]

Reported-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 64 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 35 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 7beaec36b54..23e9b2a6b4c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -112,39 +112,41 @@ EXPORT_SYMBOL(sk_filter);
  */
 unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 {
-	struct sock_filter *fentry;	/* We walk down these */
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
 	u32 X = 0;			/* Index Register */
 	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
+	unsigned long memvalid = 0;
 	u32 tmp;
 	int k;
 	int pc;
 
+	BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
 	/*
 	 * Process array of filter instructions.
 	 */
 	for (pc = 0; pc < flen; pc++) {
-		fentry = &filter[pc];
+		const struct sock_filter *fentry = &filter[pc];
+		u32 f_k = fentry->k;
 
 		switch (fentry->code) {
 		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
 		case BPF_S_ALU_ADD_K:
-			A += fentry->k;
+			A += f_k;
 			continue;
 		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
 		case BPF_S_ALU_SUB_K:
-			A -= fentry->k;
+			A -= f_k;
 			continue;
 		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
 		case BPF_S_ALU_MUL_K:
-			A *= fentry->k;
+			A *= f_k;
 			continue;
 		case BPF_S_ALU_DIV_X:
 			if (X == 0)
@@ -152,49 +154,49 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			A /= X;
 			continue;
 		case BPF_S_ALU_DIV_K:
-			A /= fentry->k;
+			A /= f_k;
 			continue;
 		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
 		case BPF_S_ALU_AND_K:
-			A &= fentry->k;
+			A &= f_k;
 			continue;
 		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
 		case BPF_S_ALU_OR_K:
-			A |= fentry->k;
+			A |= f_k;
 			continue;
 		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
 		case BPF_S_ALU_LSH_K:
-			A <<= fentry->k;
+			A <<= f_k;
 			continue;
 		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
 		case BPF_S_ALU_RSH_K:
-			A >>= fentry->k;
+			A >>= f_k;
 			continue;
 		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
 		case BPF_S_JMP_JA:
-			pc += fentry->k;
+			pc += f_k;
 			continue;
 		case BPF_S_JMP_JGT_K:
-			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A > f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGE_K:
-			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A >= f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JEQ_K:
-			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A == f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JSET_K:
-			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A & f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGT_X:
 			pc += (A > X) ? fentry->jt : fentry->jf;
@@ -209,7 +211,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			pc += (A & X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_LD_W_ABS:
-			k = fentry->k;
+			k = f_k;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
@@ -218,7 +220,7 @@ load_w:
 			}
 			break;
 		case BPF_S_LD_H_ABS:
-			k = fentry->k;
+			k = f_k;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
@@ -227,7 +229,7 @@ load_h:
 			}
 			break;
 		case BPF_S_LD_B_ABS:
-			k = fentry->k;
+			k = f_k;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
 			if (ptr != NULL) {
@@ -242,32 +244,34 @@ load_b:
 			X = skb->len;
 			continue;
 		case BPF_S_LD_W_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_w;
 		case BPF_S_LD_H_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_h;
 		case BPF_S_LD_B_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_b;
 		case BPF_S_LDX_B_MSH:
-			ptr = load_pointer(skb, fentry->k, 1, &tmp);
+			ptr = load_pointer(skb, f_k, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
 		case BPF_S_LD_IMM:
-			A = fentry->k;
+			A = f_k;
 			continue;
 		case BPF_S_LDX_IMM:
-			X = fentry->k;
+			X = f_k;
 			continue;
 		case BPF_S_LD_MEM:
-			A = mem[fentry->k];
+			A = (memvalid & (1UL << f_k)) ?
+				mem[f_k] : 0;
 			continue;
 		case BPF_S_LDX_MEM:
-			X = mem[fentry->k];
+			X = (memvalid & (1UL << f_k)) ?
+				mem[f_k] : 0;
 			continue;
 		case BPF_S_MISC_TAX:
 			X = A;
@@ -276,14 +280,16 @@ load_b:
 			A = X;
 			continue;
 		case BPF_S_RET_K:
-			return fentry->k;
+			return f_k;
 		case BPF_S_RET_A:
 			return A;
 		case BPF_S_ST:
-			mem[fentry->k] = A;
+			memvalid |= 1UL << f_k;
+			mem[f_k] = A;
 			continue;
 		case BPF_S_STX:
-			mem[fentry->k] = X;
+			memvalid |= 1UL << f_k;
+			mem[f_k] = X;
 			continue;
 		default:
 			WARN_ON(1);
-- 
cgit v1.2.2


From 67286640f638f5ad41a946b9a3dc75327950248f Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Wed, 10 Nov 2010 12:09:10 -0800
Subject: net: packet: fix information leak to userland

packet_getname_spkt() doesn't initialize all members of sa_data field of
sockaddr struct if strlen(dev->name) < 13.  This structure is then copied
to userland.  It leads to leaking of contents of kernel stack memory.
We have to fully fill sa_data with strncpy() instead of strlcpy().

The same with packet_getname(): it doesn't initialize sll_pkttype field of
sockaddr_ll.  Set it to zero.

Signed-off-by: Vasiliy Kulikov <segooon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27b9d4..0856a13cb53 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1719,7 +1719,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
 	if (dev)
-		strlcpy(uaddr->sa_data, dev->name, 15);
+		strncpy(uaddr->sa_data, dev->name, 14);
 	else
 		memset(uaddr->sa_data, 0, 14);
 	rcu_read_unlock();
@@ -1742,6 +1742,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 	sll->sll_family = AF_PACKET;
 	sll->sll_ifindex = po->ifindex;
 	sll->sll_protocol = po->num;
+	sll->sll_pkttype = 0;
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
 	if (dev) {
-- 
cgit v1.2.2


From 8d987e5c75107ca7515fa19e857cfa24aab6ec8f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Nov 2010 23:24:26 +0000
Subject: net: avoid limits overflow

Robin Holt tried to boot a 16TB machine and found some limits were
reached : sysctl_tcp_mem[2], sysctl_udp_mem[2]

We can switch infrastructure to use long "instead" of "int", now
atomic_long_t primitives are available for free.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: Robin Holt <holt@sgi.com>
Reviewed-by: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c                | 14 +++++++-------
 net/decnet/af_decnet.c         |  2 +-
 net/decnet/sysctl_net_decnet.c |  4 ++--
 net/ipv4/proc.c                |  8 ++++----
 net/ipv4/sysctl_net_ipv4.c     |  5 ++---
 net/ipv4/tcp.c                 |  4 ++--
 net/ipv4/tcp_input.c           | 11 +++++++----
 net/ipv4/udp.c                 |  4 ++--
 net/sctp/protocol.c            |  2 +-
 net/sctp/socket.c              |  4 ++--
 net/sctp/sysctl.c              |  4 ++--
 11 files changed, 32 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 3eed5424e65..fb608011146 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1653,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 {
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
-	int allocated;
+	long allocated;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-	allocated = atomic_add_return(amt, prot->memory_allocated);
+	allocated = atomic_long_add_return(amt, prot->memory_allocated);
 
 	/* Under limit. */
 	if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1714,7 @@ suppress_allocation:
 
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-	atomic_sub(amt, prot->memory_allocated);
+	atomic_long_sub(amt, prot->memory_allocated);
 	return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
-	atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+	atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
 		   prot->memory_allocated);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (prot->memory_pressure && *prot->memory_pressure &&
-	    (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+	    (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
 		*prot->memory_pressure = 0;
 }
 EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2452,12 @@ static char proto_method_implemented(const void *method)
 
 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 {
-	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
+	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
 		   proto->name,
 		   proto->obj_size,
 		   sock_prot_inuse_get(seq_file_net(seq), proto),
-		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+		   proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
 		   proto->max_header,
 		   proto->slab == NULL ? "no" : "yes",
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790..a76b78de679 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_ops;
 static DEFINE_RWLOCK(dn_hash_lock);
 static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
 static struct hlist_head dn_wild_sk;
-static atomic_t decnet_memory_allocated;
+static atomic_long_t decnet_memory_allocated;
 
 static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
 static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index be3eb8e2328..28f8b5e5f73 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -38,7 +38,7 @@ int decnet_log_martians = 1;
 int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
 
 /* Reasonable defaults, I hope, based on tcp's defaults */
-int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
 int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
 int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
 
@@ -324,7 +324,7 @@ static ctl_table dn_table[] = {
 		.data = &sysctl_decnet_mem,
 		.maxlen = sizeof(sysctl_decnet_mem),
 		.mode = 0644,
-		.proc_handler = proc_dointvec,
+		.proc_handler = proc_doulongvec_minmax
 	},
 	{
 		.procname = "decnet_rmem",
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7c..1b48eb1ed45 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	local_bh_enable();
 
 	socket_seq_show(seq);
-	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
+	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
 		   sock_prot_inuse_get(net, &tcp_prot), orphans,
 		   tcp_death_row.tw_count, sockets,
-		   atomic_read(&tcp_memory_allocated));
-	seq_printf(seq, "UDP: inuse %d mem %d\n",
+		   atomic_long_read(&tcp_memory_allocated));
+	seq_printf(seq, "UDP: inuse %d mem %ld\n",
 		   sock_prot_inuse_get(net, &udp_prot),
-		   atomic_read(&udp_memory_allocated));
+		   atomic_long_read(&udp_memory_allocated));
 	seq_printf(seq, "UDPLITE: inuse %d\n",
 		   sock_prot_inuse_get(net, &udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n",
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17..e91911d7aae 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -398,7 +398,7 @@ static struct ctl_table ipv4_table[] = {
 		.data		= &sysctl_tcp_mem,
 		.maxlen		= sizeof(sysctl_tcp_mem),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_doulongvec_minmax
 	},
 	{
 		.procname	= "tcp_wmem",
@@ -602,8 +602,7 @@ static struct ctl_table ipv4_table[] = {
 		.data		= &sysctl_udp_mem,
 		.maxlen		= sizeof(sysctl_udp_mem),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero
+		.proc_handler	= proc_doulongvec_minmax,
 	},
 	{
 		.procname	= "udp_rmem_min",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb..245603c4ad4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
-int sysctl_tcp_mem[3] __read_mostly;
+long sysctl_tcp_mem[3] __read_mostly;
 int sysctl_tcp_wmem[3] __read_mostly;
 int sysctl_tcp_rmem[3] __read_mostly;
 
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
 EXPORT_SYMBOL(sysctl_tcp_rmem);
 EXPORT_SYMBOL(sysctl_tcp_wmem);
 
-atomic_t tcp_memory_allocated;	/* Current allocated memory. */
+atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
 
 /*
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3357f69e353..6d8ab1c4efc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
 	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
 		     sizeof(struct sk_buff);
 
-	if (sk->sk_sndbuf < 3 * sndmem)
-		sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+	if (sk->sk_sndbuf < 3 * sndmem) {
+		sk->sk_sndbuf = 3 * sndmem;
+		if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+			sk->sk_sndbuf = sysctl_tcp_wmem[2];
+	}
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 	    !tcp_memory_pressure &&
-	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+	    atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
 	}
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
 		return 0;
 
 	/* If we are under soft global TCP memory pressure, do not expand.  */
-	if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+	if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
 		return 0;
 
 	/* If we filled the congestion window, do not expand.  */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 28cb2d733a3..5e0a3a582a5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
 struct udp_table udp_table __read_mostly;
 EXPORT_SYMBOL(udp_table);
 
-int sysctl_udp_mem[3] __read_mostly;
+long sysctl_udp_mem[3] __read_mostly;
 EXPORT_SYMBOL(sysctl_udp_mem);
 
 int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
 int sysctl_udp_wmem_min __read_mostly;
 EXPORT_SYMBOL(sysctl_udp_wmem_min);
 
-atomic_t udp_memory_allocated;
+atomic_long_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
 #define MAX_UDP_PORTS 65536
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ef29c74d85..e58f9476f29 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -92,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specific;
 struct kmem_cache *sctp_chunk_cachep __read_mostly;
 struct kmem_cache *sctp_bucket_cachep __read_mostly;
 
-int sysctl_sctp_mem[3];
+long sysctl_sctp_mem[3];
 int sysctl_sctp_rmem[3];
 int sysctl_sctp_wmem[3];
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e34ca9cc116..6bd554323a3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -111,12 +111,12 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
 static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
 
 extern struct kmem_cache *sctp_bucket_cachep;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
 extern int sysctl_sctp_rmem[3];
 extern int sysctl_sctp_wmem[3];
 
 static int sctp_memory_pressure;
-static atomic_t sctp_memory_allocated;
+static atomic_long_t sctp_memory_allocated;
 struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 832590bbe0c..50cb57f0919 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -54,7 +54,7 @@ static int sack_timer_max = 500;
 static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
 static int rwnd_scale_max = 16;
 
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
 extern int sysctl_sctp_rmem[3];
 extern int sysctl_sctp_wmem[3];
 
@@ -203,7 +203,7 @@ static ctl_table sctp_table[] = {
 		.data		= &sysctl_sctp_mem,
 		.maxlen		= sizeof(sysctl_sctp_mem),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_doulongvec_minmax
 	},
 	{
 		.procname	= "sctp_rmem",
-- 
cgit v1.2.2


From 7a1abd08d52fdeddb3e9a5a33f2f15cc6a5674d2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 Nov 2010 21:35:37 -0800
Subject: tcp: Increase TCP_MAXSEG socket option minimum.

As noted by Steve Chen, since commit
f5fff5dc8a7a3f395b0525c02ba92c95d42b7390 ("tcp: advertise MSS
requested by user") we can end up with a situation where
tcp_select_initial_window() does a divide by a zero (or
even negative) mss value.

The problem is that sometimes we effectively subtract
TCPOLEN_TSTAMP_ALIGNED and/or TCPOLEN_MD5SIG_ALIGNED from the mss.

Fix this by increasing the minimum from 8 to 64.

Reported-by: Steve Chen <schen@mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 245603c4ad4..08141996948 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		/* Values greater than interface MTU won't take effect. However
 		 * at the point when this call is done we typically don't yet
 		 * know which interface is going to be used */
-		if (val < 8 || val > MAX_TCP_WINDOW) {
+		if (val < 64 || val > MAX_TCP_WINDOW) {
 			err = -EINVAL;
 			break;
 		}
-- 
cgit v1.2.2


From 22e091e5253da1e9ad7c0a82c2c84446fc403efe Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Fri, 12 Nov 2010 08:51:55 +0100
Subject: netfilter: ipv6: fix overlap check for fragments

The type of FRAG6_CB(prev)->offset is int, skb->len is *unsigned* int,
and offset is int.

Without this patch, type conversion occurred to this expression, when
(FRAG6_CB(prev)->offset + prev->len) is less than offset.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3a3f129a44c..79d43aa8fa8 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -286,7 +286,7 @@ found:
 
 	/* Check for overlap with preceding fragment. */
 	if (prev &&
-	    (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0)
+	    (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
 		goto discard_fq;
 
 	/* Look for overlap with succeeding segment. */
-- 
cgit v1.2.2


From 369cf77a6a3e41b1110506ddf43d45804103bfde Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Thu, 11 Nov 2010 15:47:59 +0000
Subject: rtnetlink: Fix message size calculation for link messages

nlmsg_total_size() calculates the length of a netlink message
including header and alignment. nla_total_size() calculates the
space an individual attribute consumes which was meant to be used
in this context.

Also, ensure to account for the attribute header for the
IFLA_INFO_XSTATS attribute as implementations of get_xstats_size()
seem to assume that we do so.

The addition of two message headers minus the missing attribute
header resulted in a calculated message size that was larger than
required. Therefore we never risked running out of skb tailroom.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8121268ddbd..841c287ef40 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -347,16 +347,17 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
 	if (!ops)
 		return 0;
 
-	size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
-	       nlmsg_total_size(strlen(ops->kind) + 1);	 /* IFLA_INFO_KIND */
+	size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+	       nla_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */
 
 	if (ops->get_size)
 		/* IFLA_INFO_DATA + nested data */
-		size += nlmsg_total_size(sizeof(struct nlattr)) +
+		size += nla_total_size(sizeof(struct nlattr)) +
 			ops->get_size(dev);
 
 	if (ops->get_xstats_size)
-		size += ops->get_xstats_size(dev);	/* IFLA_INFO_XSTATS */
+		/* IFLA_INFO_XSTATS */
+		size += nla_total_size(ops->get_xstats_size(dev));
 
 	return size;
 }
-- 
cgit v1.2.2


From 1f18b7176e2e41fada24584ce3c80e9abfaca52b Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <mk@lab.zgora.pl>
Date: Mon, 8 Nov 2010 11:58:45 +0000
Subject: net: Fix header size check for GSO case in recvmsg (af_packet)

Parameter 'len' is size_t type so it will never get negative.

Signed-off-by: Mariusz Kozlowski <mk@lab.zgora.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0856a13cb53..8298e676f5a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1610,9 +1610,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		err = -EINVAL;
 		vnet_hdr_len = sizeof(vnet_hdr);
-		if ((len -= vnet_hdr_len) < 0)
+		if (len < vnet_hdr_len)
 			goto out_free;
 
+		len -= vnet_hdr_len;
+
 		if (skb_is_gso(skb)) {
 			struct skb_shared_info *sinfo = skb_shinfo(skb);
 
-- 
cgit v1.2.2


From 5ef41308f94dcbb3b7afc56cdef1c2ba53fa5d2f Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Fri, 12 Nov 2010 12:44:42 -0800
Subject: x25: Prevent crashing when parsing bad X.25 facilities

Now with improved comma support.

On parsing malformed X.25 facilities, decrementing the remaining length
may cause it to underflow.  Since the length is an unsigned integer,
this will result in the loop continuing until the kernel crashes.

This patch adds checks to ensure decrementing the remaining length does
not cause it to wrap around.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_facilities.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 3a8c4c419cd..55187c8f642 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -61,6 +61,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 	while (len > 0) {
 		switch (*p & X25_FAC_CLASS_MASK) {
 		case X25_FAC_CLASS_A:
+			if (len < 2)
+				return 0;
 			switch (*p) {
 			case X25_FAC_REVERSE:
 				if((p[1] & 0x81) == 0x81) {
@@ -104,6 +106,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			len -= 2;
 			break;
 		case X25_FAC_CLASS_B:
+			if (len < 3)
+				return 0;
 			switch (*p) {
 			case X25_FAC_PACKET_SIZE:
 				facilities->pacsize_in  = p[1];
@@ -125,6 +129,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			len -= 3;
 			break;
 		case X25_FAC_CLASS_C:
+			if (len < 4)
+				return 0;
 			printk(KERN_DEBUG "X.25: unknown facility %02X, "
 			       "values %02X, %02X, %02X\n",
 			       p[0], p[1], p[2], p[3]);
@@ -132,6 +138,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			len -= 4;
 			break;
 		case X25_FAC_CLASS_D:
+			if (len < p[1] + 2)
+				return 0;
 			switch (*p) {
 			case X25_FAC_CALLING_AE:
 				if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
@@ -149,9 +157,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 				break;
 			default:
 				printk(KERN_DEBUG "X.25: unknown facility %02X,"
-					"length %d, values %02X, %02X, "
-					"%02X, %02X\n",
-					p[0], p[1], p[2], p[3], p[4], p[5]);
+					"length %d\n", p[0], p[1]);
 				break;
 			}
 			len -= p[1] + 2;
-- 
cgit v1.2.2


From 8f49c2703b33519aaaccc63f571b465b9d2b3a2d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 12 Nov 2010 13:35:00 -0800
Subject: tcp: Don't change unlocked socket state in tcp_v4_err().

Alexey Kuznetsov noticed a regression introduced by
commit f1ecd5d9e7366609d640ff4040304ea197fbc618
("Revert Backoff [v3]: Revert RTO on ICMP destination unreachable")

The RTO and timer modification code added to tcp_v4_err()
doesn't check sock_owned_by_user(), which if true means we
don't have exclusive access to the socket and therefore cannot
modify it's critical state.

Just skip this new code block if sock_owned_by_user() is true
and eliminate the now superfluous sock_owned_by_user() code
block contained within.

Reported-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
CC: Damian Lukowski <damian@tvk.rwth-aachen.de>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/tcp_ipv4.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f8527d4168..69ccbc1dde9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		    !icsk->icsk_backoff)
 			break;
 
+		if (sock_owned_by_user(sk))
+			break;
+
 		icsk->icsk_backoff--;
 		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
 					 icsk->icsk_backoff;
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (remaining) {
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  remaining, TCP_RTO_MAX);
-		} else if (sock_owned_by_user(sk)) {
-			/* RTO revert clocked out retransmission,
-			 * but socket is locked. Will defer. */
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-						  HZ/20, TCP_RTO_MAX);
 		} else {
 			/* RTO revert clocked out retransmission.
 			 * Will retransmit now */
-- 
cgit v1.2.2


From 2de795707294972f6c34bae9de713e502c431296 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Wed, 27 Oct 2010 18:16:49 +0000
Subject: ipv6: addrconf: don't remove address state on ifdown if the address
 is being kept

Currently, addrconf_ifdown does not delete statically configured IPv6
addresses when the interface is brought down. The intent is that when
the interface comes back up the address will be usable again. However,
this doesn't actually work, because the system stops listening on the
corresponding solicited-node multicast address, so the address cannot
respond to neighbor solicitations and thus receive traffic. Also, the
code notifies the rest of the system that the address is being deleted
(e.g, RTM_DELADDR), even though it is not. Fix it so that none of this
state is updated if the address is being kept on the interface.

Tested: Added a statically configured IPv6 address to an interface,
started ping, brought link down, brought link up again. When link came
up ping kept on going and "ip -6 maddr" showed that the host was still
subscribed to there

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e048ec62d10..b41ce0f0d51 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2740,10 +2740,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			/* Flag it for later restoration when link comes up */
 			ifa->flags |= IFA_F_TENTATIVE;
 			ifa->state = INET6_IFADDR_STATE_DAD;
-
-			write_unlock_bh(&idev->lock);
-
-			in6_ifa_hold(ifa);
 		} else {
 			list_del(&ifa->if_list);
 
@@ -2758,19 +2754,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			ifa->state = INET6_IFADDR_STATE_DEAD;
 			spin_unlock_bh(&ifa->state_lock);
 
-			if (state == INET6_IFADDR_STATE_DEAD)
-				goto put_ifa;
+			if (state == INET6_IFADDR_STATE_DEAD) {
+				in6_ifa_put(ifa);
+			} else {
+				__ipv6_ifa_notify(RTM_DELADDR, ifa);
+				atomic_notifier_call_chain(&inet6addr_chain,
+							   NETDEV_DOWN, ifa);
+			}
+			write_lock_bh(&idev->lock);
 		}
-
-		__ipv6_ifa_notify(RTM_DELADDR, ifa);
-		if (ifa->state == INET6_IFADDR_STATE_DEAD)
-			atomic_notifier_call_chain(&inet6addr_chain,
-						   NETDEV_DOWN, ifa);
-
-put_ifa:
-		in6_ifa_put(ifa);
-
-		write_lock_bh(&idev->lock);
 	}
 
 	list_splice(&keep_list, &idev->addr_list);
-- 
cgit v1.2.2


From 403856532734317d25ec86ab1e75b8133db7acc6 Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Mon, 8 Nov 2010 12:33:48 +0000
Subject: ipv6: Warn users if maximum number of routes is reached.

This gives users at least some clue as to what the problem
might be and how to go about fixing it.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fc328339be9..96455ffb76f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1945,8 +1945,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
 	struct neighbour *neigh;
 
-	if (rt == NULL)
+	if (rt == NULL) {
+		if (net_ratelimit())
+			pr_warning("IPv6:  Maximum number of routes reached,"
+				   " consider increasing route/max_size.\n");
 		return ERR_PTR(-ENOMEM);
+	}
 
 	dev_hold(net->loopback_dev);
 	in6_dev_hold(idev);
-- 
cgit v1.2.2


From 0597d1b99fcfc2c0eada09a698f85ed413d4ba84 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 10 Nov 2010 12:10:30 +0000
Subject: can-bcm: fix minor heap overflow

On 64-bit platforms the ASCII representation of a pointer may be up to 17
bytes long. This patch increases the length of the buffer accordingly.

http://marc.info/?l=linux-netdev&m=128872251418192&w=2

Reported-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
CC: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/bcm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/can/bcm.c b/net/can/bcm.c
index 08ffe9e4be2..6faa8256e10 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
 	struct list_head tx_ops;
 	unsigned long dropped_usr_msgs;
 	struct proc_dir_entry *bcm_proc_read;
-	char procname [9]; /* pointer printed in ASCII with \0 */
+	char procname [20]; /* pointer printed in ASCII with \0 */
 };
 
 static inline struct bcm_sock *bcm_sk(const struct sock *sk)
-- 
cgit v1.2.2


From 94f58df8e545657f0b2d16eca1ac7a4ec39ed6be Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Sun, 7 Nov 2010 22:11:34 +0100
Subject: SUNRPC: Simplify rpc_alloc_iostats by removing pointless local
 variable

Hi,

We can simplify net/sunrpc/stats.c::rpc_alloc_iostats() a bit by getting
rid of the unneeded local variable 'new'.

Please CC me on replies.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/stats.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index f71a73107ae..80df89d957b 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -115,9 +115,7 @@ EXPORT_SYMBOL_GPL(svc_seq_show);
  */
 struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
 {
-	struct rpc_iostats *new;
-	new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
-	return new;
+	return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(rpc_alloc_iostats);
 
-- 
cgit v1.2.2


From 4c62ab9c538bc09c38093fa079e6902ea4d42b98 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Tue, 16 Nov 2010 09:50:47 -0800
Subject: irda: irttp: allow zero byte packets

Sending zero byte packets is not neccessarily an error (AF_INET accepts it,
too), so just apply a shortcut. This was discovered because of a non-working
software with WINE. See

  http://bugs.winehq.org/show_bug.cgi?id=19397#c86
  http://thread.gmane.org/gmane.linux.irda.general/1643

for very detailed debugging information and a testcase. Kudos to Wolfgang for
those!

Reported-by: Wolfgang Schwotzer <wolfgang.schwotzer@gmx.net>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Tested-by: Mike Evans <mike.evans@cardolan.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irttp.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 285761e77d9..6cfaeaf2a37 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -550,16 +550,23 @@ EXPORT_SYMBOL(irttp_close_tsap);
  */
 int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
 {
+	int ret = -1;
+
 	IRDA_ASSERT(self != NULL, return -1;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
 	IRDA_ASSERT(skb != NULL, return -1;);
 
 	IRDA_DEBUG(4, "%s()\n", __func__);
 
+	/* Take shortcut on zero byte packets */
+	if (skb->len == 0) {
+		ret = 0;
+		goto err;
+	}
+
 	/* Check that nothing bad happens */
-	if ((skb->len == 0) || (!self->connected)) {
-		IRDA_DEBUG(1, "%s(), No data, or not connected\n",
-			   __func__);
+	if (!self->connected) {
+		IRDA_DEBUG(1, "%s(), Not connected\n", __func__);
 		goto err;
 	}
 
@@ -576,7 +583,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
 
 err:
 	dev_kfree_skb(skb);
-	return -1;
+	return ret;
 }
 EXPORT_SYMBOL(irttp_udata_request);
 
@@ -599,9 +606,15 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
 	IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__,
 		   skb_queue_len(&self->tx_queue));
 
+	/* Take shortcut on zero byte packets */
+	if (skb->len == 0) {
+		ret = 0;
+		goto err;
+	}
+
 	/* Check that nothing bad happens */
-	if ((skb->len == 0) || (!self->connected)) {
-		IRDA_WARNING("%s: No data, or not connected\n", __func__);
+	if (!self->connected) {
+		IRDA_WARNING("%s: Not connected\n", __func__);
 		ret = -ENOTCONN;
 		goto err;
 	}
-- 
cgit v1.2.2


From 7d98ffd8c2d1da6cec5d84eba42c4aa836a93f85 Mon Sep 17 00:00:00 2001
From: Ulrich Weber <uweber@astaro.com>
Date: Fri, 5 Nov 2010 01:39:12 +0000
Subject: xfrm: update flowi saddr in icmp_send if unset

otherwise xfrm_lookup will fail to find correct policy

Signed-off-by: Ulrich Weber <uweber@astaro.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 96bc7f9475a..e5d1a44bcbd 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -569,6 +569,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		/* No need to clone since we're just using its address. */
 		rt2 = rt;
 
+		if (!fl.nl_u.ip4_u.saddr)
+			fl.nl_u.ip4_u.saddr = rt->rt_src;
+
 		err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
 		switch (err) {
 		case 0:
-- 
cgit v1.2.2


From 9236d838c920e90708570d9bbd7bb82d30a38130 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Fri, 12 Nov 2010 16:31:23 -0800
Subject: cfg80211: fix extension channel checks to initiate communication

When operating in a mode that initiates communication and using
HT40 we should fail if we cannot use both primary and secondary
channels to initiate communication. Our current ht40 allowmap
only covers STA mode of operation, for beaconing modes we need
a check on the fly as the mode of operation is dynamic and
there other flags other than disable which we should read
to check if we can initiate communication.

Do not allow for initiating communication if our secondary HT40
channel has is either disabled, has a passive scan flag, a
no-ibss flag or is a radar channel. Userspace now has similar
checks but this is also needed in-kernel.

Reported-by: Jouni Malinen <jouni.malinen@atheros.com>
Cc: stable@kernel.org
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/chan.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'net')

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index d0c92dddb26..c8d190d911e 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -44,6 +44,36 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 	return chan;
 }
 
+static bool can_beacon_sec_chan(struct wiphy *wiphy,
+				struct ieee80211_channel *chan,
+				enum nl80211_channel_type channel_type)
+{
+	struct ieee80211_channel *sec_chan;
+	int diff;
+
+	switch (channel_type) {
+	case NL80211_CHAN_HT40PLUS:
+		diff = 20;
+	case NL80211_CHAN_HT40MINUS:
+		diff = -20;
+	default:
+		return false;
+	}
+
+	sec_chan = ieee80211_get_channel(wiphy, chan->center_freq + diff);
+	if (!sec_chan)
+		return false;
+
+	/* we'll need a DFS capability later */
+	if (sec_chan->flags & (IEEE80211_CHAN_DISABLED |
+			       IEEE80211_CHAN_PASSIVE_SCAN |
+			       IEEE80211_CHAN_NO_IBSS |
+			       IEEE80211_CHAN_RADAR))
+		return false;
+
+	return true;
+}
+
 int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
 		      struct wireless_dev *wdev, int freq,
 		      enum nl80211_channel_type channel_type)
@@ -68,6 +98,28 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
 	if (!chan)
 		return -EINVAL;
 
+	/* Both channels should be able to initiate communication */
+	if (wdev && (wdev->iftype == NL80211_IFTYPE_ADHOC ||
+		     wdev->iftype == NL80211_IFTYPE_AP ||
+		     wdev->iftype == NL80211_IFTYPE_AP_VLAN ||
+		     wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
+		     wdev->iftype == NL80211_IFTYPE_P2P_GO)) {
+		switch (channel_type) {
+		case NL80211_CHAN_HT40PLUS:
+		case NL80211_CHAN_HT40MINUS:
+			if (!can_beacon_sec_chan(&rdev->wiphy, chan,
+						 channel_type)) {
+				printk(KERN_DEBUG
+				       "cfg80211: Secondary channel not "
+				       "allowed to initiate communication\n");
+				return -EINVAL;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
 	result = rdev->ops->set_channel(&rdev->wiphy,
 					wdev ? wdev->netdev : NULL,
 					chan, channel_type);
-- 
cgit v1.2.2


From 451a3c24b0135bce54542009b5fde43846c7cf67 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 17 Nov 2010 16:26:55 +0100
Subject: BKL: remove extraneous #include <smp_lock.h>

The big kernel lock has been removed from all these files at some point,
leaving only the #include.

Remove this too as a cleanup.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/irda/af_irda.c         | 1 -
 net/irda/irnet/irnet_ppp.c | 1 -
 net/sunrpc/svc_xprt.c      | 1 -
 3 files changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 7f097989cde..a6de3059746 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -45,7 +45,6 @@
 #include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/smp_lock.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/slab.h>
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7fa86373de4..7c567b8aa89 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -15,7 +15,6 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include "irnet_ppp.h"		/* Private header */
 /* Please put other headers in irnet.h - Thanks */
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c82fe739fbd..ea2ff78dcf7 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -5,7 +5,6 @@
  */
 
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/errno.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
-- 
cgit v1.2.2


From 218854af84038d828a32f061858b1902ed2beec6 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Wed, 17 Nov 2010 06:37:16 +0000
Subject: rds: Integer overflow in RDS cmsg handling

In rds_cmsg_rdma_args(), the user-provided args->nr_local value is
restricted to less than UINT_MAX.  This seems to need a tighter upper
bound, since the calculation of total iov_size can overflow, resulting
in a small sock_kmalloc() allocation.  This would probably just result
in walking off the heap and crashing when calling rds_rdma_pages() with
a high count value.  If it somehow doesn't crash here, then memory
corruption could occur soon after.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8920f2a8332..4e37c1cbe8b 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -567,7 +567,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 		goto out;
 	}
 
-	if (args->nr_local > (u64)UINT_MAX) {
+	if (args->nr_local > UIO_MAXIOV) {
 		ret = -EMSGSIZE;
 		goto out;
 	}
-- 
cgit v1.2.2


From 09a02fdb919876c01e8f05960750a418b3f7fa48 Mon Sep 17 00:00:00 2001
From: Mark Mentovai <mark@moxienet.com>
Date: Wed, 17 Nov 2010 16:34:37 -0500
Subject: cfg80211: fix can_beacon_sec_chan, reenable HT40

This follows wireless-testing 9236d838c920e90708570d9bbd7bb82d30a38130
("cfg80211: fix extension channel checks to initiate communication") and
fixes accidental case fall-through. Without this fix, HT40 is entirely
blocked.

Signed-off-by: Mark Mentovai <mark@moxienet.com>
Cc: stable@kernel.org
Acked-by: Luis R. Rodriguez <lrodriguez@atheros.com
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/chan.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index c8d190d911e..17cd0c04d13 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -54,8 +54,10 @@ static bool can_beacon_sec_chan(struct wiphy *wiphy,
 	switch (channel_type) {
 	case NL80211_CHAN_HT40PLUS:
 		diff = 20;
+		break;
 	case NL80211_CHAN_HT40MINUS:
 		diff = -20;
+		break;
 	default:
 		return false;
 	}
-- 
cgit v1.2.2


From 7d8e76bf9ac3604897f0ce12e8bf09b68c2a2c89 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 16 Nov 2010 19:42:53 +0000
Subject: net: zero kobject in rx_queue_release

netif_set_real_num_rx_queues() can decrement and increment
the number of rx queues. For example ixgbe does this as
features and offloads are toggled. Presumably this could
also happen across down/up on most devices if the available
resources changed (cpu offlined).

The kobject needs to be zero'd in this case so that the
state is not preserved across kobject_put()/kobject_init_and_add().

This resolves the following error report.

ixgbe 0000:03:00.0: eth2: NIC Link is Up 10 Gbps, Flow Control: RX/TX
kobject (ffff880324b83210): tried to init an initialized object, something is seriously wrong.
Pid: 1972, comm: lldpad Not tainted 2.6.37-rc18021qaz+ #169
Call Trace:
 [<ffffffff8121c940>] kobject_init+0x3a/0x83
 [<ffffffff8121cf77>] kobject_init_and_add+0x23/0x57
 [<ffffffff8107b800>] ? mark_lock+0x21/0x267
 [<ffffffff813c6d11>] net_rx_queue_update_kobjects+0x63/0xc6
 [<ffffffff813b5e0e>] netif_set_real_num_rx_queues+0x5f/0x78
 [<ffffffffa0261d49>] ixgbe_set_num_queues+0x1c6/0x1ca [ixgbe]
 [<ffffffffa0262509>] ixgbe_init_interrupt_scheme+0x1e/0x79c [ixgbe]
 [<ffffffffa0274596>] ixgbe_dcbnl_set_state+0x167/0x189 [ixgbe]

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a5ff5a89f37..7f902cad10f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -712,15 +712,21 @@ static void rx_queue_release(struct kobject *kobj)
 
 
 	map = rcu_dereference_raw(queue->rps_map);
-	if (map)
+	if (map) {
+		RCU_INIT_POINTER(queue->rps_map, NULL);
 		call_rcu(&map->rcu, rps_map_release);
+	}
 
 	flow_table = rcu_dereference_raw(queue->rps_flow_table);
-	if (flow_table)
+	if (flow_table) {
+		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+	}
 
 	if (atomic_dec_and_test(&first->count))
 		kfree(first);
+	else
+		memset(kobj, 0, sizeof(*kobj));
 }
 
 static struct kobj_type rx_queue_ktype = {
-- 
cgit v1.2.2


From 93908d192686d8285dd6441ff855df92a40103d2 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Wed, 17 Nov 2010 01:44:24 +0000
Subject: ipv6: Expose IFLA_PROTINFO timer values in msecs instead of jiffies

IFLA_PROTINFO exposes timer related per device settings in jiffies.
Change it to expose these values in msecs like the sysctl interface
does.

I did not find any users of IFLA_PROTINFO which rely on any of these
values and even if there are, they are likely already broken because
there is no way for them to reliably convert such a value to another
time format.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b41ce0f0d51..52b7df74e4c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3798,8 +3798,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_AUTOCONF] = cnf->autoconf;
 	array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
 	array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
-	array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
-	array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
+	array[DEVCONF_RTR_SOLICIT_INTERVAL] =
+		jiffies_to_msecs(cnf->rtr_solicit_interval);
+	array[DEVCONF_RTR_SOLICIT_DELAY] =
+		jiffies_to_msecs(cnf->rtr_solicit_delay);
 	array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
 #ifdef CONFIG_IPV6_PRIVACY
 	array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
@@ -3813,7 +3815,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
-	array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval;
+	array[DEVCONF_RTR_PROBE_INTERVAL] =
+		jiffies_to_msecs(cnf->rtr_probe_interval);
 #ifdef CONFIG_IPV6_ROUTE_INFO
 	array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
 #endif
-- 
cgit v1.2.2


From 18a31e1e282f9ed563b131526a88162ccbe04ee3 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Wed, 17 Nov 2010 04:12:02 +0000
Subject: ipv6: Expose reachable and retrans timer values as msecs

Expose reachable and retrans timer values in msecs instead of jiffies.
Both timer values are already exposed as msecs in the neighbour table
netlink interface.

The creation timestamp format with increased precision is kept but
cleaned up.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 52b7df74e4c..2fc35b32df9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -98,7 +98,11 @@
 #endif
 
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
-#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
+
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
 
 #define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ? HZ/50 : 1)
 #define ADDRCONF_TIMER_FUZZ		(HZ / 4)
@@ -3444,10 +3448,8 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
 {
 	struct ifa_cacheinfo ci;
 
-	ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
-			+ TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
-			+ TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.cstamp = cstamp_delta(cstamp);
+	ci.tstamp = cstamp_delta(tstamp);
 	ci.ifa_prefered = preferred;
 	ci.ifa_valid = valid;
 
@@ -3932,10 +3934,9 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
 
 	ci.max_reasm_len = IPV6_MAXPLEN;
-	ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.reachable_time = idev->nd_parms->reachable_time;
-	ci.retrans_time = idev->nd_parms->retrans_time;
+	ci.tstamp = cstamp_delta(idev->tstamp);
+	ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
+	ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
 	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
 
 	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
-- 
cgit v1.2.2


From 925e277f5221defdc53cbef1ac3ed1803fa32357 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Tue, 16 Nov 2010 09:40:02 +0000
Subject: net: irda: irttp: sync error paths of data- and udata-requests

irttp_data_request() returns meaningful errorcodes, while irttp_udata_request()
just returns -1 in similar situations. Sync the two and the loglevels of the
accompanying output.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irttp.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 6cfaeaf2a37..f6054f9ccbe 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(irttp_close_tsap);
  */
 int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
 {
-	int ret = -1;
+	int ret;
 
 	IRDA_ASSERT(self != NULL, return -1;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
@@ -566,13 +566,14 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
 
 	/* Check that nothing bad happens */
 	if (!self->connected) {
-		IRDA_DEBUG(1, "%s(), Not connected\n", __func__);
+		IRDA_WARNING("%s(), Not connected\n", __func__);
+		ret = -ENOTCONN;
 		goto err;
 	}
 
 	if (skb->len > self->max_seg_size) {
-		IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
-			   __func__);
+		IRDA_ERROR("%s(), UData is too large for IrLAP!\n", __func__);
+		ret = -EMSGSIZE;
 		goto err;
 	}
 
-- 
cgit v1.2.2


From dba4490d22a496f9b7c21919cf3effbed5851213 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 18 Nov 2010 08:20:57 +0000
Subject: netfilter: fix IP_VS dependencies

When NF_CONNTRACK is enabled, IP_VS uses conntrack symbols.
Therefore IP_VS can't be linked statically when conntrack
is built modular.

Reported-by: Justin P. Mattock <justinmattock@gmail.com>
Tested-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index a22dac22705..70bd1d0774c 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -4,6 +4,7 @@
 menuconfig IP_VS
 	tristate "IP virtual server support"
 	depends on NET && INET && NETFILTER
+	depends on (NF_CONNTRACK || NF_CONNTRACK=n)
 	---help---
 	  IP Virtual Server support will let you build a high-performance
 	  virtual server based on cluster of two or more real servers. This
-- 
cgit v1.2.2


From 0302b8622ce696af1cda22fcf207d3793350e896 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 18 Nov 2010 13:02:37 +0000
Subject: net: fix kernel-doc for sk_filter_rcu_release

Fix kernel-doc warning for sk_filter_rcu_release():

Warning(net/core/filter.c:586): missing initial short description on line:
 * 	sk_filter_rcu_release: Release a socket filter by rcu_head

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc:	"David S. Miller" <davem@davemloft.net>
Cc:	netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 23e9b2a6b4c..c1ee800bc08 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -589,7 +589,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 EXPORT_SYMBOL(sk_chk_filter);
 
 /**
- * 	sk_filter_rcu_release: Release a socket filter by rcu_head
+ * 	sk_filter_rcu_release - Release a socket filter by rcu_head
  *	@rcu: rcu_head that contains the sk_filter to free
  */
 static void sk_filter_rcu_release(struct rcu_head *rcu)
-- 
cgit v1.2.2


From 7a1c8e5ab120a5f352e78bbc1fa5bb64e6f23639 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 20 Nov 2010 07:46:35 +0000
Subject: net: allow GFP_HIGHMEM in __vmalloc()

We forgot to use __GFP_HIGHMEM in several __vmalloc() calls.

In ceph, add the missing flag.

In fib_trie.c, xfrm_hash.c and request_sock.c, using vzalloc() is
cleaner and allows using HIGHMEM pages as well.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ceph/buffer.c       | 2 +-
 net/core/request_sock.c | 4 +---
 net/ipv4/fib_trie.c     | 2 +-
 net/xfrm/xfrm_hash.c    | 2 +-
 4 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 53d8abfa25d..bf3e6a13c21 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -19,7 +19,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
 	if (b->vec.iov_base) {
 		b->is_vmalloc = false;
 	} else {
-		b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL);
+		b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
 		if (!b->vec.iov_base) {
 			kfree(b);
 			return NULL;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7..fceeb37d716 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -45,9 +45,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
 	lopt_size += nr_table_entries * sizeof(struct request_sock *);
 	if (lopt_size > PAGE_SIZE)
-		lopt = __vmalloc(lopt_size,
-			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
-			PAGE_KERNEL);
+		lopt = vzalloc(lopt_size);
 	else
 		lopt = kzalloc(lopt_size, GFP_KERNEL);
 	if (lopt == NULL)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 200eb538fbb..0f280348e0f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size)
 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
 	else
-		return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+		return vzalloc(size);
 }
 
 static void __tnode_vfree(struct work_struct *arg)
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index a2023ec5232..1e98bc0fe0a 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)
 	if (sz <= PAGE_SIZE)
 		n = kzalloc(sz, GFP_KERNEL);
 	else if (hashdist)
-		n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+		n = vzalloc(sz);
 	else
 		n = (struct hlist_head *)
 			__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
-- 
cgit v1.2.2


From 88b2a9a3d98a19496d64aadda7158c0ad51cbe7d Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Mon, 15 Nov 2010 20:29:21 +0000
Subject: ipv6: fix missing in6_ifa_put in addrconf

Fix ref count bug introduced by

commit 2de795707294972f6c34bae9de713e502c431296
Author: Lorenzo Colitti <lorenzo@google.com>
Date:   Wed Oct 27 18:16:49 2010 +0000

ipv6: addrconf: don't remove address state on ifdown if the address
is being kept

Fix logic so that addrconf_ifdown() decrements the inet6_ifaddr
refcnt correctly with in6_ifa_put().

Reported-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2fc35b32df9..23cc8e1ce8d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2758,13 +2758,13 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			ifa->state = INET6_IFADDR_STATE_DEAD;
 			spin_unlock_bh(&ifa->state_lock);
 
-			if (state == INET6_IFADDR_STATE_DEAD) {
-				in6_ifa_put(ifa);
-			} else {
+			if (state != INET6_IFADDR_STATE_DEAD) {
 				__ipv6_ifa_notify(RTM_DELADDR, ifa);
 				atomic_notifier_call_chain(&inet6addr_chain,
 							   NETDEV_DOWN, ifa);
 			}
+
+			in6_ifa_put(ifa);
 			write_lock_bh(&idev->lock);
 		}
 	}
-- 
cgit v1.2.2


From 5fc43978a79e8021c189660ab63249fd29c5fb32 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 20 Nov 2010 11:13:31 -0500
Subject: SUNRPC: Fix an infinite loop in call_refresh/call_refreshresult

If the rpcauth_refreshcred() call returns an error other than
EACCES, ENOMEM or ETIMEDOUT, we currently end up looping forever
between call_refresh and call_refreshresult.

The correct thing to do here is to exit on all errors except
EAGAIN and ETIMEDOUT, for which case we retry 3 times, then
return EACCES.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9dab9573be4..92ce94f5146 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -989,20 +989,26 @@ call_refreshresult(struct rpc_task *task)
 	dprint_status(task);
 
 	task->tk_status = 0;
-	task->tk_action = call_allocate;
-	if (status >= 0 && rpcauth_uptodatecred(task))
-		return;
+	task->tk_action = call_refresh;
 	switch (status) {
-	case -EACCES:
-		rpc_exit(task, -EACCES);
-		return;
-	case -ENOMEM:
-		rpc_exit(task, -ENOMEM);
+	case 0:
+		if (rpcauth_uptodatecred(task))
+			task->tk_action = call_allocate;
 		return;
 	case -ETIMEDOUT:
 		rpc_delay(task, 3*HZ);
+	case -EAGAIN:
+		status = -EACCES;
+		if (!task->tk_cred_retry)
+			break;
+		task->tk_cred_retry--;
+		dprintk("RPC: %5u %s: retry refresh creds\n",
+				task->tk_pid, __func__);
+		return;
 	}
-	task->tk_action = call_refresh;
+	dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
+				task->tk_pid, __func__, status);
+	rpc_exit(task, status);
 }
 
 /*
-- 
cgit v1.2.2


From c89ad7372232b69fd37edf90d6f5d2a8d6381214 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 1 Nov 2010 19:08:50 +0000
Subject: Bluetooth: Fix not returning proper error in SCO

Return 0 in that situation could lead to errors in the caller.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/sco.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index d0927d1fdad..66b9e5c0523 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -882,7 +882,7 @@ static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type)
 	int lm = 0;
 
 	if (type != SCO_LINK && type != ESCO_LINK)
-		return 0;
+		return -EINVAL;
 
 	BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
 
@@ -908,7 +908,7 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
 	BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
 
 	if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
-		return 0;
+		return -EINVAL;
 
 	if (!status) {
 		struct sco_conn *conn;
@@ -927,7 +927,7 @@ static int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
 	BT_DBG("hcon %p reason %d", hcon, reason);
 
 	if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
-		return 0;
+		return -EINVAL;
 
 	sco_conn_del(hcon, bt_err(reason));
 
-- 
cgit v1.2.2


From 9915672d41273f5b77f1b3c29b391ffb7732b84b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 24 Nov 2010 09:15:27 -0800
Subject: af_unix: limit unix_tot_inflight

Vegard Nossum found a unix socket OOM was possible, posting an exploit
program.

My analysis is we can eat all LOWMEM memory before unix_gc() being
called from unix_release_sock(). Moreover, the thread blocked in
unix_gc() can consume huge amount of time to perform cleanup because of
huge working set.

One way to handle this is to have a sensible limit on unix_tot_inflight,
tested from wait_for_unix_gc() and to force a call to unix_gc() if this
limit is hit.

This solves the OOM and also reduce overall latencies, and should not
slowdown normal workloads.

Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/garbage.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c8df6fda0b1..40df93d1cf3 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 }
 
 static bool gc_in_progress = false;
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
 
 void wait_for_unix_gc(void)
 {
+	/*
+	 * If number of inflight sockets is insane,
+	 * force a garbage collect right now.
+	 */
+	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+		unix_gc();
 	wait_event(unix_gc_wait, gc_in_progress == false);
 }
 
-- 
cgit v1.2.2


From c39508d6f118308355468314ff414644115a07f3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 24 Nov 2010 11:47:22 -0800
Subject: tcp: Make TCP_MAXSEG minimum more correct.

Use TCP_MIN_MSS instead of constant 64.

Reported-by: Min Zhang <mzhang@mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 08141996948..f15c36a706e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		/* Values greater than interface MTU won't take effect. However
 		 * at the point when this call is done we typically don't yet
 		 * know which interface is going to be used */
-		if (val < 64 || val > MAX_TCP_WINDOW) {
+		if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
 			err = -EINVAL;
 			break;
 		}
-- 
cgit v1.2.2


From fa0e846494792e722d817b9d3d625a4ef4896c96 Mon Sep 17 00:00:00 2001
From: Phil Blundell <philb@gnu.org>
Date: Wed, 24 Nov 2010 11:49:19 -0800
Subject: econet: disallow NULL remote addr for sendmsg(), fixes CVE-2010-3849

Later parts of econet_sendmsg() rely on saddr != NULL, so return early
with EINVAL if NULL was passed otherwise an oops may occur.

Signed-off-by: Phil Blundell <philb@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f8c1ae4b41f..e366f1bef91 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -297,23 +297,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	mutex_lock(&econet_mutex);
 
-	if (saddr == NULL) {
-		struct econet_sock *eo = ec_sk(sk);
-
-		addr.station = eo->station;
-		addr.net     = eo->net;
-		port	     = eo->port;
-		cb	     = eo->cb;
-	} else {
-		if (msg->msg_namelen < sizeof(struct sockaddr_ec)) {
-			mutex_unlock(&econet_mutex);
-			return -EINVAL;
-		}
-		addr.station = saddr->addr.station;
-		addr.net = saddr->addr.net;
-		port = saddr->port;
-		cb = saddr->cb;
-	}
+        if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) {
+                mutex_unlock(&econet_mutex);
+                return -EINVAL;
+        }
+        addr.station = saddr->addr.station;
+        addr.net = saddr->addr.net;
+        port = saddr->port;
+        cb = saddr->cb;
 
 	/* Look for a device with the right network number. */
 	dev = net2dev_map[addr.net];
@@ -351,7 +342,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 		eb = (struct ec_cb *)&skb->cb;
 
-		/* BUG: saddr may be NULL */
 		eb->cookie = saddr->cookie;
 		eb->sec = *saddr;
 		eb->sent = ec_tx_done;
-- 
cgit v1.2.2


From 16c41745c7b92a243d0874f534c1655196c64b74 Mon Sep 17 00:00:00 2001
From: Phil Blundell <philb@gnu.org>
Date: Wed, 24 Nov 2010 11:49:53 -0800
Subject: econet: fix CVE-2010-3850

Add missing check for capable(CAP_NET_ADMIN) in SIOCSIFADDR operation.

Signed-off-by: Phil Blundell <philb@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index e366f1bef91..d41ba8e56c1 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -661,6 +661,9 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
 	err = 0;
 	switch (cmd) {
 	case SIOCSIFADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
 		edev = dev->ec_ptr;
 		if (edev == NULL) {
 			/* Magic up a new one. */
-- 
cgit v1.2.2


From a27e13d370415add3487949c60810e36069a23a6 Mon Sep 17 00:00:00 2001
From: Phil Blundell <philb@gnu.org>
Date: Wed, 24 Nov 2010 11:51:47 -0800
Subject: econet: fix CVE-2010-3848

Don't declare variable sized array of iovecs on the stack since this
could cause stack overflow if msg->msgiovlen is large.  Instead, coalesce
the user-supplied data into a new buffer and use a single iovec for it.

Signed-off-by: Phil Blundell <philb@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 62 +++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index d41ba8e56c1..13992e1d272 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/udp.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/sock.h>
 #include <net/inet_common.h>
 #include <linux/stat.h>
@@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 #endif
 #ifdef CONFIG_ECONET_AUNUDP
 	struct msghdr udpmsg;
-	struct iovec iov[msg->msg_iovlen+1];
+	struct iovec iov[2];
 	struct aunhdr ah;
 	struct sockaddr_in udpdest;
 	__kernel_size_t size;
-	int i;
 	mm_segment_t oldfs;
+	char *userbuf;
 #endif
 
 	/*
@@ -319,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		}
 	}
 
-	if (len + 15 > dev->mtu) {
-		mutex_unlock(&econet_mutex);
-		return -EMSGSIZE;
-	}
-
 	if (dev->type == ARPHRD_ECONET) {
 		/* Real hardware Econet.  We're not worthy etc. */
 #ifdef CONFIG_ECONET_NATIVE
 		unsigned short proto = 0;
 		int res;
 
+		if (len + 15 > dev->mtu) {
+			mutex_unlock(&econet_mutex);
+			return -EMSGSIZE;
+		}
+
 		dev_hold(dev);
 
 		skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
@@ -405,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENETDOWN;		/* No socket - can't send */
 	}
 
+	if (len > 32768) {
+		err = -E2BIG;
+		goto error;
+	}
+
 	/* Make up a UDP datagram and hand it off to some higher intellect. */
 
 	memset(&udpdest, 0, sizeof(udpdest));
@@ -436,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	/* tack our header on the front of the iovec */
 	size = sizeof(struct aunhdr);
-	/*
-	 * XXX: that is b0rken.  We can't mix userland and kernel pointers
-	 * in iovec, since on a lot of platforms copy_from_user() will
-	 * *not* work with the kernel and userland ones at the same time,
-	 * regardless of what we do with set_fs().  And we are talking about
-	 * econet-over-ethernet here, so "it's only ARM anyway" doesn't
-	 * apply.  Any suggestions on fixing that code?		-- AV
-	 */
 	iov[0].iov_base = (void *)&ah;
 	iov[0].iov_len = size;
-	for (i = 0; i < msg->msg_iovlen; i++) {
-		void __user *base = msg->msg_iov[i].iov_base;
-		size_t iov_len = msg->msg_iov[i].iov_len;
-		/* Check it now since we switch to KERNEL_DS later. */
-		if (!access_ok(VERIFY_READ, base, iov_len)) {
-			mutex_unlock(&econet_mutex);
-			return -EFAULT;
-		}
-		iov[i+1].iov_base = base;
-		iov[i+1].iov_len = iov_len;
-		size += iov_len;
+
+	userbuf = vmalloc(len);
+	if (userbuf == NULL) {
+		err = -ENOMEM;
+		goto error;
 	}
 
+	iov[1].iov_base = userbuf;
+	iov[1].iov_len = len;
+	err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
+	if (err)
+		goto error_free_buf;
+
 	/* Get a skbuff (no data, just holds our cb information) */
 	if ((skb = sock_alloc_send_skb(sk, 0,
 				       msg->msg_flags & MSG_DONTWAIT,
-				       &err)) == NULL) {
-		mutex_unlock(&econet_mutex);
-		return err;
-	}
+				       &err)) == NULL)
+		goto error_free_buf;
 
 	eb = (struct ec_cb *)&skb->cb;
 
@@ -481,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	udpmsg.msg_name = (void *)&udpdest;
 	udpmsg.msg_namelen = sizeof(udpdest);
 	udpmsg.msg_iov = &iov[0];
-	udpmsg.msg_iovlen = msg->msg_iovlen + 1;
+	udpmsg.msg_iovlen = 2;
 	udpmsg.msg_control = NULL;
 	udpmsg.msg_controllen = 0;
 	udpmsg.msg_flags=0;
@@ -489,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	oldfs = get_fs(); set_fs(KERNEL_DS);	/* More privs :-) */
 	err = sock_sendmsg(udpsock, &udpmsg, size);
 	set_fs(oldfs);
+
+error_free_buf:
+	vfree(userbuf);
 #else
 	err = -EPROTOTYPE;
 #endif
+	error:
 	mutex_unlock(&econet_mutex);
 
 	return err;
-- 
cgit v1.2.2


From 4cb6a614ba0e58cae8abdadbf73bcb4d37a3f599 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:23:50 +0000
Subject: Net: ceph: Makefile: Remove unnessary code

Remove the if and else conditional because the code is in mainline and there
is no need in it being there.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ceph/Makefile | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'net')

diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index aab1cabb803..5f19415ec9c 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,9 +1,6 @@
 #
 # Makefile for CEPH filesystem.
 #
-
-ifneq ($(KERNELRELEASE),)
-
 obj-$(CONFIG_CEPH_LIB) += libceph.o
 
 libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
@@ -16,22 +13,3 @@ libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 	ceph_fs.o ceph_strings.o ceph_hash.o \
 	pagevec.o
 
-else
-#Otherwise we were called directly from the command
-# line; invoke the kernel build system.
-
-KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-PWD := $(shell pwd)
-
-default: all
-
-all:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
-
-modules_install:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
-
-clean:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
-
-endif
-- 
cgit v1.2.2


From 8475ef9fd16cadbfc692f78e608d1941a340beb2 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 22 Nov 2010 03:26:12 +0000
Subject: netns: Don't leak others' openreq-s in proc

The /proc/net/tcp leaks openreq sockets from other namespaces.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 69ccbc1dde9..e13da6de1fc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2043,7 +2043,9 @@ get_req:
 	}
 get_sk:
 	sk_nulls_for_each_from(sk, node) {
-		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
+		if (!net_eq(sock_net(sk), net))
+			continue;
+		if (sk->sk_family == st->family) {
 			cur = sk;
 			goto out;
 		}
-- 
cgit v1.2.2


From 0147fc058d11bd4009b126d09974d2c8f48fef15 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 22 Nov 2010 12:54:21 +0000
Subject: tcp: restrict net.ipv4.tcp_adv_win_scale (#20312)

tcp_win_from_space() does the following:

      if (sysctl_tcp_adv_win_scale <= 0)
              return space >> (-sysctl_tcp_adv_win_scale);
      else
              return space - (space >> sysctl_tcp_adv_win_scale);

"space" is int.

As per C99 6.5.7 (3) shifting int for 32 or more bits is
undefined behaviour.

Indeed, if sysctl_tcp_adv_win_scale is exactly 32,
space >> 32 equals space and function returns 0.

Which means we busyloop in tcp_fixup_rcvbuf().

Restrict net.ipv4.tcp_adv_win_scale to [-31, 31].

Fix https://bugzilla.kernel.org/show_bug.cgi?id=20312

Steps to reproduce:

      echo 32 >/proc/sys/net/ipv4/tcp_adv_win_scale
      wget www.kernel.org
      [softlockup]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e91911d7aae..1b4ec21497a 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_adv_win_scale_min = -31;
+static int tcp_adv_win_scale_max = 31;
 
 /* Update system visible IP port range */
 static void set_local_port_range(int range[2])
@@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = {
 		.data		= &sysctl_tcp_adv_win_scale,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &tcp_adv_win_scale_min,
+		.extra2		= &tcp_adv_win_scale_max,
 	},
 	{
 		.procname	= "tcp_tw_reuse",
-- 
cgit v1.2.2


From 0ac78870220b6e0ac74dd9292bcfa7b18718babd Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 23 Nov 2010 02:36:56 +0000
Subject: dccp: fix error in updating the GAR

This fixes a bug in updating the Greatest Acknowledgment number Received (GAR):
the current implementation does not track the greatest received value -
lower values in the range AWL..AWH (RFC 4340, 7.5.1) erase higher ones.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 265985370fa..e424a09e83f 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -239,7 +239,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
 		dccp_update_gsr(sk, seqno);
 
 		if (dh->dccph_type != DCCP_PKT_SYNC &&
-		    (ackno != DCCP_PKT_WITHOUT_ACK_SEQ))
+		    ackno != DCCP_PKT_WITHOUT_ACK_SEQ &&
+		    after48(ackno, dp->dccps_gar))
 			dp->dccps_gar = ackno;
 	} else {
 		unsigned long now = jiffies;
-- 
cgit v1.2.2


From 3c6f27bf33052ea6ba9d82369fb460726fb779c0 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Tue, 23 Nov 2010 11:02:13 +0000
Subject: DECnet: don't leak uninitialized stack byte

A single uninitialized padding byte is leaked to userspace.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
CC: stable <stable@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/af_decnet.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index a76b78de679..6f97268ed85 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
 			if (r_len > sizeof(struct linkinfo_dn))
 				r_len = sizeof(struct linkinfo_dn);
 
+			memset(&link, 0, sizeof(link));
+
 			switch(sock->state) {
 				case SS_CONNECTING:
 					link.idn_linkstate = LL_CONNECTING;
-- 
cgit v1.2.2


From b4ff3c90e6066bacc8a92111752fe9e4f4c45cca Mon Sep 17 00:00:00 2001
From: Nagendra Tomar <tomer_iisc@yahoo.com>
Date: Fri, 26 Nov 2010 14:26:27 +0000
Subject: inet: Fix __inet_inherit_port() to correctly increment bsockets and
 num_owners

inet sockets corresponding to passive connections are added to the bind hash
using ___inet_inherit_port(). These sockets are later removed from the bind
hash using __inet_put_port(). These two functions are not exactly symmetrical.
__inet_put_port() decrements hashinfo->bsockets and tb->num_owners, whereas
___inet_inherit_port() does not increment them. This results in both of these
going to -ve values.

This patch fixes this by calling inet_bind_hash() from ___inet_inherit_port(),
which does the right thing.

'bsockets' and 'num_owners' were introduced by commit a9d8f9110d7e953c
(inet: Allowing more than 64k connections and heavily optimize bind(0))

Signed-off-by: Nagendra Singh Tomar <tomer_iisc@yahoo.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1b344f30b46..3c0369a3a66 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)
 			}
 		}
 	}
-	sk_add_bind_node(child, &tb->owners);
-	inet_csk(child)->icsk_bind_hash = tb;
+	inet_bind_hash(child, tb, port);
 	spin_unlock(&head->lock);
 
 	return 0;
-- 
cgit v1.2.2


From 25888e30319f8896fc656fc68643e6a078263060 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 25 Nov 2010 04:11:39 +0000
Subject: af_unix: limit recursion level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Its easy to eat all kernel memory and trigger NMI watchdog, using an
exploit program that queues unix sockets on top of others.

lkml ref : http://lkml.org/lkml/2010/11/25/8

This mechanism is used in applications, one choice we have is to have a
recursion limit.

Other limits might be needed as well (if we queue other types of files),
since the passfd mechanism is currently limited by socket receive queue
sizes only.

Add a recursion_level to unix socket, allowing up to 4 levels.

Each time we send an unix socket through sendfd mechanism, we copy its
recursion level (plus one) to receiver. This recursion level is cleared
when socket receive queue is emptied.

Reported-by: Марк Коренберг <socketpair@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++++++-----
 net/unix/garbage.c |  2 +-
 2 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3c95304a081..2268e679812 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1343,9 +1343,25 @@ static void unix_destruct_scm(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
+#define MAX_RECURSION_LEVEL 4
+
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+	unsigned char max_level = 0;
+	int unix_sock_count = 0;
+
+	for (i = scm->fp->count - 1; i >= 0; i--) {
+		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
+
+		if (sk) {
+			unix_sock_count++;
+			max_level = max(max_level,
+					unix_sk(sk)->recursion_level);
+		}
+	}
+	if (unlikely(max_level > MAX_RECURSION_LEVEL))
+		return -ETOOMANYREFS;
 
 	/*
 	 * Need to duplicate file references for the sake of garbage
@@ -1356,9 +1372,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	if (!UNIXCB(skb).fp)
 		return -ENOMEM;
 
-	for (i = scm->fp->count-1; i >= 0; i--)
-		unix_inflight(scm->fp->fp[i]);
-	return 0;
+	if (unix_sock_count) {
+		for (i = scm->fp->count - 1; i >= 0; i--)
+			unix_inflight(scm->fp->fp[i]);
+	}
+	return max_level;
 }
 
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1393,6 +1411,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sk_buff *skb;
 	long timeo;
 	struct scm_cookie tmp_scm;
+	int max_level;
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
@@ -1431,8 +1450,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 
 	err = unix_scm_to_skb(siocb->scm, skb, true);
-	if (err)
+	if (err < 0)
 		goto out_free;
+	max_level = err + 1;
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1514,6 +1534,8 @@ restart:
 	if (sock_flag(other, SOCK_RCVTSTAMP))
 		__net_timestamp(skb);
 	skb_queue_tail(&other->sk_receive_queue, skb);
+	if (max_level > unix_sk(other)->recursion_level)
+		unix_sk(other)->recursion_level = max_level;
 	unix_state_unlock(other);
 	other->sk_data_ready(other, len);
 	sock_put(other);
@@ -1544,6 +1566,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	int sent = 0;
 	struct scm_cookie tmp_scm;
 	bool fds_sent = false;
+	int max_level;
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
@@ -1607,10 +1630,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 		/* Only send the fds in the first buffer */
 		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
-		if (err) {
+		if (err < 0) {
 			kfree_skb(skb);
 			goto out_err;
 		}
+		max_level = err + 1;
 		fds_sent = true;
 
 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
@@ -1626,6 +1650,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			goto pipe_err_free;
 
 		skb_queue_tail(&other->sk_receive_queue, skb);
+		if (max_level > unix_sk(other)->recursion_level)
+			unix_sk(other)->recursion_level = max_level;
 		unix_state_unlock(other);
 		other->sk_data_ready(other, size);
 		sent += size;
@@ -1845,6 +1871,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		unix_state_lock(sk);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb == NULL) {
+			unix_sk(sk)->recursion_level = 0;
 			if (copied >= target)
 				goto unlock;
 
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 40df93d1cf3..f89f83bf828 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
 unsigned int unix_tot_inflight;
 
 
-static struct sock *unix_get_socket(struct file *filp)
+struct sock *unix_get_socket(struct file *filp)
 {
 	struct sock *u_sock = NULL;
 	struct inode *inode = filp->f_path.dentry->d_inode;
-- 
cgit v1.2.2


From 7dff3125534c1d035a910052335a3a39fbb31aa7 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Fri, 26 Nov 2010 20:41:55 +0200
Subject: mac80211: Fix frame injection using non-AP vif

In order for frame injection to work properly for some use cases
(e.g., finding the station entry and keys for encryption), mac80211
needs to find the correct sdata entry. This works when the main vif
is in AP mode, but commit a2c1e3dad516618cb0fbfb1a62c36d0b0744573a
broke this particular use case for station main vif. While this type of
injection is quite unusual operation, it has some uses and we should fix
it. Do this by changing the monitor vif sdata selection to allow station
vif to be selected instead of limiting it to just AP vifs. We still need
to skip some iftypes to avoid selecting unsuitable vif for injection.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 96c59430950..df6aac52353 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1587,7 +1587,12 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
 						list) {
 				if (!ieee80211_sdata_running(tmp_sdata))
 					continue;
-				if (tmp_sdata->vif.type != NL80211_IFTYPE_AP)
+				if (tmp_sdata->vif.type ==
+				    NL80211_IFTYPE_MONITOR ||
+				    tmp_sdata->vif.type ==
+				    NL80211_IFTYPE_AP_VLAN ||
+					tmp_sdata->vif.type ==
+				    NL80211_IFTYPE_WDS)
 					continue;
 				if (compare_ether_addr(tmp_sdata->vif.addr,
 						       hdr->addr2) == 0) {
-- 
cgit v1.2.2


From 2c31333a8fde7e26936a9f5371d02ff12c490993 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Mon, 29 Nov 2010 20:53:23 +0100
Subject: mac80211: ignore non-bcast mcast deauth/disassoc franes

This patch fixes an curious issue due to insufficient
rx frame filtering.

Saqeb Akhter reported frequent disconnects while streaming
videos over samba: <http://marc.info/?m=128600031109136>
> [ 1166.512087] wlan1: deauthenticated from 30:46:9a:10:49:f7 (Reason: 7)
> [ 1526.059997] wlan1: deauthenticated from 30:46:9a:10:49:f7 (Reason: 7)
> [ 2125.324356] wlan1: deauthenticated from 30:46:9a:10:49:f7 (Reason: 7)
> [...]

The reason is that the device generates frames with slightly
bogus SA/TA addresses.

e.g.:
 [ 2314.402316] Ignore 9f:1f:31:f8:64:ff
 [ 2314.402321] Ignore 9f:1f:31:f8:64:ff
 [ 2352.453804] Ignore 0d:1f:31:f8:64:ff
 [ 2352.453808] Ignore 0d:1f:31:f8:64:ff
 					   ^^ the group-address flag is set!
 (the correct SA/TA would be: 00:1f:31:f8:64:ff)

Since the AP does not know from where the frames come, it
generates a DEAUTH response for the (invalid) mcast address.
This mcast deauth frame then passes through all filters and
tricks the stack into thinking that the AP brutally kicked
us!

This patch fixes the problem by simply ignoring
non-broadcast, group-addressed deauth/disassoc frames.

Cc: Jouni Malinen <j@w1.fi>
Cc: Johannes Berg <johannes@sipsolutions.net>
Reported-by: Saqeb Akhter <saqeb.akhter@gmail.com>
Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 902b03ee8f6..3c87293cb07 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2247,6 +2247,10 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 		break;
 	case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
 	case cpu_to_le16(IEEE80211_STYPE_DISASSOC):
+		if (is_multicast_ether_addr(mgmt->da) &&
+		    !is_broadcast_ether_addr(mgmt->da))
+			return RX_DROP_MONITOR;
+
 		/* process only for station */
 		if (sdata->vif.type != NL80211_IFTYPE_STATION)
 			return RX_DROP_MONITOR;
-- 
cgit v1.2.2


From 8e26d5ad2f9c038609d42eebc676cd1107709eef Mon Sep 17 00:00:00 2001
From: Senthil Balasubramanian <senthilkumar@atheros.com>
Date: Tue, 30 Nov 2010 20:15:38 +0530
Subject: mac80211: Fix STA disconnect due to MIC failure

Th commit titled "mac80211: clean up rx handling wrt. found_sta"
removed found_sta variable which caused a MIC failure event
to be reported twice for a single failure to supplicant resulted
in STA disconnect.

This should fix WPA specific countermeasures WiFi test case (5.2.17)
issues with mac80211 based drivers which report MIC failure events in
rx status.

Cc: Stable <stable@kernel.org> (2.6.37)
Signed-off-by: Senthil Balasubramanian <senthilkumar@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3c87293cb07..54fb4a0e76f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2745,6 +2745,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 
 			if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
 				return;
+			goto out;
 		}
 	}
 
@@ -2784,6 +2785,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 			return;
 	}
 
+ out:
 	dev_kfree_skb(skb);
 }
 
-- 
cgit v1.2.2


From 381601e5bbae78d7c18d946fe874a63957edea13 Mon Sep 17 00:00:00 2001
From: Anders Franzen <Anders.Franzen@ericsson.com>
Date: Wed, 24 Nov 2010 05:47:18 +0000
Subject: Make the ip6_tunnel reflect the true mtu.

The ip6_tunnel always assumes it consumes 40 bytes (ip6 hdr) of the mtu of the
underlaying device. So for a normal ethernet bearer, the mtu of the ip6_tunnel is
1460.
However, when creating a tunnel the encap limit option is enabled by default, and it
consumes 8 bytes more, so the true mtu shall be 1452.

I dont really know if this breaks some statement in some RFC, so this is a request for
comments.

Signed-off-by: Anders Franzen <anders.franzen@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2a59610c2a5..70e891a20fb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1175,6 +1175,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 				sizeof (struct ipv6hdr);
 
 			dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
+			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+				dev->mtu-=8;
 
 			if (dev->mtu < IPV6_MIN_MTU)
 				dev->mtu = IPV6_MIN_MTU;
@@ -1363,12 +1365,17 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 
 static void ip6_tnl_dev_setup(struct net_device *dev)
 {
+	struct ip6_tnl *t;
+
 	dev->netdev_ops = &ip6_tnl_netdev_ops;
 	dev->destructor = ip6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
 	dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+	t = netdev_priv(dev);
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		dev->mtu-=8;
 	dev->flags |= IFF_NOARP;
 	dev->addr_len = sizeof(struct in6_addr);
 	dev->features |= NETIF_F_NETNS_LOCAL;
-- 
cgit v1.2.2


From 6dcdd1b3694a4fa2b85167a9c860c7613a7553c7 Mon Sep 17 00:00:00 2001
From: David McCullough <david_mccullough@mcafee.com>
Date: Mon, 29 Nov 2010 19:32:34 +0000
Subject: net/ipv6/sit.c: return unhandled skb to tunnel4_rcv

I found a problem using an IPv6 over IPv4 tunnel.  When CONFIG_IPV6_SIT
was enabled, the packets would be rejected as net/ipv6/sit.c was catching
all IPPROTO_IPV6 packets and returning an ICMP port unreachable error.

I think this patch fixes the problem cleanly.  I believe the code in
net/ipv4/tunnel4.c:tunnel4_rcv takes care of it properly if none of the
handlers claim the skb.

Signed-off-by: David McCullough <david_mccullough@mcafee.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d6bfaec3bbb..8c4d00c7cd2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -606,8 +606,9 @@ static int ipip6_rcv(struct sk_buff *skb)
 		return 0;
 	}
 
-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+	/* no tunnel matched,  let upstream know, ipsec may handle it */
 	rcu_read_unlock();
+	return 1;
 out:
 	kfree_skb(skb);
 	return 0;
-- 
cgit v1.2.2


From 0bae35e14b68f5e7075bc96e5ea608b42bdf8f59 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 2 Dec 2010 14:31:14 -0800
Subject: leds: fix up dependencies

It's not useful to build LED triggers when there's no LEDs that can be
triggered by them.  Therefore, fix up the dependencies so that this
cannot happen, and fix a few users that select triggers to depend on
LEDS_CLASS as well (there is also one user that also selects LEDS_CLASS,
which is OK).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Tested-by: Ingo Molnar <mingo@elte.hu>
Cc: Arnd Hannemann <arnd@arndnet.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Richard Purdie <rpurdie@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/mac80211/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 4d6f8653ec8..8e8ea9cb709 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -92,7 +92,7 @@ config MAC80211_MESH
 config MAC80211_LEDS
 	bool "Enable LED triggers"
 	depends on MAC80211
-	select NEW_LEDS
+	depends on LEDS_CLASS
 	select LEDS_TRIGGERS
 	---help---
 	  This option enables a few LED triggers for different
-- 
cgit v1.2.2


From 46bcf14f44d8f31ecfdc8b6708ec15a3b33316d9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 6 Dec 2010 09:29:43 -0800
Subject: filter: fix sk_filter rcu handling

Pavel Emelyanov tried to fix a race between sk_filter_(de|at)tach and
sk_clone() in commit 47e958eac280c263397

Problem is we can have several clones sharing a common sk_filter, and
these clones might want to sk_filter_attach() their own filters at the
same time, and can overwrite old_filter->rcu, corrupting RCU queues.

We can not use filter->rcu without being sure no other thread could do
the same thing.

Switch code to a more conventional ref-counting technique : Do the
atomic decrement immediately and queue one rcu call back when last
reference is released.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index c1ee800bc08..ae21a0d3c4a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -589,23 +589,16 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 EXPORT_SYMBOL(sk_chk_filter);
 
 /**
- * 	sk_filter_rcu_release - Release a socket filter by rcu_head
+ * 	sk_filter_release_rcu - Release a socket filter by rcu_head
  *	@rcu: rcu_head that contains the sk_filter to free
  */
-static void sk_filter_rcu_release(struct rcu_head *rcu)
+void sk_filter_release_rcu(struct rcu_head *rcu)
 {
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
-	sk_filter_release(fp);
-}
-
-static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
-{
-	unsigned int size = sk_filter_len(fp);
-
-	atomic_sub(size, &sk->sk_omem_alloc);
-	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+	kfree(fp);
 }
+EXPORT_SYMBOL(sk_filter_release_rcu);
 
 /**
  *	sk_attach_filter - attach a socket filter
@@ -649,7 +642,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	rcu_assign_pointer(sk->sk_filter, fp);
 
 	if (old_fp)
-		sk_filter_delayed_uncharge(sk, old_fp);
+		sk_filter_uncharge(sk, old_fp);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -663,7 +656,7 @@ int sk_detach_filter(struct sock *sk)
 					   sock_owned_by_user(sk));
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
-		sk_filter_delayed_uncharge(sk, filter);
+		sk_filter_uncharge(sk, filter);
 		ret = 0;
 	}
 	return ret;
-- 
cgit v1.2.2


From ed2849d3ecfa339435818eeff28f6c3424300cec Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 16 Nov 2010 16:55:19 +1100
Subject: sunrpc: prevent use-after-free on clearing XPT_BUSY

When an xprt is created, it has a refcount of 1, and XPT_BUSY is set.
The refcount is *not* owned by the thread that created the xprt
(as is clear from the fact that creators never put the reference).
Rather, it is owned by the absence of XPT_DEAD.  Once XPT_DEAD is set,
(And XPT_BUSY is clear) that initial reference is dropped and the xprt
can be freed.

So when a creator clears XPT_BUSY it is dropping its only reference and
so must not touch the xprt again.

However svc_recv, after calling ->xpo_accept (and so getting an XPT_BUSY
reference on a new xprt), calls svc_xprt_recieved.  This clears
XPT_BUSY and then svc_xprt_enqueue - this last without owning a reference.
This is dangerous and has been seen to leave svc_xprt_enqueue working
with an xprt containing garbage.

So we need to hold an extra counted reference over that call to
svc_xprt_received.

For safety, any time we clear XPT_BUSY and then use the xprt again, we
first get a reference, and the put it again afterwards.

Note that svc_close_all does not need this extra protection as there are
no threads running, and the final free can only be called asynchronously
from such a thread.

Signed-off-by: NeilBrown <neilb@suse.de>
Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index ea2ff78dcf7..3f2c5559ca1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -212,6 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 	spin_lock(&svc_xprt_class_lock);
 	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
 		struct svc_xprt *newxprt;
+		unsigned short newport;
 
 		if (strcmp(xprt_name, xcl->xcl_name))
 			continue;
@@ -230,8 +231,9 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 		spin_lock_bh(&serv->sv_lock);
 		list_add(&newxprt->xpt_list, &serv->sv_permsocks);
 		spin_unlock_bh(&serv->sv_lock);
+		newport = svc_xprt_local_port(newxprt);
 		clear_bit(XPT_BUSY, &newxprt->xpt_flags);
-		return svc_xprt_local_port(newxprt);
+		return newport;
 	}
  err:
 	spin_unlock(&svc_xprt_class_lock);
@@ -425,8 +427,13 @@ void svc_xprt_received(struct svc_xprt *xprt)
 {
 	BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
 	xprt->xpt_pool = NULL;
+	/* As soon as we clear busy, the xprt could be closed and
+	 * 'put', so we need a reference to call svc_xprt_enqueue with:
+	 */
+	svc_xprt_get(xprt);
 	clear_bit(XPT_BUSY, &xprt->xpt_flags);
 	svc_xprt_enqueue(xprt);
+	svc_xprt_put(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_received);
 
-- 
cgit v1.2.2


From b1afde60f2b9ee8444fba4e012dc99a3b28d224d Mon Sep 17 00:00:00 2001
From: Nandita Dukkipati <nanditad@google.com>
Date: Fri, 3 Dec 2010 13:33:44 +0000
Subject: tcp: Bug fix in initialization of receive window.

The bug has to do with boundary checks on the initial receive window.
If the initial receive window falls between init_cwnd and the
receive window specified by the user, the initial window is incorrectly
brought down to init_cwnd. The correct behavior is to allow it to
remain unchanged.

Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 05b1ecf3676..3c59ab42df2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -231,11 +231,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */
-		if (init_rcv_wnd &&
-		    (*rcv_wnd > init_rcv_wnd * mss))
-			*rcv_wnd = init_rcv_wnd * mss;
-		else if (*rcv_wnd > init_cwnd * mss)
-			*rcv_wnd = init_cwnd * mss;
+		if (init_rcv_wnd)
+			*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
+		else
+			*rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
 	}
 
 	/* Set the clamp no higher than max representable value */
-- 
cgit v1.2.2


From 35d9b0c906ad92d32a0b8db5daa6fabfcc2f068d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 5 Dec 2010 02:03:26 +0000
Subject: llc: fix a device refcount imbalance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le dimanche 05 décembre 2010 à 12:23 +0100, Eric Dumazet a écrit :
> Le dimanche 05 décembre 2010 à 09:19 +0100, Eric Dumazet a écrit :
>
> > Hmm..
> >
> > If somebody can explain why RTNL is held in arp_ioctl() (and therefore
> > in arp_req_delete()), we might first remove RTNL use in arp_ioctl() so
> > that your patch can be applied.
> >
> > Right now it is not good, because RTNL wont be necessarly held when you
> > are going to call arp_invalidate() ?
>
> While doing this analysis, I found a refcount bug in llc, I'll send a
> patch for net-2.6

Oh well, of course I must first fix the bug in net-2.6, and wait David
pull the fix in net-next-2.6 before sending this rcu conversion.

Note: this patch should be sent to stable teams (2.6.34 and up)

[PATCH net-2.6] llc: fix a device refcount imbalance

commit abf9d537fea225 (llc: add support for SO_BINDTODEVICE) added one
refcount imbalance in llc_ui_bind(), because dev_getbyhwaddr() doesnt
take a reference on device, while dev_get_by_index() does.

Fix this using RCU locking. And since an RCU conversion will be done for
2.6.38 for dev_getbyhwaddr(), put the rcu_read_lock/unlock exactly at
their final place.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: stable@kernel.org
Cc: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/af_llc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 58261299821..e35dbe55f52 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -317,8 +317,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 		goto out;
 	rc = -ENODEV;
 	rtnl_lock();
+	rcu_read_lock();
 	if (sk->sk_bound_dev_if) {
-		llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+		llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
 		if (llc->dev) {
 			if (!addr->sllc_arphrd)
 				addr->sllc_arphrd = llc->dev->type;
@@ -329,13 +330,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 			    !llc_mac_match(addr->sllc_mac,
 					   llc->dev->dev_addr)) {
 				rc = -EINVAL;
-				dev_put(llc->dev);
 				llc->dev = NULL;
 			}
 		}
 	} else
 		llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd,
 					   addr->sllc_mac);
+	rcu_read_unlock();
 	rtnl_unlock();
 	if (!llc->dev)
 		goto out;
-- 
cgit v1.2.2


From 0c62fc6dd02c8d793c75ae76a9b6881fc36388ad Mon Sep 17 00:00:00 2001
From: Nelson Elhage <nelhage@ksplice.com>
Date: Wed, 8 Dec 2010 10:13:55 -0800
Subject: econet: Do the correct cleanup after an unprivileged SIOCSIFADDR.

We need to drop the mutex and do a dev_put, so set an error code and break like
the other paths, instead of returning directly.

Signed-off-by: Nelson Elhage <nelhage@ksplice.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 13992e1d272..f180371fa41 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -661,8 +661,10 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
 	err = 0;
 	switch (cmd) {
 	case SIOCSIFADDR:
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
 
 		edev = dev->ec_ptr;
 		if (edev == NULL) {
-- 
cgit v1.2.2


From e8d34a884e4ff118920bb57664def8a73b1b784f Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Mon, 6 Dec 2010 02:39:12 +0000
Subject: l2tp: Fix modalias of l2tp_ip

Using the SOCK_DGRAM enum results in
"net-pf-2-proto-SOCK_DGRAM-type-115", so use the numeric value like it
is done in net/dccp.

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0bf6a59545a..522e219f355 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -674,4 +674,8 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP over IP");
 MODULE_VERSION("1.0");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP);
+
+/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like
+ * enums
+ */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
-- 
cgit v1.2.2


From 171995e5d82dcc92bea37a7d2a2ecc21068a0f19 Mon Sep 17 00:00:00 2001
From: Apollon Oikonomopoulos <apollon@noc.grnet.gr>
Date: Tue, 7 Dec 2010 09:43:30 +0000
Subject: x25: decrement netdev reference counts on unload

x25 does not decrement the network device reference counts on module unload.
Thus unregistering any pre-existing interface after unloading the x25 module
hangs and results in

 unregister_netdevice: waiting for tap0 to become free. Usage count = 1

This patch decrements the reference counts of all interfaces in x25_link_free,
the way it is already done in x25_link_device_down for NETDEV_DOWN events.

Signed-off-by: Apollon Oikonomopoulos <apollon@noc.grnet.gr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_link.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 73e7b954ad2..b25c6463c3e 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -394,6 +394,7 @@ void __exit x25_link_free(void)
 	list_for_each_safe(entry, tmp, &x25_neigh_list) {
 		nb = list_entry(entry, struct x25_neigh, node);
 		__x25_remove_neigh(nb);
+		dev_put(nb->dev);
 	}
 	write_unlock_bh(&x25_neigh_list_lock);
 }
-- 
cgit v1.2.2


From 67631510a318d5a930055fe927607f483716e100 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 8 Dec 2010 12:16:33 -0800
Subject: tcp: Replace time wait bucket msg by counter

Rather than printing the message to the log, use a mib counter to keep
track of the count of occurences of time wait bucket overflow.  Reduces
spam in logs.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/proc.c          | 1 +
 net/ipv4/tcp_minisocks.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 1b48eb1ed45..b14ec7d03b6 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
 	SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
 	SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
+	SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 43cf901d765..a66735f7596 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -347,7 +347,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		 * socket up.  We've got bigger problems than
 		 * non-graceful socket closings.
 		 */
-		LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n");
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
 	}
 
 	tcp_update_metrics(sk);
-- 
cgit v1.2.2


From 7e2447075690860e2cea96b119fc9cadbaa7e83c Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Thu, 2 Dec 2010 18:44:09 +0100
Subject: mac80211: Fix BUG in pskb_expand_head when transmitting shared skbs

mac80211 doesn't handle shared skbs correctly at the moment. As a result
a possible resize can trigger a BUG in pskb_expand_head.

[  676.030000] Kernel bug detected[#1]:
[  676.030000] Cpu 0
[  676.030000] $ 0   : 00000000 00000000 819662ff 00000002
[  676.030000] $ 4   : 81966200 00000020 00000000 00000020
[  676.030000] $ 8   : 819662e0 800043c0 00000002 00020000
[  676.030000] $12   : 3b9aca00 00000000 00000000 00470000
[  676.030000] $16   : 80ea2000 00000000 00000000 00000000
[  676.030000] $20   : 818aa200 80ea2018 80ea2000 00000008
[  676.030000] $24   : 00000002 800ace5c
[  676.030000] $28   : 8199a000 8199bd20 81938f88 80f180d4
[  676.030000] Hi    : 0000026e
[  676.030000] Lo    : 0000757e
[  676.030000] epc   : 801245e4 pskb_expand_head+0x44/0x1d8
[  676.030000]     Not tainted
[  676.030000] ra    : 80f180d4 ieee80211_skb_resize+0xb0/0x114 [mac80211]
[  676.030000] Status: 1000a403    KERNEL EXL IE
[  676.030000] Cause : 10800024
[  676.030000] PrId  : 0001964c (MIPS 24Kc)
[  676.030000] Modules linked in: mac80211_hwsim rt2800lib rt2x00soc rt2x00pci rt2x00lib mac80211 crc_itu_t crc_ccitt cfg80211 compat arc4 aes_generic deflate ecb cbc [last unloaded: rt2800pci]
[  676.030000] Process kpktgend_0 (pid: 97, threadinfo=8199a000, task=81879f48, tls=00000000)
[  676.030000] Stack : ffffffff 00000000 00000000 00000014 00000004 80ea2000 00000000 00000000
[  676.030000]         818aa200 80f180d4 ffffffff 0000000a 81879f78 81879f48 81879f48 00000018
[  676.030000]         81966246 80ea2000 818432e0 80f1a420 80203050 81814d98 00000001 81879f48
[  676.030000]         81879f48 00000018 81966246 818432e0 0000001a 8199bdd4 0000001c 80f1b72c
[  676.030000]         80203020 8001292c 80ef4aa2 7f10b55d 801ab5b8 81879f48 00000188 80005c90
[  676.030000]         ...
[  676.030000] Call Trace:
[  676.030000] [<801245e4>] pskb_expand_head+0x44/0x1d8
[  676.030000] [<80f180d4>] ieee80211_skb_resize+0xb0/0x114 [mac80211]
[  676.030000] [<80f1a420>] ieee80211_xmit+0x150/0x22c [mac80211]
[  676.030000] [<80f1b72c>] ieee80211_subif_start_xmit+0x6f4/0x73c [mac80211]
[  676.030000] [<8014361c>] pktgen_thread_worker+0xfac/0x16f8
[  676.030000] [<8002ebe8>] kthread+0x7c/0x88
[  676.030000] [<80008e0c>] kernel_thread_helper+0x10/0x18
[  676.030000]
[  676.030000]
[  676.030000] Code: 24020001  10620005  2502001f <0200000d> 0804917a  00000000  2502001f  00441023  00531021

Fix this by making a local copy of shared skbs prior to mangeling them.
To avoid copying the skb unnecessarily move the skb_copy call below the
checks that don't need write access to the skb.

Also, move the assignment of nh_pos and h_pos below the skb_copy to point
to the correct skb.

It would be possible to avoid another resize of the copied skb by using
skb_copy_expand instead of skb_copy but that would make the patch more
complex. Also, shared skbs are a corner case right now, so the resize
shouldn't matter much.

Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index df6aac52353..7a637b80a62 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1737,15 +1737,13 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	int nh_pos, h_pos;
 	struct sta_info *sta = NULL;
 	u32 sta_flags = 0;
+	struct sk_buff *tmp_skb;
 
 	if (unlikely(skb->len < ETH_HLEN)) {
 		ret = NETDEV_TX_OK;
 		goto fail;
 	}
 
-	nh_pos = skb_network_header(skb) - skb->data;
-	h_pos = skb_transport_header(skb) - skb->data;
-
 	/* convert Ethernet header to proper 802.11 header (based on
 	 * operation mode) */
 	ethertype = (skb->data[12] << 8) | skb->data[13];
@@ -1918,6 +1916,20 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 		goto fail;
 	}
 
+	/*
+	 * If the skb is shared we need to obtain our own copy.
+	 */
+	if (skb_shared(skb)) {
+		tmp_skb = skb;
+		skb = skb_copy(skb, GFP_ATOMIC);
+		kfree_skb(tmp_skb);
+
+		if (!skb) {
+			ret = NETDEV_TX_OK;
+			goto fail;
+		}
+	}
+
 	hdr.frame_control = fc;
 	hdr.duration_id = 0;
 	hdr.seq_ctrl = 0;
@@ -1936,6 +1948,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 		encaps_len = 0;
 	}
 
+	nh_pos = skb_network_header(skb) - skb->data;
+	h_pos = skb_transport_header(skb) - skb->data;
+
 	skb_pull(skb, skip_header_bytes);
 	nh_pos -= skip_header_bytes;
 	h_pos -= skip_header_bytes;
-- 
cgit v1.2.2


From ad9f4f50fe9288bbe65b7dfd76d8820afac6a24c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 7 Dec 2010 12:03:55 +0000
Subject: tcp: avoid a possible divide by zero

sysctl_tcp_tso_win_divisor might be set to zero while one cpu runs in
tcp_tso_should_defer(). Make sure we dont allow a divide by zero by
reading sysctl_tcp_tso_win_divisor exactly once.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3c59ab42df2..0d4a3cebfb4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1512,6 +1512,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
+	int win_divisor;
 
 	if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
 		goto send_now;
@@ -1543,13 +1544,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
 		goto send_now;
 
-	if (sysctl_tcp_tso_win_divisor) {
+	win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+	if (win_divisor) {
 		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
 
 		/* If at least some fraction of a window is available,
 		 * just use it.
 		 */
-		chunk /= sysctl_tcp_tso_win_divisor;
+		chunk /= win_divisor;
 		if (limit >= chunk)
 			goto send_now;
 	} else {
-- 
cgit v1.2.2


From f19872575ff7819a3723154657a497d9bca66b33 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 7 Dec 2010 12:20:47 +0000
Subject: tcp: protect sysctl_tcp_cookie_size reads

Make sure sysctl_tcp_cookie_size is read once in
tcp_cookie_size_check(), or we might return an illegal value to caller
if sysctl_tcp_cookie_size is changed by another cpu.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: William Allen Simpson <william.allen.simpson@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0d4a3cebfb4..61c2463e275 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -385,27 +385,30 @@ struct tcp_out_options {
  */
 static u8 tcp_cookie_size_check(u8 desired)
 {
-	if (desired > 0) {
+	int cookie_size;
+
+	if (desired > 0)
 		/* previously specified */
 		return desired;
-	}
-	if (sysctl_tcp_cookie_size <= 0) {
+
+	cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
+	if (cookie_size <= 0)
 		/* no default specified */
 		return 0;
-	}
-	if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) {
+
+	if (cookie_size <= TCP_COOKIE_MIN)
 		/* value too small, specify minimum */
 		return TCP_COOKIE_MIN;
-	}
-	if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) {
+
+	if (cookie_size >= TCP_COOKIE_MAX)
 		/* value too large, specify maximum */
 		return TCP_COOKIE_MAX;
-	}
-	if (0x1 & sysctl_tcp_cookie_size) {
+
+	if (cookie_size & 1)
 		/* 8-bit multiple, illegal, fix it */
-		return (u8)(sysctl_tcp_cookie_size + 0x1);
-	}
-	return (u8)sysctl_tcp_cookie_size;
+		cookie_size++;
+
+	return (u8)cookie_size;
 }
 
 /* Write previously computed TCP options to the packet.
-- 
cgit v1.2.2


From 4e085e76cbe558b79b54cbab772f61185879bc64 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 8 Dec 2010 18:42:23 -0800
Subject: econet: Fix crash in aun_incoming().

Unconditional use of skb->dev won't work here,
try to fetch the econet device via skb_dst()->dev
instead.

Suggested by Eric Dumazet.

Reported-by: Nelson Elhage <nelhage@ksplice.com>
Tested-by: Nelson Elhage <nelhage@ksplice.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f180371fa41..15dcc1a586b 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -851,9 +851,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
 {
 	struct iphdr *ip = ip_hdr(skb);
 	unsigned char stn = ntohl(ip->saddr) & 0xff;
+	struct dst_entry *dst = skb_dst(skb);
+	struct ec_device *edev = NULL;
 	struct sock *sk = NULL;
 	struct sk_buff *newskb;
-	struct ec_device *edev = skb->dev->ec_ptr;
+
+	if (dst)
+		edev = dst->dev->ec_ptr;
 
 	if (! edev)
 		goto bad;
-- 
cgit v1.2.2


From 78347c8c6b2ddf20535bc1b18d749a3bbdea2a60 Mon Sep 17 00:00:00 2001
From: Thomas Egerer <thomas.egerer@secunet.com>
Date: Mon, 6 Dec 2010 23:28:56 +0000
Subject: xfrm: Fix xfrm_state_migrate leak

xfrm_state_migrate calls kfree instead of xfrm_state_put to free
a failed state. According to git commit 553f9118 this can cause
memory leaks.

Signed-off-by: Thomas Egerer <thomas.egerer@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index eb96ce52f17..220ebc05c7a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1268,7 +1268,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
 
 	return xc;
 error:
-	kfree(xc);
+	xfrm_state_put(xc);
 	return NULL;
 }
 EXPORT_SYMBOL(xfrm_state_migrate);
-- 
cgit v1.2.2


From c1249c0aae4c93a753c70496ab2e9a51430a6f02 Mon Sep 17 00:00:00 2001
From: Martin Lucina <mato@kotelna.sk>
Date: Fri, 10 Dec 2010 00:04:05 +0000
Subject: net: Document the kernel_recvmsg() function

Signed-off-by: Martin Lucina <mato@kotelna.sk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 3ca2fd9e372..088fb3fd45e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -732,6 +732,21 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 	return ret;
 }
 
+/**
+ * kernel_recvmsg - Receive a message from a socket (kernel space)
+ * @sock:       The socket to receive the message from
+ * @msg:        Received message
+ * @vec:        Input s/g array for message data
+ * @num:        Size of input s/g array
+ * @size:       Number of bytes to read
+ * @flags:      Message flags (MSG_DONTWAIT, etc...)
+ *
+ * On return the msg structure contains the scatter/gather array passed in the
+ * vec argument. The array is modified so that it consists of the unfilled
+ * portion of the original array.
+ *
+ * The returned value is the total number of bytes received, or an error.
+ */
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size, int flags)
 {
-- 
cgit v1.2.2


From 5f75a1042feca37c0a436ba42a4b1f7f75c35778 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 7 Dec 2010 23:38:31 +0000
Subject: ipv6: fix nl group when advertising a new link

New idev are advertised with NL group RTNLGRP_IPV6_IFADDR, but
should use RTNLGRP_IPV6_IFINFO.
Bug was introduced by commit 8d7a76c9.

Signed-off-by: Wang Xuefu <xuefu.wang@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 23cc8e1ce8d..93b7a933a77 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4021,11 +4021,11 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
 	return;
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
 }
 
 static inline size_t inet6_prefix_nlmsg_size(void)
-- 
cgit v1.2.2


From 40a010395cd66053f07bffeb3da5e44683bac30e Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 7 Dec 2010 17:11:09 +0000
Subject: SCTP: Fix SCTP_SET_PEER_PRIMARY_ADDR to accpet v4mapped address

SCTP_SET_PEER_PRIMARY_ADDR does not accpet v4mapped address, using
v4mapped address in SCTP_SET_PEER_PRIMARY_ADDR socket option will
get -EADDRNOTAVAIL error if v4map is enabled. This patch try to
fix it by mapping v4mapped address to v4 address if allowed.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6bd554323a3..0b9ee34ad35 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2932,6 +2932,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
 	struct sctp_association	*asoc = NULL;
 	struct sctp_setpeerprim	prim;
 	struct sctp_chunk	*chunk;
+	struct sctp_af		*af;
 	int 			err;
 
 	sp = sctp_sk(sk);
@@ -2959,6 +2960,13 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
 	if (!sctp_state(asoc, ESTABLISHED))
 		return -ENOTCONN;
 
+	af = sctp_get_af_specific(prim.sspp_addr.ss_family);
+	if (!af)
+		return -EINVAL;
+
+	if (!af->addr_valid((union sctp_addr *)&prim.sspp_addr, sp, NULL))
+		return -EADDRNOTAVAIL;
+
 	if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr))
 		return -EADDRNOTAVAIL;
 
-- 
cgit v1.2.2


From d9ca676bcb26e1fdff9265a3e70f697cd381c889 Mon Sep 17 00:00:00 2001
From: Dan Williams <dcbw@redhat.com>
Date: Wed, 8 Dec 2010 19:40:47 +0000
Subject: atm: correct sysfs 'device' link creation and parent relationships

The ATM subsystem was incorrectly creating the 'device' link for ATM
nodes in sysfs.  This led to incorrect device/parent relationships
exposed by sysfs and udev.  Instead of rolling the 'device' link by hand
in the generic ATM code, pass each ATM driver's bus device down to the
sysfs code and let sysfs do this stuff correctly.

Signed-off-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/atm_sysfs.c | 3 ++-
 net/atm/resources.c | 7 ++++---
 net/atm/resources.h | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 799c631f0fe..f7fa67c7876 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -143,12 +143,13 @@ static struct class atm_class = {
 	.dev_uevent		= atm_uevent,
 };
 
-int atm_register_sysfs(struct atm_dev *adev)
+int atm_register_sysfs(struct atm_dev *adev, struct device *parent)
 {
 	struct device *cdev = &adev->class_dev;
 	int i, j, err;
 
 	cdev->class = &atm_class;
+	cdev->parent = parent;
 	dev_set_drvdata(cdev, adev);
 
 	dev_set_name(cdev, "%s%d", adev->type, adev->number);
diff --git a/net/atm/resources.c b/net/atm/resources.c
index d29e5826151..23f45ce6f35 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -74,8 +74,9 @@ struct atm_dev *atm_dev_lookup(int number)
 }
 EXPORT_SYMBOL(atm_dev_lookup);
 
-struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops,
-				 int number, unsigned long *flags)
+struct atm_dev *atm_dev_register(const char *type, struct device *parent,
+				 const struct atmdev_ops *ops, int number,
+				 unsigned long *flags)
 {
 	struct atm_dev *dev, *inuse;
 
@@ -115,7 +116,7 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops,
 		goto out_fail;
 	}
 
-	if (atm_register_sysfs(dev) < 0) {
+	if (atm_register_sysfs(dev, parent) < 0) {
 		pr_err("atm_register_sysfs failed for dev %s\n", type);
 		atm_proc_dev_deregister(dev);
 		goto out_fail;
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 126fb1840df..521431e3050 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -42,6 +42,6 @@ static inline void atm_proc_dev_deregister(struct atm_dev *dev)
 
 #endif /* CONFIG_PROC_FS */
 
-int atm_register_sysfs(struct atm_dev *adev);
+int atm_register_sysfs(struct atm_dev *adev, struct device *parent);
 void atm_unregister_sysfs(struct atm_dev *adev);
 #endif
-- 
cgit v1.2.2


From a19faf0250e09b16cac169354126404bc8aa342b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 5 Dec 2010 18:50:32 +0000
Subject: net: fix skb_defer_rx_timestamp()

After commit c1f19b51d1d8 (net: support time stamping in phy devices.),
kernel might crash if CONFIG_NETWORK_PHY_TIMESTAMPING=y and
skb_defer_rx_timestamp() handles a packet without an ethernet header.

Fixes kernel bugzilla #24102

Reference: https://bugzilla.kernel.org/show_bug.cgi?id=24102
Reported-and-tested-by: Andrew Watts <akwatts@ymail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/timestamping.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85..c19bb4ee405 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -96,11 +96,13 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
 	struct phy_device *phydev;
 	unsigned int type;
 
-	skb_push(skb, ETH_HLEN);
+	if (skb_headroom(skb) < ETH_HLEN)
+		return false;
+	__skb_push(skb, ETH_HLEN);
 
 	type = classify(skb);
 
-	skb_pull(skb, ETH_HLEN);
+	__skb_pull(skb, ETH_HLEN);
 
 	switch (type) {
 	case PTP_CLASS_V1_IPV4:
-- 
cgit v1.2.2


From d96c9043d1588f04c7f467167f653c07d83232d5 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 13 Dec 2010 20:30:28 -0800
Subject: ceph: fix msgr_init error path

create_workqueue() returns NULL on failure.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/messenger.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1c7a2ec4f3c..b6ff4a1519a 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -97,11 +97,9 @@ struct workqueue_struct *ceph_msgr_wq;
 int ceph_msgr_init(void)
 {
 	ceph_msgr_wq = create_workqueue("ceph-msgr");
-	if (IS_ERR(ceph_msgr_wq)) {
-		int ret = PTR_ERR(ceph_msgr_wq);
-		pr_err("msgr_init failed to create workqueue: %d\n", ret);
-		ceph_msgr_wq = NULL;
-		return ret;
+	if (!ceph_msgr_wq) {
+		pr_err("msgr_init failed to create workqueue\n");
+		return -ENOMEM;
 	}
 	return 0;
 }
-- 
cgit v1.2.2


From b6aa5901c7a2bd90d0b6b9866300d2648b2568f3 Mon Sep 17 00:00:00 2001
From: Henry C Chang <henry_c_chang@tcloudcomputing.com>
Date: Wed, 15 Dec 2010 20:45:41 -0800
Subject: ceph: mark user pages dirty on direct-io reads

For read operation, we have to set the argument _write_ of get_user_pages
to 1 since we will write data to pages. Also, we need to SetPageDirty before
releasing these pages.

Signed-off-by: Henry C Chang <henry_c_chang@tcloudcomputing.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/pagevec.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index ac34feeb2b3..01947a5d03b 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -13,7 +13,7 @@
  * build a vector of user pages
  */
 struct page **ceph_get_direct_page_vector(const char __user *data,
-					  int num_pages)
+					  int num_pages, bool write_page)
 {
 	struct page **pages;
 	int rc;
@@ -24,7 +24,7 @@ struct page **ceph_get_direct_page_vector(const char __user *data,
 
 	down_read(&current->mm->mmap_sem);
 	rc = get_user_pages(current, current->mm, (unsigned long)data,
-			    num_pages, 0, 0, pages, NULL);
+			    num_pages, write_page, 0, pages, NULL);
 	up_read(&current->mm->mmap_sem);
 	if (rc < 0)
 		goto fail;
@@ -36,12 +36,15 @@ fail:
 }
 EXPORT_SYMBOL(ceph_get_direct_page_vector);
 
-void ceph_put_page_vector(struct page **pages, int num_pages)
+void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
 {
 	int i;
 
-	for (i = 0; i < num_pages; i++)
+	for (i = 0; i < num_pages; i++) {
+		if (dirty)
+			set_page_dirty_lock(pages[i]);
 		put_page(pages[i]);
+	}
 	kfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
-- 
cgit v1.2.2


From 361cf40519a491f68b28ad90225e4611c4bf8e12 Mon Sep 17 00:00:00 2001
From: Henry C Chang <henry_c_chang@tcloudcomputing.com>
Date: Fri, 17 Dec 2010 09:55:59 -0800
Subject: ceph: handle partial result from get_user_pages

The get_user_pages() helper can return fewer than the requested pages.
Error out in that case, and clean up the partial result.

Signed-off-by: Henry C Chang <henry_c_chang@tcloudcomputing.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/pagevec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 01947a5d03b..1a040e64c69 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -26,12 +26,12 @@ struct page **ceph_get_direct_page_vector(const char __user *data,
 	rc = get_user_pages(current, current->mm, (unsigned long)data,
 			    num_pages, write_page, 0, pages, NULL);
 	up_read(&current->mm->mmap_sem);
-	if (rc < 0)
+	if (rc < num_pages)
 		goto fail;
 	return pages;
 
 fail:
-	kfree(pages);
+	ceph_put_page_vector(pages, rc > 0 ? rc : 0, false);
 	return ERR_PTR(rc);
 }
 EXPORT_SYMBOL(ceph_get_direct_page_vector);
-- 
cgit v1.2.2