From dc7a08166f3a5f23e79e839a8a88849bd3397c32 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 27 Oct 2009 14:41:35 -0400
Subject: nfs: new subdir Documentation/filesystems/nfs

We're adding enough nfs documentation that it may as well have its own
subdirectory.

Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/ipv4/Kconfig    | 6 +++---
 net/ipv4/ipconfig.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 70491d9035eb..0c94a1ac2946 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,7 +166,7 @@ config IP_PNP_DHCP
 
 	  If unsure, say Y. Note that if you want to use DHCP, a DHCP server
 	  must be operating on your network.  Read
-	  <file:Documentation/filesystems/nfsroot.txt> for details.
+	  <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 config IP_PNP_BOOTP
 	bool "IP: BOOTP support"
@@ -181,7 +181,7 @@ config IP_PNP_BOOTP
 	  does BOOTP itself, providing all necessary information on the kernel
 	  command line, you can say N here. If unsure, say Y. Note that if you
 	  want to use BOOTP, a BOOTP server must be operating on your network.
-	  Read <file:Documentation/filesystems/nfsroot.txt> for details.
+	  Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 config IP_PNP_RARP
 	bool "IP: RARP support"
@@ -194,7 +194,7 @@ config IP_PNP_RARP
 	  older protocol which is being obsoleted by BOOTP and DHCP), say Y
 	  here. Note that if you want to use RARP, a RARP server must be
 	  operating on your network. Read
-	  <file:Documentation/filesystems/nfsroot.txt> for details.
+	  <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 # not yet ready..
 #   bool '    IP: ARP support' CONFIG_IP_PNP_ARP
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f8d04c256454..7dcbf4706099 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1447,7 +1447,7 @@ late_initcall(ip_auto_config);
 
 /*
  *  Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
- *  command line parameter.  See Documentation/filesystems/nfsroot.txt.
+ *  command line parameter.  See Documentation/filesystems/nfs/nfsroot.txt.
  */
 static int __init ic_proto_name(char *name)
 {
-- 
cgit v1.2.2


From dc83d6e27fa80babe31c80aa8568f125f72edf57 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 20 Oct 2009 18:51:34 -0400
Subject: nfsd4: don't try to map gid's in generic rpc code

For nfsd we provide users the option of mapping uid's to server-side
supplementary group lists.  That makes sense for nfsd, but not
necessarily for other rpc users (such as the callback client).

So move that lookup to svcauth_unix_set_client, which is a
program-specific method.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/svcauth_unix.c | 53 +++++++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 117f68a8aa40..97cc3de7432e 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -655,23 +655,25 @@ static struct unix_gid *unix_gid_lookup(uid_t uid)
 		return NULL;
 }
 
-static int unix_gid_find(uid_t uid, struct group_info **gip,
-			 struct svc_rqst *rqstp)
+static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
 {
-	struct unix_gid *ug = unix_gid_lookup(uid);
+	struct unix_gid *ug;
+	struct group_info *gi;
+	int ret;
+
+	ug = unix_gid_lookup(uid);
 	if (!ug)
-		return -EAGAIN;
-	switch (cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle)) {
+		return ERR_PTR(-EAGAIN);
+	ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle);
+	switch (ret) {
 	case -ENOENT:
-		*gip = NULL;
-		return 0;
+		return ERR_PTR(-ENOENT);
 	case 0:
-		*gip = ug->gi;
-		get_group_info(*gip);
+		gi = get_group_info(ug->gi);
 		cache_put(&ug->h, &unix_gid_cache);
-		return 0;
+		return gi;
 	default:
-		return -EAGAIN;
+		return ERR_PTR(-EAGAIN);
 	}
 }
 
@@ -681,6 +683,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 	struct sockaddr_in *sin;
 	struct sockaddr_in6 *sin6, sin6_storage;
 	struct ip_map *ipm;
+	struct group_info *gi;
+	struct svc_cred *cred = &rqstp->rq_cred;
 
 	switch (rqstp->rq_addr.ss_family) {
 	case AF_INET:
@@ -722,6 +726,17 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 			ip_map_cached_put(rqstp, ipm);
 			break;
 	}
+
+	gi = unix_gid_find(cred->cr_uid, rqstp);
+	switch (PTR_ERR(gi)) {
+	case -EAGAIN:
+		return SVC_DROP;
+	case -ENOENT:
+		break;
+	default:
+		put_group_info(cred->cr_group_info);
+		cred->cr_group_info = gi;
+	}
 	return SVC_OK;
 }
 
@@ -818,19 +833,11 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	slen = svc_getnl(argv);			/* gids length */
 	if (slen > 16 || (len -= (slen + 2)*4) < 0)
 		goto badcred;
-	if (unix_gid_find(cred->cr_uid, &cred->cr_group_info, rqstp)
-	    == -EAGAIN)
+	cred->cr_group_info = groups_alloc(slen);
+	if (cred->cr_group_info == NULL)
 		return SVC_DROP;
-	if (cred->cr_group_info == NULL) {
-		cred->cr_group_info = groups_alloc(slen);
-		if (cred->cr_group_info == NULL)
-			return SVC_DROP;
-		for (i = 0; i < slen; i++)
-			GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
-	} else {
-		for (i = 0; i < slen ; i++)
-			svc_getnl(argv);
-	}
+	for (i = 0; i < slen; i++)
+		GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
 	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
 		*authp = rpc_autherr_badverf;
 		return SVC_DENIED;
-- 
cgit v1.2.2


From 6f8372b69c3198e06cecb1df2cb9682d0c55e657 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Thu, 19 Nov 2009 13:26:06 -0800
Subject: RDMA/cm: fix loopback address support

The RDMA CM is intended to support the use of a loopback address
when establishing a connection; however, the behavior of the CM
when loopback addresses are used is confusing and does not always
work, depending on whether loopback was specified by the server,
the client, or both.

The defined behavior of rdma_bind_addr is to associate an RDMA
device with an rdma_cm_id, as long as the user specified a non-
zero address.  (ie they weren't just trying to reserve a port)
Currently, if the loopback address is passed to rdam_bind_addr,
no device is associated with the rdma_cm_id.  Fix this.

If a loopback address is specified by the client as the destination
address for a connection, it will fail to establish a connection.
This is true even if the server is listing across all addresses or
on the loopback address itself.  The issue is that the server tries
to translate the IP address carried in the REQ message to a local
net_device address, which fails.  The translation is not needed in
this case, since the REQ carries the actual HW address that should
be used.

Finally, cleanup loopback support to be more transport neutral.
Replace separate calls to get/set the sgid and dgid from the
device address to a single call that behaves correctly depending
on the format of the device address.  And support both IPv4 and
IPv6 address formats.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>

[ Fixed RDS build by s/ib_addr_get/rdma_addr_get/  - Roland ]

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 net/rds/ib.c | 4 ++--
 net/rds/iw.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib.c b/net/rds/ib.c
index 536ebe5d3f6b..3b8992361042 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 		ic = conn->c_transport_data;
 		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
 
-		ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
-		ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+		rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
+		rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
 
 		rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
 		iinfo->max_send_wr = ic->i_send_ring.w_nr;
diff --git a/net/rds/iw.c b/net/rds/iw.c
index db224f7c2937..b28fa8525b24 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn,
 		ic = conn->c_transport_data;
 		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
 
-		ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
-		ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+		rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
+		rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
 
 		rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
 		iinfo->max_send_wr = ic->i_send_ring.w_nr;
-- 
cgit v1.2.2


From 78c210efdefe07131f91ed512a3308b15bb14e2f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 6 Aug 2009 15:41:34 -0400
Subject: Revert "knfsd: avoid overloading the CPU scheduler with enormous load
 averages"

This reverts commit 59a252ff8c0f2fa32c896f69d56ae33e641ce7ad.

This helps in an entirely cached workload but not necessarily in
workloads that require waiting on disk.

Conflicts:

	include/linux/sunrpc/svc.h
	net/sunrpc/svc_xprt.c

Reported-by: Simon Kirby <sim@hostway.ca>
Tested-by: Jesper Krogh <jesper@krogh.cc>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/svc_xprt.c | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index df124f78ee48..2c58b75a236f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -16,8 +16,6 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
-#define SVC_MAX_WAKING 5
-
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -306,7 +304,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
 	int cpu;
-	int thread_avail;
 
 	if (!(xprt->xpt_flags &
 	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
@@ -318,6 +315,12 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 
 	spin_lock_bh(&pool->sp_lock);
 
+	if (!list_empty(&pool->sp_threads) &&
+	    !list_empty(&pool->sp_sockets))
+		printk(KERN_ERR
+		       "svc_xprt_enqueue: "
+		       "threads and transports both waiting??\n");
+
 	if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
 		/* Don't enqueue dead transports */
 		dprintk("svc: transport %p is dead, not enqueued\n", xprt);
@@ -358,15 +361,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	}
 
  process:
-	/* Work out whether threads are available */
-	thread_avail = !list_empty(&pool->sp_threads);	/* threads are asleep */
-	if (pool->sp_nwaking >= SVC_MAX_WAKING) {
-		/* too many threads are runnable and trying to wake up */
-		thread_avail = 0;
-		pool->sp_stats.overloads_avoided++;
-	}
-
-	if (thread_avail) {
+	if (!list_empty(&pool->sp_threads)) {
 		rqstp = list_entry(pool->sp_threads.next,
 				   struct svc_rqst,
 				   rq_list);
@@ -381,8 +376,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 		svc_xprt_get(xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
-		rqstp->rq_waking = 1;
-		pool->sp_nwaking++;
 		pool->sp_stats.threads_woken++;
 		BUG_ON(xprt->xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
@@ -651,11 +644,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		return -EINTR;
 
 	spin_lock_bh(&pool->sp_lock);
-	if (rqstp->rq_waking) {
-		rqstp->rq_waking = 0;
-		pool->sp_nwaking--;
-		BUG_ON(pool->sp_nwaking < 0);
-	}
 	xprt = svc_xprt_dequeue(pool);
 	if (xprt) {
 		rqstp->rq_xprt = xprt;
@@ -1204,16 +1192,15 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
 	struct svc_pool *pool = p;
 
 	if (p == SEQ_START_TOKEN) {
-		seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
+		seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n");
 		return 0;
 	}
 
-	seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
+	seq_printf(m, "%u %lu %lu %lu %lu\n",
 		pool->sp_id,
 		pool->sp_stats.packets,
 		pool->sp_stats.sockets_queued,
 		pool->sp_stats.threads_woken,
-		pool->sp_stats.overloads_avoided,
 		pool->sp_stats.threads_timedout);
 
 	return 0;
-- 
cgit v1.2.2


From feb8ca37cc3d83c07fd042509ef1e176cfeb2cfa Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 3 Dec 2009 08:10:17 -0500
Subject: SUNRPC: Ensure that we honour autoclose before attempting to
 reconnect

If the XPRT_CLOSE_WAIT flag is set, we need to ensure that we call
xprt->ops->close() while holding xprt_lock_write() before we can
start reconnecting.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprt.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index fd46d42afa89..469de292c23c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -700,6 +700,10 @@ void xprt_connect(struct rpc_task *task)
 	}
 	if (!xprt_lock_write(xprt, task))
 		return;
+
+	if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state))
+		xprt->ops->close(xprt);
+
 	if (xprt_connected(xprt))
 		xprt_release_write(xprt, task);
 	else {
-- 
cgit v1.2.2


From f0380f3d16df8f9e2fcd1d8c16fb0d94370bea99 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 3 Dec 2009 08:10:17 -0500
Subject: RPC: Fix two potential races in put_rpccred

It is possible for rpcauth_destroy_credcache() to cause the rpc credentials
to be unhashed while put_rpccred is waiting for the rpc_credcache_lock on
another cpu. Should this happen, then we can end up calling
hlist_del_rcu(&cred->cr_hash) a second time in put_rpccred, thus causing
list corruption.

Should the credential actually be hashed, it is also possible for
rpcauth_lookup_credcache to find and reference it before we get round to
unhashing it. In this case, the call to rpcauth_unhash_cred will fail, and
so we should just exit without destroying the cred.

Reported-by: Neil Brown <neilb@suse.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 54a4e042f104..7ee6f7eaddfb 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -123,16 +123,19 @@ rpcauth_unhash_cred_locked(struct rpc_cred *cred)
 	clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
 }
 
-static void
+static int
 rpcauth_unhash_cred(struct rpc_cred *cred)
 {
 	spinlock_t *cache_lock;
+	int ret;
 
 	cache_lock = &cred->cr_auth->au_credcache->lock;
 	spin_lock(cache_lock);
-	if (atomic_read(&cred->cr_count) == 0)
+	ret = atomic_read(&cred->cr_count) == 0;
+	if (ret)
 		rpcauth_unhash_cred_locked(cred);
 	spin_unlock(cache_lock);
+	return ret;
 }
 
 /*
@@ -446,31 +449,35 @@ void
 put_rpccred(struct rpc_cred *cred)
 {
 	/* Fast path for unhashed credentials */
-	if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
-		goto need_lock;
-
-	if (!atomic_dec_and_test(&cred->cr_count))
+	if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) {
+		if (atomic_dec_and_test(&cred->cr_count))
+			cred->cr_ops->crdestroy(cred);
 		return;
-	goto out_destroy;
-need_lock:
+	}
+
 	if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
 		return;
 	if (!list_empty(&cred->cr_lru)) {
 		number_cred_unused--;
 		list_del_init(&cred->cr_lru);
 	}
-	if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
-		rpcauth_unhash_cred(cred);
 	if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
-		cred->cr_expire = jiffies;
-		list_add_tail(&cred->cr_lru, &cred_unused);
-		number_cred_unused++;
-		spin_unlock(&rpc_credcache_lock);
-		return;
+		if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
+			cred->cr_expire = jiffies;
+			list_add_tail(&cred->cr_lru, &cred_unused);
+			number_cred_unused++;
+			goto out_nodestroy;
+		}
+		if (!rpcauth_unhash_cred(cred)) {
+			/* We were hashed and someone looked us up... */
+			goto out_nodestroy;
+		}
 	}
 	spin_unlock(&rpc_credcache_lock);
-out_destroy:
 	cred->cr_ops->crdestroy(cred);
+	return;
+out_nodestroy:
+	spin_unlock(&rpc_credcache_lock);
 }
 EXPORT_SYMBOL_GPL(put_rpccred);
 
-- 
cgit v1.2.2


From dd1fd90fe65e2e642f0e58e2ff4849f317a6c43d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Display compressed (shorthand) IPv6 presentation addresses

Recent changes to snprintf() introduced the %pI6c formatter, which can
display an IPv6 address with standard shorthanding.  Using a
shorthanded address can save us a few bytes of memory for each stored
presentation address, or a few bytes on the wire when sending these in
a universal address.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/addr.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index c7450c8f0a7c..6dcdd2517819 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -55,16 +55,8 @@ static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
 
 	/*
 	 * RFC 4291, Section 2.2.1
-	 *
-	 * To keep the result as short as possible, especially
-	 * since we don't shorthand, we don't want leading zeros
-	 * in each halfword, so avoid %pI6.
 	 */
-	return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x",
-		ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]),
-		ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]),
-		ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]),
-		ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7]));
+	return snprintf(buf, buflen, "%pI6c", addr);
 }
 
 static size_t rpc_ntop6(const struct sockaddr *sap,
-- 
cgit v1.2.2


From 206a134b4d8abf57cd34dffacf993869355b9aac Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Check explicitly for tk_status == 0 in call_transmit_status()

The success case, where task->tk_status == 0, is by far the most
frequent case in call_transmit_status().

The default: arm of the switch statement in call_transmit_status()
handles the 0 case.  default: was moved close to the top of the switch
statement in call_transmit_status() under the theory that the compiler
places object code for the earliest arms of a switch statement first,
making the CPU do less work.

The default: arm of a switch statement, however, is executed only
after all the other cases have been checked.  Even if the compiler
rearranges the object code, the default: arm is the "last resort",
meaning all of the other cases have been explicitly exhausted.  That
makes the current arrangement about as inefficient as it gets for the
common case.

To fix this, add an explicit check for zero before the switch
statement.  That forces the compiler to do the zero check first, no
matter what optimizations it might try to do to the switch statement.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 38829e20500b..7bcd931e06ee 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1180,10 +1180,22 @@ static void
 call_transmit_status(struct rpc_task *task)
 {
 	task->tk_action = call_status;
+
+	/*
+	 * Common case: success.  Force the compiler to put this
+	 * test first.
+	 */
+	if (task->tk_status == 0) {
+		xprt_end_transmit(task);
+		rpc_task_force_reencode(task);
+		return;
+	}
+
 	switch (task->tk_status) {
 	case -EAGAIN:
 		break;
 	default:
+		dprint_status(task);
 		xprt_end_transmit(task);
 		/*
 		 * Special cases: if we've been waiting on the
-- 
cgit v1.2.2


From 09a21c4102c8f7893368553273d39c0cadedf9af Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Allow RPCs to fail quickly if the server is unreachable

The kernel sometimes makes RPC calls to services that aren't running.
Because the kernel's RPC client always assumes the hard retry semantic
when reconnecting a connection-oriented RPC transport, the underlying
reconnect logic takes a long while to time out, even though the remote
may have responded immediately with ECONNREFUSED.

In certain cases, like upcalls to our local rpcbind daemon, or for NFS
mount requests, we'd like the kernel to fail immediately if the remote
service isn't reachable.  This allows another transport to be tried
immediately, or the pending request can be abandoned quickly.

Introduce a per-request flag which controls how call_transmit_status()
behaves when request transmission fails because the server cannot be
reached.

We don't want soft connection semantics to apply to other errors.  The
default case of the switch statement in call_transmit_status() no
longer falls through; the fall through code is copied to the default
case, and a "break;" is added.

The transport's connection re-establishment timeout is also ignored for
such requests.  We want the request to fail immediately, so the
reconnect delay is skipped.  Additionally, we don't want a connect
failure here to further increase the reconnect timeout value, since
this request will not be retried.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c     | 11 +++++++++--
 net/sunrpc/xprtsock.c |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7bcd931e06ee..68a23583f44c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1197,6 +1197,8 @@ call_transmit_status(struct rpc_task *task)
 	default:
 		dprint_status(task);
 		xprt_end_transmit(task);
+		rpc_task_force_reencode(task);
+		break;
 		/*
 		 * Special cases: if we've been waiting on the
 		 * socket's write_space() callback, or if the
@@ -1204,11 +1206,16 @@ call_transmit_status(struct rpc_task *task)
 		 * then hold onto the transport lock.
 		 */
 	case -ECONNREFUSED:
-	case -ECONNRESET:
-	case -ENOTCONN:
 	case -EHOSTDOWN:
 	case -EHOSTUNREACH:
 	case -ENETUNREACH:
+		if (RPC_IS_SOFTCONN(task)) {
+			xprt_end_transmit(task);
+			rpc_exit(task, task->tk_status);
+			break;
+		}
+	case -ECONNRESET:
+	case -ENOTCONN:
 	case -EPIPE:
 		rpc_task_force_reencode(task);
 	}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 37c5475ba258..ff312f8b018d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2033,7 +2033,7 @@ static void xs_connect(struct rpc_task *task)
 	if (xprt_test_and_set_connecting(xprt))
 		return;
 
-	if (transport->sock != NULL) {
+	if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
 		dprintk("RPC:       xs_connect delayed xprt %p for %lu "
 				"seconds\n",
 				xprt, xprt->reestablish_timeout / HZ);
-- 
cgit v1.2.2


From 5a46211540a83871196489247f57da2bdde58d87 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Simplify synopsis of rpcb_local_clnt()

Clean up: At one point, rpcb_local_clnt() handled IPv6 loopback
addresses too, but it doesn't any more; only IPv4 loopback is used
now.  Get rid of the @addr and @addrlen arguments to
rpcb_local_clnt().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 830faf4d9997..28f50da60ceb 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -163,13 +163,12 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
 	.sin_port		= htons(RPCBIND_PORT),
 };
 
-static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
-					  size_t addrlen, u32 version)
+static struct rpc_clnt *rpcb_create_local(u32 version)
 {
 	struct rpc_create_args args = {
 		.protocol	= XPRT_TRANSPORT_UDP,
-		.address	= addr,
-		.addrsize	= addrlen,
+		.address	= (struct sockaddr *)&rpcb_inaddr_loopback,
+		.addrsize	= sizeof(rpcb_inaddr_loopback),
 		.servername	= "localhost",
 		.program	= &rpcb_program,
 		.version	= version,
@@ -211,14 +210,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 
 static int rpcb_register_call(const u32 version, struct rpc_message *msg)
 {
-	struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback;
-	size_t addrlen = sizeof(rpcb_inaddr_loopback);
 	struct rpc_clnt *rpcb_clnt;
 	int result, error = 0;
 
 	msg->rpc_resp = &result;
 
-	rpcb_clnt = rpcb_create_local(addr, addrlen, version);
+	rpcb_clnt = rpcb_create_local(version);
 	if (!IS_ERR(rpcb_clnt)) {
 		error = rpc_call_sync(rpcb_clnt, msg, 0);
 		rpc_shutdown_client(rpcb_clnt);
-- 
cgit v1.2.2


From c526611dd631b2802b6b0221ffb306c5fa25c86c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Use a cached RPC client and transport for rpcbind upcalls

The kernel's rpcbind client creates and deletes an rpc_clnt and its
underlying transport socket for every upcall to the local rpcbind
daemon.

When starting a typical NFS server on IPv4 and IPv6, the NFS service
itself does three upcalls (one per version) times two upcalls (one
per transport) times two upcalls (one per address family), making 12,
plus another one for the initial call to unregister previous NFS
services.  Starting the NLM service adds an additional 13 upcalls,
for similar reasons.

(Currently the NFS service doesn't start IPv6 listeners, but it will
soon enough).

Instead, let's create an rpc_clnt for rpcbind upcalls during the
first local rpcbind query, and cache it.  This saves the overhead of
creating and destroying an rpc_clnt and a socket for every upcall.

The new logic also prevents the kernel from attempting an RPCB_SET or
RPCB_UNSET if it knows from the start that the local portmapper does
not support rpcbind protocol version 4.  This will cut down on the
number of rpcbind upcalls in legacy environments.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/rpcb_clnt.c   | 93 +++++++++++++++++++++++++++++++++++++++---------
 net/sunrpc/sunrpc_syms.c |  3 ++
 2 files changed, 80 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 28f50da60ceb..116db74dd942 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -20,6 +20,7 @@
 #include <linux/in6.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/mutex.h>
 #include <net/ipv6.h>
 
 #include <linux/sunrpc/clnt.h>
@@ -110,6 +111,9 @@ static void			rpcb_getport_done(struct rpc_task *, void *);
 static void			rpcb_map_release(void *data);
 static struct rpc_program	rpcb_program;
 
+static struct rpc_clnt *	rpcb_local_clnt;
+static struct rpc_clnt *	rpcb_local_clnt4;
+
 struct rpcbind_args {
 	struct rpc_xprt *	r_xprt;
 
@@ -163,7 +167,13 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
 	.sin_port		= htons(RPCBIND_PORT),
 };
 
-static struct rpc_clnt *rpcb_create_local(u32 version)
+static DEFINE_MUTEX(rpcb_create_local_mutex);
+
+/*
+ * Returns zero on success, otherwise a negative errno value
+ * is returned.
+ */
+static int rpcb_create_local(void)
 {
 	struct rpc_create_args args = {
 		.protocol	= XPRT_TRANSPORT_UDP,
@@ -171,12 +181,46 @@ static struct rpc_clnt *rpcb_create_local(u32 version)
 		.addrsize	= sizeof(rpcb_inaddr_loopback),
 		.servername	= "localhost",
 		.program	= &rpcb_program,
-		.version	= version,
+		.version	= RPCBVERS_2,
 		.authflavor	= RPC_AUTH_UNIX,
 		.flags		= RPC_CLNT_CREATE_NOPING,
 	};
+	struct rpc_clnt *clnt, *clnt4;
+	int result = 0;
+
+	if (rpcb_local_clnt)
+		return result;
+
+	mutex_lock(&rpcb_create_local_mutex);
+	if (rpcb_local_clnt)
+		goto out;
+
+	clnt = rpc_create(&args);
+	if (IS_ERR(clnt)) {
+		dprintk("RPC:       failed to create local rpcbind "
+				"client (errno %ld).\n", PTR_ERR(clnt));
+		result = -PTR_ERR(clnt);
+		goto out;
+	}
 
-	return rpc_create(&args);
+	/*
+	 * This results in an RPC ping.  On systems running portmapper,
+	 * the v4 ping will fail.  Proceed anyway, but disallow rpcb
+	 * v4 upcalls.
+	 */
+	clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
+	if (IS_ERR(clnt4)) {
+		dprintk("RPC:       failed to create local rpcbind v4 "
+				"cleint (errno %ld).\n", PTR_ERR(clnt4));
+		clnt4 = NULL;
+	}
+
+	rpcb_local_clnt = clnt;
+	rpcb_local_clnt4 = clnt4;
+
+out:
+	mutex_unlock(&rpcb_create_local_mutex);
+	return result;
 }
 
 static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
@@ -208,20 +252,13 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 	return rpc_create(&args);
 }
 
-static int rpcb_register_call(const u32 version, struct rpc_message *msg)
+static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
 {
-	struct rpc_clnt *rpcb_clnt;
 	int result, error = 0;
 
 	msg->rpc_resp = &result;
 
-	rpcb_clnt = rpcb_create_local(version);
-	if (!IS_ERR(rpcb_clnt)) {
-		error = rpc_call_sync(rpcb_clnt, msg, 0);
-		rpc_shutdown_client(rpcb_clnt);
-	} else
-		error = PTR_ERR(rpcb_clnt);
-
+	error = rpc_call_sync(clnt, msg, 0);
 	if (error < 0) {
 		dprintk("RPC:       failed to contact local rpcbind "
 				"server (errno %d).\n", -error);
@@ -276,6 +313,11 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
 	struct rpc_message msg = {
 		.rpc_argp	= &map,
 	};
+	int error;
+
+	error = rpcb_create_local();
+	if (error)
+		return error;
 
 	dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
 			"rpcbind\n", (port ? "" : "un"),
@@ -285,7 +327,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
 	if (port)
 		msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
 
-	return rpcb_register_call(RPCBVERS_2, &msg);
+	return rpcb_register_call(rpcb_local_clnt, &msg);
 }
 
 /*
@@ -310,7 +352,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap,
 	if (port)
 		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
 
-	result = rpcb_register_call(RPCBVERS_4, msg);
+	result = rpcb_register_call(rpcb_local_clnt4, msg);
 	kfree(map->r_addr);
 	return result;
 }
@@ -337,7 +379,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap,
 	if (port)
 		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
 
-	result = rpcb_register_call(RPCBVERS_4, msg);
+	result = rpcb_register_call(rpcb_local_clnt4, msg);
 	kfree(map->r_addr);
 	return result;
 }
@@ -353,7 +395,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
 	map->r_addr = "";
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
 
-	return rpcb_register_call(RPCBVERS_4, msg);
+	return rpcb_register_call(rpcb_local_clnt4, msg);
 }
 
 /**
@@ -411,6 +453,13 @@ int rpcb_v4_register(const u32 program, const u32 version,
 	struct rpc_message msg = {
 		.rpc_argp	= &map,
 	};
+	int error;
+
+	error = rpcb_create_local();
+	if (error)
+		return error;
+	if (rpcb_local_clnt4 == NULL)
+		return -EPROTONOSUPPORT;
 
 	if (address == NULL)
 		return rpcb_unregister_all_protofamilies(&msg);
@@ -1024,3 +1073,15 @@ static struct rpc_program rpcb_program = {
 	.version	= rpcb_version,
 	.stats		= &rpcb_stats,
 };
+
+/**
+ * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister
+ *
+ */
+void cleanup_rpcb_clnt(void)
+{
+	if (rpcb_local_clnt4)
+		rpc_shutdown_client(rpcb_local_clnt4);
+	if (rpcb_local_clnt)
+		rpc_shutdown_client(rpcb_local_clnt);
+}
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 8cce92189019..f438347d817b 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -24,6 +24,8 @@
 
 extern struct cache_detail ip_map_cache, unix_gid_cache;
 
+extern void cleanup_rpcb_clnt(void);
+
 static int __init
 init_sunrpc(void)
 {
@@ -53,6 +55,7 @@ out:
 static void __exit
 cleanup_sunrpc(void)
 {
+	cleanup_rpcb_clnt();
 	rpcauth_remove_module();
 	cleanup_socket_xprt();
 	svc_cleanup_xprt_sock();
-- 
cgit v1.2.2


From 2a76b3bfa22993fc09166bd6a8db0dbe902b6813 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Use TCP for local rpcbind upcalls

Use TCP with the soft connect semantic for local rpcbind upcalls so
the kernel can detect immediately if the local rpcbind daemon is not
running.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/rpcb_clnt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 116db74dd942..401154094ee1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -176,7 +176,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex);
 static int rpcb_create_local(void)
 {
 	struct rpc_create_args args = {
-		.protocol	= XPRT_TRANSPORT_UDP,
+		.protocol	= XPRT_TRANSPORT_TCP,
 		.address	= (struct sockaddr *)&rpcb_inaddr_loopback,
 		.addrsize	= sizeof(rpcb_inaddr_loopback),
 		.servername	= "localhost",
@@ -258,7 +258,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
 
 	msg->rpc_resp = &result;
 
-	error = rpc_call_sync(clnt, msg, 0);
+	error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN);
 	if (error < 0) {
 		dprintk("RPC:       failed to contact local rpcbind "
 				"server (errno %d).\n", -error);
-- 
cgit v1.2.2


From 012da158f636347c4eb28fd1e1cca020fef5e54d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Use soft connects for autobinding over TCP

Autobinding is handled by the rpciod process, not in user processes
that are generating regular RPC requests.  Thus autobinding is usually
not affected by signals targetting user processes, such as KILL or
timer expiration events.

In addition, an RPC request generated by a user process that has
RPC_TASK_SOFTCONN set and needs to perform an autobind will hang if
the remote rpcbind service is not available.

For rpcbind queries on connection-oriented transports, let's use the
new soft connect semantic to return control to the user's process
quickly, if the kernel's rpcbind client can't connect to the remote
rpcbind service.

Logic is introduced in call_bind_status() to handle connection errors
that occurred during an asynchronous rpcbind query.  The logic
abandons the rpcbind query if the RPC request has SOFTCONN set, and
retries after a few seconds in the normal case.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c      | 17 ++++++++++++++++-
 net/sunrpc/rpcb_clnt.c |  2 +-
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 68a23583f44c..4b76ef9336c7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1060,7 +1060,7 @@ call_bind_status(struct rpc_task *task)
 		goto retry_timeout;
 	case -EPFNOSUPPORT:
 		/* server doesn't support any rpcbind version we know of */
-		dprintk("RPC: %5u remote rpcbind service unavailable\n",
+		dprintk("RPC: %5u unrecognized remote rpcbind service\n",
 				task->tk_pid);
 		break;
 	case -EPROTONOSUPPORT:
@@ -1069,6 +1069,21 @@ call_bind_status(struct rpc_task *task)
 		task->tk_status = 0;
 		task->tk_action = call_bind;
 		return;
+	case -ECONNREFUSED:		/* connection problems */
+	case -ECONNRESET:
+	case -ENOTCONN:
+	case -EHOSTDOWN:
+	case -EHOSTUNREACH:
+	case -ENETUNREACH:
+	case -EPIPE:
+		dprintk("RPC: %5u remote rpcbind unreachable: %d\n",
+				task->tk_pid, task->tk_status);
+		if (!RPC_IS_SOFTCONN(task)) {
+			rpc_delay(task, 5*HZ);
+			goto retry_timeout;
+		}
+		status = task->tk_status;
+		break;
 	default:
 		dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
 				task->tk_pid, -task->tk_status);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 401154094ee1..3e3772d8eb92 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -537,7 +537,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
 		.rpc_message = &msg,
 		.callback_ops = &rpcb_getport_ops,
 		.callback_data = map,
-		.flags = RPC_TASK_ASYNC,
+		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN,
 	};
 
 	return rpc_run_task(&task_setup_data);
-- 
cgit v1.2.2


From caabea8a565fb4e16f8e58e16bb9d535e591b709 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: Use soft connect semantics when performing RPC ping

Currently, if a remote RPC service is unreachable, an RPC ping will
hang until the underlying transport connect attempt times out.  A more
desirable behavior might be to have the ping fail immediately so upper
layers can recover appropriately.

In the case of an NFS mount, for instance, this would mean the
mount(2) system call could fail immediately if the server isn't
listening, rather than hanging uninterruptibly for more than 3
minutes.

Change rpc_ping() so that it fails immediately for connection-oriented
transports.  rpc_create() will then fail immediately for such
transports if an RPC ping was requested.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4b76ef9336c7..97931d9f8579 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -79,7 +79,7 @@ static void	call_connect_status(struct rpc_task *task);
 
 static __be32	*rpc_encode_header(struct rpc_task *task);
 static __be32	*rpc_verify_header(struct rpc_task *task);
-static int	rpc_ping(struct rpc_clnt *clnt, int flags);
+static int	rpc_ping(struct rpc_clnt *clnt);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
 {
@@ -340,7 +340,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 		return clnt;
 
 	if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
-		int err = rpc_ping(clnt, RPC_TASK_SOFT);
+		int err = rpc_ping(clnt);
 		if (err != 0) {
 			rpc_shutdown_client(clnt);
 			return ERR_PTR(err);
@@ -528,7 +528,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
 	clnt->cl_prog     = program->number;
 	clnt->cl_vers     = version->number;
 	clnt->cl_stats    = program->stats;
-	err = rpc_ping(clnt, RPC_TASK_SOFT);
+	err = rpc_ping(clnt);
 	if (err != 0) {
 		rpc_shutdown_client(clnt);
 		clnt = ERR_PTR(err);
@@ -1709,14 +1709,14 @@ static struct rpc_procinfo rpcproc_null = {
 	.p_decode = rpcproc_decode_null,
 };
 
-static int rpc_ping(struct rpc_clnt *clnt, int flags)
+static int rpc_ping(struct rpc_clnt *clnt)
 {
 	struct rpc_message msg = {
 		.rpc_proc = &rpcproc_null,
 	};
 	int err;
 	msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0);
-	err = rpc_call_sync(clnt, &msg, flags);
+	err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN);
 	put_rpccred(msg.rpc_cred);
 	return err;
 }
-- 
cgit v1.2.2


From 3a28becc35e5c8f1fabb707bcd8a473712653de6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: SUNRPC: soft connect semantics for UDP

Introduce soft connect behavior for UDP transports.  In this case, a
major timeout returns ETIMEDOUT instead of EIO.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 97931d9f8579..154034b675bd 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1380,6 +1380,10 @@ call_timeout(struct rpc_task *task)
 	dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid);
 	task->tk_timeouts++;
 
+	if (RPC_IS_SOFTCONN(task)) {
+		rpc_exit(task, -ETIMEDOUT);
+		return;
+	}
 	if (RPC_IS_SOFT(task)) {
 		if (clnt->cl_chatty)
 			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
-- 
cgit v1.2.2


From 480e3243df156e39eea6c91057e2ae612a6bbe19 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 8 Dec 2009 13:13:03 -0500
Subject: SUNRPC: IS_ERR/PTR_ERR confusion

IS_ERR returns 1 or 0, PTR_ERR returns the error value.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fc6a43ccd950..129d75ea25d2 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -485,7 +485,7 @@ gss_refresh_upcall(struct rpc_task *task)
 	dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
 								cred->cr_uid);
 	gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
-	if (IS_ERR(gss_msg) == -EAGAIN) {
+	if (PTR_ERR(gss_msg) == -EAGAIN) {
 		/* XXX: warning on the first, under the assumption we
 		 * shouldn't normally hit this case on a refresh. */
 		warn_gssd();
-- 
cgit v1.2.2


From 053e324f67b9921fe7de0c4cbc720d29cb4bf207 Mon Sep 17 00:00:00 2001
From: Suresh Jayaraman <sjayaraman@suse.de>
Date: Wed, 9 Dec 2009 23:15:22 +0530
Subject: rpc: remove unneeded function parameter in gss_add_msg()

The pointer to struct gss_auth parameter in gss_add_msg is not really needed
after commit 5b7ddd4a. Zap it.

Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 129d75ea25d2..3c3c50f38a1c 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -304,7 +304,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
  * to that upcall instead of adding the new upcall.
  */
 static inline struct gss_upcall_msg *
-gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg)
+gss_add_msg(struct gss_upcall_msg *gss_msg)
 {
 	struct rpc_inode *rpci = gss_msg->inode;
 	struct inode *inode = &rpci->vfs_inode;
@@ -445,7 +445,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr
 	gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred);
 	if (IS_ERR(gss_new))
 		return gss_new;
-	gss_msg = gss_add_msg(gss_auth, gss_new);
+	gss_msg = gss_add_msg(gss_new);
 	if (gss_msg == gss_new) {
 		struct inode *inode = &gss_new->inode->vfs_inode;
 		int res = rpc_queue_upcall(inode, &gss_new->msg);
-- 
cgit v1.2.2


From 5781b2356cbecb0b73b06ec8c3897cabdfdd0928 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 13 Dec 2009 19:32:39 -0800
Subject: udp: udp_lib_get_port() fix

Now we can have a large udp hash table, udp_lib_get_port() loop
should be converted to a do {} while (cond) form,
or we dont enter it at all if hash table size is exactly 65536.

Reported-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1f9534846ca9..f0126fdd7e04 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -216,9 +216,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
 		 */
 		rand = (rand | 1) * (udptable->mask + 1);
-		for (last = first + udptable->mask + 1;
-		     first != last;
-		     first++) {
+		last = first + udptable->mask + 1;
+		do {
 			hslot = udp_hashslot(udptable, net, first);
 			bitmap_zero(bitmap, PORTS_PER_CHAIN);
 			spin_lock_bh(&hslot->lock);
@@ -238,7 +237,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 				snum += rand;
 			} while (snum != first);
 			spin_unlock_bh(&hslot->lock);
-		}
+		} while (++first != last);
 		goto fail;
 	} else {
 		hslot = udp_hashslot(udptable, net, snum);
-- 
cgit v1.2.2


From d90a909e1f3e006a1d57fe11fd417173b6494701 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 12 Dec 2009 22:11:15 +0000
Subject: net: Fix userspace RTM_NEWLINK notifications.

I received some bug reports about userspace programs having problems
because after RTM_NEWLINK was received they could not immediate access
files under /proc/sys/net/ because they had not been registered yet.

The original problem was trivially fixed by moving the userspace
notification from rtnetlink_event() to the end of
register_netdevice().

When testing that change I discovered I was still getting RTM_NEWLINK
events before I could access proc and I was also getting RTM_NEWLINK
events after I was seeing RTM_DELLINK.  Things practically guaranteed
to confuse userspace.

After a little more investigation these extra notifications proved to
be from the new notifiers NETDEV_POST_INIT and NETDEV_UNREGISTER_BATCH
hitting the default case in rtnetlink_event, and triggering
unnecessary RTM_NEWLINK messages.

rtnetlink_event now explicitly handles NETDEV_UNREGISTER_BATCH and
NETDEV_POST_INIT to avoid sending the incorrect userspace
notifications.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c       | 11 +++++++++++
 net/core/rtnetlink.c |  6 +++---
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 6fe7d739e59b..be9924f60ec3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5035,6 +5035,11 @@ int register_netdevice(struct net_device *dev)
 		rollback_registered(dev);
 		dev->reg_state = NETREG_UNREGISTERED;
 	}
+	/*
+	 *	Prevent userspace races by waiting until the network
+	 *	device is fully setup before sending notifications.
+	 */
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
 
 out:
 	return ret;
@@ -5597,6 +5602,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/* Notify protocols, that a new device appeared. */
 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
 
+	/*
+	 *	Prevent userspace races by waiting until the network
+	 *	device is fully setup before sending notifications.
+	 */
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
 	synchronize_net();
 	err = 0;
 out:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 33148a568199..794bcb897ff0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1364,15 +1364,15 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_UNREGISTER:
 		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
 		break;
-	case NETDEV_REGISTER:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
-		break;
 	case NETDEV_UP:
 	case NETDEV_DOWN:
 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
 		break;
+	case NETDEV_POST_INIT:
+	case NETDEV_REGISTER:
 	case NETDEV_CHANGE:
 	case NETDEV_GOING_DOWN:
+	case NETDEV_UNREGISTER_BATCH:
 		break;
 	default:
 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
-- 
cgit v1.2.2


From 9abfe315de96aa5c9878b2f627542bc54901c6e9 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Mon, 14 Dec 2009 16:38:21 +0100
Subject: ipvs: fix synchronization on connection close

commit 9d3a0de makes slaves expire as they would do on the master
with much shorter timeouts. But it introduces another problem:
When we close a connection, on master server the connection became
CLOSE_WAIT/TIME_WAIT, it was synced to slaves, but if master is
finished within it's timeouts (CLOSE), it will not be synced to
slaves. Then slaves will be kept on CLOSE_WAIT/TIME_WAIT until
timeout reaches. Thus we should also sync with CLOSE.

Cc: Wensong Zhang <wensong@linux-vs.org>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b95699f00545..847ffca40184 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1366,6 +1366,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	       == sysctl_ip_vs_sync_threshold[0])) ||
 	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
 	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
+	       (cp->state == IP_VS_TCP_S_CLOSE) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
 		ip_vs_sync_conn(cp);
-- 
cgit v1.2.2


From d24deb2580823ab0b8425790c6f5d18e2ff749d8 Mon Sep 17 00:00:00 2001
From: Gertjan van Wingerde <gwingerde@gmail.com>
Date: Fri, 4 Dec 2009 23:46:54 +0100
Subject: mac80211: Add define for TX headroom reserved by mac80211 itself.

Add a definition of the amount of TX headroom reserved by mac80211 itself
for its own purposes. Also add BUILD_BUG_ON to validate the value.
This define can then be used by drivers to request additional TX headroom
in the most efficient manner.

Signed-off-by: Gertjan van Wingerde <gwingerde@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8116d1a96a4a..0d2d94881f1f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -515,6 +515,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	 * and we need some headroom for passing the frame to monitor
 	 * interfaces, but never both at the same time.
 	 */
+	BUILD_BUG_ON(IEEE80211_TX_STATUS_HEADROOM !=
+			sizeof(struct ieee80211_tx_status_rtap_hdr));
 	local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom,
 				   sizeof(struct ieee80211_tx_status_rtap_hdr));
 
-- 
cgit v1.2.2


From 0b5ccb2ee250136dd7385b1c7da28417d0d4d32d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 15 Dec 2009 16:59:18 +0100
Subject: ipv6: reassembly: use seperate reassembly queues for conntrack and
 local delivery

Currently the same reassembly queue might be used for packets reassembled
by conntrack in different positions in the stack (PREROUTING/LOCAL_OUT),
as well as local delivery. This can cause "packet jumps" when the fragment
completing a reassembled packet is queued from a different position in the
stack than the previous ones.

Add a "user" identifier to the reassembly queue key to seperate the queues
of each caller, similar to what we do for IPv4.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 13 +++++++++++--
 net/ipv6/netfilter/nf_conntrack_reasm.c        |  7 ++++---
 net/ipv6/reassembly.c                          |  5 ++++-
 3 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 5f2ec208a8c3..c0a82fe78321 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -187,6 +187,16 @@ out:
 	return nf_conntrack_confirm(skb);
 }
 
+static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
+						struct sk_buff *skb)
+{
+	if (hooknum == NF_INET_PRE_ROUTING)
+		return IP6_DEFRAG_CONNTRACK_IN;
+	else
+		return IP6_DEFRAG_CONNTRACK_OUT;
+
+}
+
 static unsigned int ipv6_defrag(unsigned int hooknum,
 				struct sk_buff *skb,
 				const struct net_device *in,
@@ -199,8 +209,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 	if (skb->nfct)
 		return NF_ACCEPT;
 
-	reasm = nf_ct_frag6_gather(skb);
-
+	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
 	/* queued */
 	if (reasm == NULL)
 		return NF_STOLEN;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index e0b9424fa1b2..312c20adc83f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -168,13 +168,14 @@ out:
 /* Creation primitives. */
 
 static __inline__ struct nf_ct_frag6_queue *
-fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
+fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
 	unsigned int hash;
 
 	arg.id = id;
+	arg.user = user;
 	arg.src = src;
 	arg.dst = dst;
 
@@ -559,7 +560,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 	return 0;
 }
 
-struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
+struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 {
 	struct sk_buff *clone;
 	struct net_device *dev = skb->dev;
@@ -605,7 +606,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 	if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
 		nf_ct_frag6_evictor();
 
-	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
+	fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr);
 	if (fq == NULL) {
 		pr_debug("Can't find and can't create new queue\n");
 		goto ret_orig;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4d98549a6868..3b3a95607125 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -72,6 +72,7 @@ struct frag_queue
 	struct inet_frag_queue	q;
 
 	__be32			id;		/* fragment id		*/
+	u32			user;
 	struct in6_addr		saddr;
 	struct in6_addr		daddr;
 
@@ -141,7 +142,7 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a)
 	struct ip6_create_arg *arg = a;
 
 	fq = container_of(q, struct frag_queue, q);
-	return (fq->id == arg->id &&
+	return (fq->id == arg->id && fq->user == arg->user &&
 			ipv6_addr_equal(&fq->saddr, arg->src) &&
 			ipv6_addr_equal(&fq->daddr, arg->dst));
 }
@@ -163,6 +164,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
 	struct ip6_create_arg *arg = a;
 
 	fq->id = arg->id;
+	fq->user = arg->user;
 	ipv6_addr_copy(&fq->saddr, arg->src);
 	ipv6_addr_copy(&fq->daddr, arg->dst);
 }
@@ -243,6 +245,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
 	unsigned int hash;
 
 	arg.id = id;
+	arg.user = IP6_DEFRAG_LOCAL_DELIVER;
 	arg.src = src;
 	arg.dst = dst;
 
-- 
cgit v1.2.2


From 8fa9ff6849bb86c59cc2ea9faadf3cb2d5223497 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 15 Dec 2009 16:59:59 +0100
Subject: netfilter: fix crashes in bridge netfilter caused by fragment jumps

When fragments from bridge netfilter are passed to IPv4 or IPv6 conntrack
and a reassembly queue with the same fragment key already exists from
reassembling a similar packet received on a different device (f.i. with
multicasted fragments), the reassembled packet might continue on a different
codepath than where the head fragment originated. This can cause crashes
in bridge netfilter when a fragment received on a non-bridge device (and
thus with skb->nf_bridge == NULL) continues through the bridge netfilter
code.

Add a new reassembly identifier for packets originating from bridge
netfilter and use it to put those packets in insolated queues.

Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14805

Reported-and-Tested-by: Chong Qiao <qiaochong@loongson.cn>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_defrag_ipv4.c            | 21 +++++++++++++++++----
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  6 ++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index fa2d6b6fc3e5..331ead3ebd1b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -14,6 +14,7 @@
 #include <net/route.h>
 #include <net/ip.h>
 
+#include <linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv4.h>
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 
@@ -34,6 +35,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 	return err;
 }
 
+static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
+					      struct sk_buff *skb)
+{
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge &&
+	    skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+		return IP_DEFRAG_CONNTRACK_BRIDGE_IN;
+#endif
+	if (hooknum == NF_INET_PRE_ROUTING)
+		return IP_DEFRAG_CONNTRACK_IN;
+	else
+		return IP_DEFRAG_CONNTRACK_OUT;
+}
+
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 					  struct sk_buff *skb,
 					  const struct net_device *in,
@@ -50,10 +65,8 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 #endif
 	/* Gather fragments. */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		if (nf_ct_ipv4_gather_frags(skb,
-					    hooknum == NF_INET_PRE_ROUTING ?
-					    IP_DEFRAG_CONNTRACK_IN :
-					    IP_DEFRAG_CONNTRACK_OUT))
+		enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
+		if (nf_ct_ipv4_gather_frags(skb, user))
 			return NF_STOLEN;
 	}
 	return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c0a82fe78321..0956ebabbff2 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -20,6 +20,7 @@
 #include <net/ipv6.h>
 #include <net/inet_frag.h>
 
+#include <linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -190,6 +191,11 @@ out:
 static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 						struct sk_buff *skb)
 {
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge &&
+	    skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+		return IP6_DEFRAG_CONNTRACK_BRIDGE_IN;
+#endif
 	if (hooknum == NF_INET_PRE_ROUTING)
 		return IP6_DEFRAG_CONNTRACK_IN;
 	else
-- 
cgit v1.2.2


From 258c889362aa95d0ab534b38ce8c15d3009705b1 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 15 Dec 2009 17:01:25 +0100
Subject: ipvs: zero usvc and udest

Make sure that any otherwise uninitialised fields of usvc are zero.

This has been obvserved to cause a problem whereby the port of
fwmark services may end up as a non-zero value which causes
scheduling of a destination server to fail for persisitent services.

As observed by Deon van der Merwe <dvdm@truteq.co.za>.
This fix suggested by Julian Anastasov <ja@ssi.bg>.

For good measure also zero udest.

Cc: Deon van der Merwe <dvdm@truteq.co.za>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e55a6861d26f..6bde12da2fe0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2714,6 +2714,8 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
 		return -EINVAL;
 
+	memset(usvc, 0, sizeof(*usvc));
+
 	usvc->af = nla_get_u16(nla_af);
 #ifdef CONFIG_IP_VS_IPV6
 	if (usvc->af != AF_INET && usvc->af != AF_INET6)
@@ -2901,6 +2903,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
 	if (!(nla_addr && nla_port))
 		return -EINVAL;
 
+	memset(udest, 0, sizeof(*udest));
+
 	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
 	udest->port = nla_get_u16(nla_port);
 
-- 
cgit v1.2.2


From 471452104b8520337ae2fb48c4e61cd4896e025d Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 14 Dec 2009 18:00:08 -0800
Subject: const: constify remaining dev_pm_ops

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/iucv/af_iucv.c | 2 +-
 net/iucv/iucv.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 1e428863574f..c18286a2167b 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -221,7 +221,7 @@ static int afiucv_pm_restore_thaw(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops afiucv_pm_ops = {
+static const struct dev_pm_ops afiucv_pm_ops = {
 	.prepare = afiucv_pm_prepare,
 	.complete = afiucv_pm_complete,
 	.freeze = afiucv_pm_freeze,
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 3b1f5f5f8de7..fd8b28361a64 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -93,7 +93,7 @@ static int iucv_pm_freeze(struct device *);
 static int iucv_pm_thaw(struct device *);
 static int iucv_pm_restore(struct device *);
 
-static struct dev_pm_ops iucv_pm_ops = {
+static const struct dev_pm_ops iucv_pm_ops = {
 	.prepare = iucv_pm_prepare,
 	.complete = iucv_pm_complete,
 	.freeze = iucv_pm_freeze,
-- 
cgit v1.2.2


From e7d2860b690d4f3bed6824757c540579638e3d1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= <andre.goddard@gmail.com>
Date: Mon, 14 Dec 2009 18:01:06 -0800
Subject: tree-wide: convert open calls to remove spaces to skip_spaces() lib
 function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Makes use of skip_spaces() defined in lib/string.c for removing leading
spaces from strings all over the tree.

It decreases lib.a code size by 47 bytes and reuses the function tree-wide:
   text    data     bss     dec     hex filename
  64688     584     592   65864   10148 (TOTALS-BEFORE)
  64641     584     592   65817   10119 (TOTALS-AFTER)

Also, while at it, if we see (*str && isspace(*str)), we can be sure to
remove the first condition (*str) as the second one (isspace(*str)) also
evaluates to 0 whenever *str == 0, making it redundant. In other words,
"a char equals zero is never a space".

Julia Lawall tried the semantic patch (http://coccinelle.lip6.fr) below,
and found occurrences of this pattern on 3 more files:
    drivers/leds/led-class.c
    drivers/leds/ledtrig-timer.c
    drivers/video/output.c

@@
expression str;
@@

( // ignore skip_spaces cases
while (*str &&  isspace(*str)) { \(str++;\|++str;\) }
|
- *str &&
isspace(*str)
)

Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Cc: Julia Lawall <julia@diku.dk>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: David Howells <dhowells@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: Samuel Ortiz <samuel@sortiz.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/irda/irnet/irnet.h     | 1 +
 net/irda/irnet/irnet_ppp.c | 8 +++-----
 net/netfilter/xt_recent.c  | 3 +--
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index b001c361ad30..4300df35d37d 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -249,6 +249,7 @@
 #include <linux/poll.h>
 #include <linux/capability.h>
 #include <linux/ctype.h>	/* isspace() */
+#include <linux/string.h>	/* skip_spaces() */
 #include <asm/uaccess.h>
 #include <linux/init.h>
 
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7dea882dbb75..156020d138b5 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -76,9 +76,8 @@ irnet_ctrl_write(irnet_socket *	ap,
       /* Look at the next command */
       start = next;
 
-      /* Scrap whitespaces before the command */
-      while(isspace(*start))
-	start++;
+	/* Scrap whitespaces before the command */
+	start = skip_spaces(start);
 
       /* ',' is our command separator */
       next = strchr(start, ',');
@@ -133,8 +132,7 @@ irnet_ctrl_write(irnet_socket *	ap,
 	      char *	endp;
 
 	      /* Scrap whitespaces before the command */
-	      while(isspace(*begp))
-		begp++;
+	      begp = skip_spaces(begp);
 
 	      /* Convert argument to a number (last arg is the base) */
 	      addr = simple_strtoul(begp, &endp, 16);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index eb0ceb846527..fc70a49c0afd 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -482,8 +482,7 @@ static ssize_t recent_old_proc_write(struct file *file,
 	if (copy_from_user(buf, input, size))
 		return -EFAULT;
 
-	while (isspace(*c))
-		c++;
+	c = skip_spaces(c);
 
 	if (size - (c - buf) < 5)
 		return c - buf;
-- 
cgit v1.2.2


From 48f186124220794fce85ed1439fc32f16f69d3e2 Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Mon, 14 Dec 2009 21:27:53 -0800
Subject: rpc: add rpc_queue_empty function

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/sched.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cef74ba0666c..89ea8e69ec78 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -384,6 +384,20 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r
 		__rpc_do_wake_up_task(queue, task);
 }
 
+/*
+ * Tests whether rpc queue is empty
+ */
+int rpc_queue_empty(struct rpc_wait_queue *queue)
+{
+	int res;
+
+	spin_lock_bh(&queue->lock);
+	res = queue->qlen;
+	spin_unlock_bh(&queue->lock);
+	return (res == 0);
+}
+EXPORT_SYMBOL_GPL(rpc_queue_empty);
+
 /*
  * Wake up a task on a specific queue
  */
-- 
cgit v1.2.2


From 689cf5c15baf603a8041565ff0bd0d65d1634fd7 Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Mon, 14 Dec 2009 21:27:56 -0800
Subject: nfs: enforce FIFO ordering of operations trying to acquire slot

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/sched.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 89ea8e69ec78..aae6907fd546 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -210,6 +210,7 @@ void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qnam
 {
 	__rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
 }
+EXPORT_SYMBOL_GPL(rpc_init_priority_wait_queue);
 
 void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
 {
-- 
cgit v1.2.2


From bb5b7c11263dbbe78253cd05945a6bf8f55add8e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Dec 2009 20:56:42 -0800
Subject: tcp: Revert per-route SACK/DSACK/TIMESTAMP changes.

It creates a regression, triggering badness for SYN_RECV
sockets, for example:

[19148.022102] Badness at net/ipv4/inet_connection_sock.c:293
[19148.022570] NIP: c02a0914 LR: c02a0904 CTR: 00000000
[19148.023035] REGS: eeecbd30 TRAP: 0700   Not tainted  (2.6.32)
[19148.023496] MSR: 00029032 <EE,ME,CE,IR,DR>  CR: 24002442  XER: 00000000
[19148.024012] TASK = eee9a820[1756] 'privoxy' THREAD: eeeca000

This is likely caused by the change in the 'estab' parameter
passed to tcp_parse_options() when invoked by the functions
in net/ipv4/tcp_minisocks.c

But even if that is fixed, the ->conn_request() changes made in
this patch series is fundamentally wrong.  They try to use the
listening socket's 'dst' to probe the route settings.  The
listening socket doesn't even have a route, and you can't
get the right route (the child request one) until much later
after we setup all of the state, and it must be done by hand.

This stuff really isn't ready, so the best thing to do is a
full revert.  This reverts the following commits:

f55017a93f1a74d50244b1254b9a2bd7ac9bbf7d
022c3f7d82f0f1c68018696f2f027b87b9bb45c2
1aba721eba1d84a2defce45b950272cee1e6c72a
cda42ebd67ee5fdf09d7057b5a4584d36fe8a335
345cda2fd695534be5a4494f1b59da9daed33663
dc343475ed062e13fc260acccaab91d7d80fd5b2
05eaade2782fb0c90d3034fd7a7d5a16266182bb
6a2a2d6bf8581216e08be15fcb563cfd6c430e1e

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c    | 27 +++++++++++++--------------
 net/ipv4/tcp_input.c     | 24 ++++++++----------------
 net/ipv4/tcp_ipv4.c      | 21 +++++++++------------
 net/ipv4/tcp_minisocks.c | 10 +++++-----
 net/ipv4/tcp_output.c    | 18 +++++-------------
 net/ipv6/syncookies.c    | 28 +++++++++++++---------------
 net/ipv6/tcp_ipv6.c      |  3 +--
 7 files changed, 54 insertions(+), 77 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 26399ad2a289..66fd80ef2473 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -277,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
+	/* check for timestamp cookie support */
+	memset(&tcp_opt, 0, sizeof(tcp_opt));
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+
+	if (tcp_opt.saw_tstamp)
+		cookie_check_timestamp(&tcp_opt);
+
 	ret = NULL;
 	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
@@ -292,6 +299,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->ecn_ok		= 0;
+	ireq->snd_wscale	= tcp_opt.snd_wscale;
+	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
+	ireq->sack_ok		= tcp_opt.sack_ok;
+	ireq->wscale_ok		= tcp_opt.wscale_ok;
+	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
+	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -340,20 +353,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
-	/* check for timestamp cookie support */
-	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst);
-
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
-
-	ireq->snd_wscale        = tcp_opt.snd_wscale;
-	ireq->rcv_wscale        = tcp_opt.rcv_wscale;
-	ireq->sack_ok           = tcp_opt.sack_ok;
-	ireq->wscale_ok         = tcp_opt.wscale_ok;
-	ireq->tstamp_ok         = tcp_opt.saw_tstamp;
-	req->ts_recent          = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-
 	/* Try to redo what tcp_v4_send_synack did. */
 	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 12cab7d74dba..28e029632493 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3727,7 +3727,7 @@ old_ack:
  * the fast version below fails.
  */
 void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       u8 **hvpp, int estab,  struct dst_entry *dst)
+		       u8 **hvpp, int estab)
 {
 	unsigned char *ptr;
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3766,8 +3766,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				break;
 			case TCPOPT_WINDOW:
 				if (opsize == TCPOLEN_WINDOW && th->syn &&
-				    !estab && sysctl_tcp_window_scaling &&
-				    !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
+				    !estab && sysctl_tcp_window_scaling) {
 					__u8 snd_wscale = *(__u8 *)ptr;
 					opt_rx->wscale_ok = 1;
 					if (snd_wscale > 14) {
@@ -3783,8 +3782,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 			case TCPOPT_TIMESTAMP:
 				if ((opsize == TCPOLEN_TIMESTAMP) &&
 				    ((estab && opt_rx->tstamp_ok) ||
-				     (!estab && sysctl_tcp_timestamps &&
-				      !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
+				     (!estab && sysctl_tcp_timestamps))) {
 					opt_rx->saw_tstamp = 1;
 					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
 					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3792,8 +3790,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				break;
 			case TCPOPT_SACK_PERM:
 				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
-				    !estab && sysctl_tcp_sack &&
-				    !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
+				    !estab && sysctl_tcp_sack) {
 					opt_rx->sack_ok = 1;
 					tcp_sack_reset(opt_rx);
 				}
@@ -3878,7 +3875,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 		if (tcp_parse_aligned_timestamp(tp, th))
 			return 1;
 	}
-	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
+	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
 	return 1;
 }
 
@@ -4133,10 +4130,8 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
 static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 
-	if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
-	    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
+	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 		int mib_idx;
 
 		if (before(seq, tp->rcv_nxt))
@@ -4165,15 +4160,13 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
-		if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
-		    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
+		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -5428,11 +5421,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
-	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
 
 	if (th->ack) {
 		/* rfc793:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 15e96030ce47..65b8ebfd078a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1262,20 +1262,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
 #endif
 
-	ireq = inet_rsk(req);
-	ireq->loc_addr = daddr;
-	ireq->rmt_addr = saddr;
-	ireq->no_srccheck = inet_sk(sk)->transparent;
-	ireq->opt = tcp_v4_save_options(sk, skb);
-
-	dst = inet_csk_route_req(sk, req);
-	if(!dst)
-		goto drop_and_free;
-
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
@@ -1319,8 +1309,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
+	ireq = inet_rsk(req);
+	ireq->loc_addr = daddr;
+	ireq->rmt_addr = saddr;
+	ireq->no_srccheck = inet_sk(sk)->transparent;
+	ireq->opt = tcp_v4_save_options(sk, skb);
+
 	if (security_inet_conn_request(sk, skb, req))
-		goto drop_and_release;
+		goto drop_and_free;
 
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
@@ -1345,6 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (tmp_opt.saw_tstamp &&
 		    tcp_death_row.sysctl_tw_recycle &&
+		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 87accec8d097..f206ee5dda80 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -95,9 +95,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	int paws_reject = 0;
 
+	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
-		tmp_opt.tstamp_ok = 1;
-		tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL);
+		tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent	= tcptw->tw_ts_recent;
@@ -526,9 +526,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 
-	if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) {
-		tmp_opt.tstamp_ok = 1;
-		tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL);
+	tmp_opt.saw_tstamp = 0;
+	if (th->doff > (sizeof(struct tcphdr)>>2)) {
+		tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 93316a96d820..383ce237640f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -553,7 +553,6 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_md5sig_key **md5) {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
-	struct dst_entry *dst = __sk_dst_get(sk);
 	unsigned remaining = MAX_TCP_OPTION_SPACE;
 	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
 			 tcp_cookie_size_check(cvp->cookie_desired) :
@@ -581,22 +580,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	opts->mss = tcp_advertise_mss(sk);
 	remaining -= TCPOLEN_MSS_ALIGNED;
 
-	if (likely(sysctl_tcp_timestamps &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) &&
-		   *md5 == NULL)) {
+	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
 		opts->options |= OPTION_TS;
 		opts->tsval = TCP_SKB_CB(skb)->when;
 		opts->tsecr = tp->rx_opt.ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
-	if (likely(sysctl_tcp_window_scaling &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) {
+	if (likely(sysctl_tcp_window_scaling)) {
 		opts->ws = tp->rx_opt.rcv_wscale;
 		opts->options |= OPTION_WSCALE;
 		remaining -= TCPOLEN_WSCALE_ALIGNED;
 	}
-	if (likely(sysctl_tcp_sack &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_SACK))) {
+	if (likely(sysctl_tcp_sack)) {
 		opts->options |= OPTION_SACK_ADVERTISE;
 		if (unlikely(!(OPTION_TS & opts->options)))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -2527,9 +2522,7 @@ static void tcp_connect_init(struct sock *sk)
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
 	 */
 	tp->tcp_header_len = sizeof(struct tcphdr) +
-		(sysctl_tcp_timestamps &&
-		(!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ?
-		  TCPOLEN_TSTAMP_ALIGNED : 0));
+		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
@@ -2555,8 +2548,7 @@ static void tcp_connect_init(struct sock *sk)
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
-				  (sysctl_tcp_window_scaling &&
-				   !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)),
+				  sysctl_tcp_window_scaling,
 				  &rcv_wscale);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5b9af508b8f2..7208a06576c6 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -185,6 +185,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
+	/* check for timestamp cookie support */
+	memset(&tcp_opt, 0, sizeof(tcp_opt));
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+
+	if (tcp_opt.saw_tstamp)
+		cookie_check_timestamp(&tcp_opt);
+
 	ret = NULL;
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (!req)
@@ -218,6 +225,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->expires = 0UL;
 	req->retrans = 0;
 	ireq->ecn_ok		= 0;
+	ireq->snd_wscale	= tcp_opt.snd_wscale;
+	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
+	ireq->sack_ok		= tcp_opt.sack_ok;
+	ireq->wscale_ok		= tcp_opt.wscale_ok;
+	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
+	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
 
@@ -253,21 +266,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 			goto out_free;
 	}
 
-	/* check for timestamp cookie support */
-	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst);
-
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
-
-	req->ts_recent          = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-
-	ireq->snd_wscale        = tcp_opt.snd_wscale;
-	ireq->rcv_wscale        = tcp_opt.rcv_wscale;
-	ireq->sack_ok           = tcp_opt.sack_ok;
-	ireq->wscale_ok         = tcp_opt.wscale_ok;
-	ireq->tstamp_ok         = tcp_opt.saw_tstamp;
-
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ee9cf62458d4..febfd595a40d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1169,7 +1169,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
@@ -1208,7 +1207,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
-- 
cgit v1.2.2


From 1a35ca80c1db7279c3c0655063f6d3490e399b17 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 15 Dec 2009 05:47:03 +0000
Subject: packet: dont call sleeping functions while holding rcu_read_lock()

commit 654d1f8a019dfa06d (packet: less dev_put() calls)
introduced a problem, calling potentially sleeping functions from a
rcu_read_lock() protected section.

Fix this by releasing lock before the sock_wmalloc()/memcpy_fromiovec() calls.

After skb allocation and copy from user space, we redo device
lookup and appropriate tests.

Reported-and-tested-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 71 ++++++++++++++++++++++----------------------------
 1 file changed, 31 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 020562164b56..e0516a22be2e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -415,7 +415,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 {
 	struct sock *sk = sock->sk;
 	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	struct net_device *dev;
 	__be16 proto = 0;
 	int err;
@@ -437,6 +437,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 	 */
 
 	saddr->spkt_device[13] = 0;
+retry:
 	rcu_read_lock();
 	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
 	err = -ENODEV;
@@ -456,58 +457,48 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 	if (len > dev->mtu + dev->hard_header_len)
 		goto out_unlock;
 
-	err = -ENOBUFS;
-	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
-
-	/*
-	 * If the write buffer is full, then tough. At this level the user
-	 * gets to deal with the problem - do your own algorithmic backoffs.
-	 * That's far more flexible.
-	 */
-
-	if (skb == NULL)
-		goto out_unlock;
-
-	/*
-	 *	Fill it in
-	 */
-
-	/* FIXME: Save some space for broken drivers that write a
-	 * hard header at transmission time by themselves. PPP is the
-	 * notable one here. This should really be fixed at the driver level.
-	 */
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb_reset_network_header(skb);
-
-	/* Try to align data part correctly */
-	if (dev->header_ops) {
-		skb->data -= dev->hard_header_len;
-		skb->tail -= dev->hard_header_len;
-		if (len < dev->hard_header_len)
-			skb_reset_network_header(skb);
+	if (!skb) {
+		size_t reserved = LL_RESERVED_SPACE(dev);
+		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
+
+		rcu_read_unlock();
+		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
+		if (skb == NULL)
+			return -ENOBUFS;
+		/* FIXME: Save some space for broken drivers that write a hard
+		 * header at transmission time by themselves. PPP is the notable
+		 * one here. This should really be fixed at the driver level.
+		 */
+		skb_reserve(skb, reserved);
+		skb_reset_network_header(skb);
+
+		/* Try to align data part correctly */
+		if (hhlen) {
+			skb->data -= hhlen;
+			skb->tail -= hhlen;
+			if (len < hhlen)
+				skb_reset_network_header(skb);
+		}
+		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+		if (err)
+			goto out_free;
+		goto retry;
 	}
 
-	/* Returns -EFAULT on error */
-	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+
 	skb->protocol = proto;
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	if (err)
-		goto out_free;
-
-	/*
-	 *	Now send it
-	 */
 
 	dev_queue_xmit(skb);
 	rcu_read_unlock();
 	return len;
 
-out_free:
-	kfree_skb(skb);
 out_unlock:
 	rcu_read_unlock();
+out_free:
+	kfree_skb(skb);
 	return err;
 }
 
-- 
cgit v1.2.2


From 28dfef8febe48f59cf1e7596e1992a6a1893ca24 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 15 Dec 2009 16:46:48 -0800
Subject: const: constify remaining pipe_buf_operations

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bfa3e7865a8c..93c4e060c91e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -93,7 +93,7 @@ static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
 
 
 /* Pipe buffer operations for a socket. */
-static struct pipe_buf_operations sock_pipe_buf_ops = {
+static const struct pipe_buf_operations sock_pipe_buf_ops = {
 	.can_merge = 0,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
-- 
cgit v1.2.2


From 198de4d7ac3a0f1351c6377ff657950457ed0038 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Aug 2009 19:29:23 +0400
Subject: reorder alloc_fd/attach_fd in socketpair()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/socket.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index b94c3dd71015..bf538bea8fbf 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1396,23 +1396,30 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
 		goto out_release_both;
 	}
 
-	fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
-	if (unlikely(fd2 < 0)) {
-		err = fd2;
+	err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
+	if (unlikely(err < 0)) {
 		put_filp(newfile1);
 		put_unused_fd(fd1);
 		goto out_release_both;
 	}
 
-	err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
-	if (unlikely(err < 0)) {
-		goto out_fd2;
+	fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
+	if (unlikely(fd2 < 0)) {
+		err = fd2;
+		fput(newfile1);
+		put_unused_fd(fd1);
+		sock_release(sock2);
+		goto out;
 	}
 
 	err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
 	if (unlikely(err < 0)) {
+		put_filp(newfile2);
+		put_unused_fd(fd2);
 		fput(newfile1);
-		goto out_fd1;
+		put_unused_fd(fd1);
+		sock_release(sock2);
+		goto out;
 	}
 
 	audit_fd_pair(fd1, fd2);
@@ -1438,16 +1445,6 @@ out_release_1:
 	sock_release(sock1);
 out:
 	return err;
-
-out_fd2:
-	put_filp(newfile1);
-	sock_release(sock1);
-out_fd1:
-	put_filp(newfile2);
-	sock_release(sock2);
-	put_unused_fd(fd1);
-	put_unused_fd(fd2);
-	goto out;
 }
 
 /*
-- 
cgit v1.2.2


From 7cbe66b6b53b6615f1033bd5b3dbad8162886373 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Aug 2009 19:59:08 +0400
Subject: merge sock_alloc_fd/sock_attach_fd into a new helper

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/socket.c | 80 +++++++++++++++++-------------------------------------------
 1 file changed, 23 insertions(+), 57 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index bf538bea8fbf..dbb3802a7645 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -355,32 +355,30 @@ static const struct dentry_operations sockfs_dentry_operations = {
  *	but we take care of internal coherence yet.
  */
 
-static int sock_alloc_fd(struct file **filep, int flags)
+static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 {
+	struct qstr name = { .name = "" };
+	struct dentry *dentry;
+	struct file *file;
 	int fd;
 
 	fd = get_unused_fd_flags(flags);
-	if (likely(fd >= 0)) {
-		struct file *file = get_empty_filp();
+	if (unlikely(fd < 0))
+		return fd;
 
-		*filep = file;
-		if (unlikely(!file)) {
-			put_unused_fd(fd);
-			return -ENFILE;
-		}
-	} else
-		*filep = NULL;
-	return fd;
-}
+	file = get_empty_filp();
 
-static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
-{
-	struct dentry *dentry;
-	struct qstr name = { .name = "" };
+	if (unlikely(!file)) {
+		put_unused_fd(fd);
+		return -ENFILE;
+	}
 
 	dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
-	if (unlikely(!dentry))
+	if (unlikely(!dentry)) {
+		put_filp(file);
+		put_unused_fd(fd);
 		return -ENOMEM;
+	}
 
 	dentry->d_op = &sockfs_dentry_operations;
 	/*
@@ -399,24 +397,18 @@ static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
 	file->f_pos = 0;
 	file->private_data = sock;
 
-	return 0;
+	*f = file;
+	return fd;
 }
 
 int sock_map_fd(struct socket *sock, int flags)
 {
 	struct file *newfile;
-	int fd = sock_alloc_fd(&newfile, flags);
-
-	if (likely(fd >= 0)) {
-		int err = sock_attach_fd(sock, newfile, flags);
+	int fd = sock_alloc_file(sock, &newfile, flags);
 
-		if (unlikely(err < 0)) {
-			put_filp(newfile);
-			put_unused_fd(fd);
-			return err;
-		}
+	if (likely(fd >= 0))
 		fd_install(fd, newfile);
-	}
+
 	return fd;
 }
 
@@ -1390,20 +1382,13 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
 	if (err < 0)
 		goto out_release_both;
 
-	fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
+	fd1 = sock_alloc_file(sock1, &newfile1, flags);
 	if (unlikely(fd1 < 0)) {
 		err = fd1;
 		goto out_release_both;
 	}
 
-	err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
-	if (unlikely(err < 0)) {
-		put_filp(newfile1);
-		put_unused_fd(fd1);
-		goto out_release_both;
-	}
-
-	fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
+	fd2 = sock_alloc_file(sock2, &newfile2, flags);
 	if (unlikely(fd2 < 0)) {
 		err = fd2;
 		fput(newfile1);
@@ -1412,16 +1397,6 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
 		goto out;
 	}
 
-	err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
-	if (unlikely(err < 0)) {
-		put_filp(newfile2);
-		put_unused_fd(fd2);
-		fput(newfile1);
-		put_unused_fd(fd1);
-		sock_release(sock2);
-		goto out;
-	}
-
 	audit_fd_pair(fd1, fd2);
 	fd_install(fd1, newfile1);
 	fd_install(fd2, newfile2);
@@ -1548,17 +1523,13 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 	 */
 	__module_get(newsock->ops->owner);
 
-	newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
+	newfd = sock_alloc_file(newsock, &newfile, flags);
 	if (unlikely(newfd < 0)) {
 		err = newfd;
 		sock_release(newsock);
 		goto out_put;
 	}
 
-	err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
-	if (err < 0)
-		goto out_fd_simple;
-
 	err = security_socket_accept(sock, newsock);
 	if (err)
 		goto out_fd;
@@ -1588,11 +1559,6 @@ out_put:
 	fput_light(sock->file, fput_needed);
 out:
 	return err;
-out_fd_simple:
-	sock_release(newsock);
-	put_filp(newfile);
-	put_unused_fd(newfd);
-	goto out_put;
 out_fd:
 	fput(newfile);
 	put_unused_fd(newfd);
-- 
cgit v1.2.2


From 6b18662e239a032f908b7f6e164bdf7e2e0a32c9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 6 Aug 2009 02:02:43 +0400
Subject: 9p connect fixes

* if we fail in p9_conn_create(), we shouldn't leak references to struct file.
  Logics in ->close() doesn't help - ->trans is already gone by the time it's
  called.
* sock_create_kern() can fail.
* use of sock_map_fd() is all fscked up; I'd fixed most of that, but the
  rest will have to wait for a bit more work in net/socket.c (we still are
  violating the basic rule of working with descriptor table: "once the reference
  is installed there, don't rely on finding it there again").

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/9p/trans_fd.c | 112 ++++++++++++++++++++++--------------------------------
 1 file changed, 46 insertions(+), 66 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 4dd873e3a1bb..be1cb909d8c0 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -42,6 +42,8 @@
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
 
+#include <linux/syscalls.h> /* killme */
+
 #define P9_PORT 564
 #define MAX_SOCK_BUF (64*1024)
 #define MAXPOLLWADDR	2
@@ -788,24 +790,41 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
 
 static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 {
-	int fd, ret;
+	struct p9_trans_fd *p;
+	int ret, fd;
+
+	p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
 
 	csocket->sk->sk_allocation = GFP_NOIO;
 	fd = sock_map_fd(csocket, 0);
 	if (fd < 0) {
 		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n");
+		sock_release(csocket);
+		kfree(p);
 		return fd;
 	}
 
-	ret = p9_fd_open(client, fd, fd);
-	if (ret < 0) {
-		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n");
+	get_file(csocket->file);
+	get_file(csocket->file);
+	p->wr = p->rd = csocket->file;
+	client->trans = p;
+	client->status = Connected;
+
+	sys_close(fd);	/* still racy */
+
+	p->rd->f_flags |= O_NONBLOCK;
+
+	p->conn = p9_conn_create(client);
+	if (IS_ERR(p->conn)) {
+		ret = PTR_ERR(p->conn);
+		p->conn = NULL;
+		kfree(p);
+		sockfd_put(csocket);
 		sockfd_put(csocket);
 		return ret;
 	}
-
-	((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK;
-
 	return 0;
 }
 
@@ -883,7 +902,6 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 	struct socket *csocket;
 	struct sockaddr_in sin_server;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	err = parse_opts(args, &opts);
 	if (err < 0)
@@ -897,12 +915,11 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 	sin_server.sin_family = AF_INET;
 	sin_server.sin_addr.s_addr = in_aton(addr);
 	sin_server.sin_port = htons(opts.port);
-	sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
+	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
 
-	if (!csocket) {
+	if (err) {
 		P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n");
-		err = -EIO;
-		goto error;
+		return err;
 	}
 
 	err = csocket->ops->connect(csocket,
@@ -912,30 +929,11 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 		P9_EPRINTK(KERN_ERR,
 			"p9_trans_tcp: problem connecting socket to %s\n",
 			addr);
-		goto error;
-	}
-
-	err = p9_socket_open(client, csocket);
-	if (err < 0)
-		goto error;
-
-	p = (struct p9_trans_fd *) client->trans;
-	p->conn = p9_conn_create(client);
-	if (IS_ERR(p->conn)) {
-		err = PTR_ERR(p->conn);
-		p->conn = NULL;
-		goto error;
-	}
-
-	return 0;
-
-error:
-	if (csocket)
 		sock_release(csocket);
+		return err;
+	}
 
-	kfree(p);
-
-	return err;
+	return p9_socket_open(client, csocket);
 }
 
 static int
@@ -944,49 +942,33 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 	int err;
 	struct socket *csocket;
 	struct sockaddr_un sun_server;
-	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	csocket = NULL;
 
 	if (strlen(addr) > UNIX_PATH_MAX) {
 		P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n",
 			addr);
-		err = -ENAMETOOLONG;
-		goto error;
+		return -ENAMETOOLONG;
 	}
 
 	sun_server.sun_family = PF_UNIX;
 	strcpy(sun_server.sun_path, addr);
-	sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+	err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+	if (err < 0) {
+		P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n");
+		return err;
+	}
 	err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
 			sizeof(struct sockaddr_un) - 1, 0);
 	if (err < 0) {
 		P9_EPRINTK(KERN_ERR,
 			"p9_trans_unix: problem connecting socket: %s: %d\n",
 			addr, err);
-		goto error;
-	}
-
-	err = p9_socket_open(client, csocket);
-	if (err < 0)
-		goto error;
-
-	p = (struct p9_trans_fd *) client->trans;
-	p->conn = p9_conn_create(client);
-	if (IS_ERR(p->conn)) {
-		err = PTR_ERR(p->conn);
-		p->conn = NULL;
-		goto error;
-	}
-
-	return 0;
-
-error:
-	if (csocket)
 		sock_release(csocket);
+		return err;
+	}
 
-	kfree(p);
-	return err;
+	return p9_socket_open(client, csocket);
 }
 
 static int
@@ -994,7 +976,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p = NULL; /* this get allocated in p9_fd_open */
+	struct p9_trans_fd *p;
 
 	parse_opts(args, &opts);
 
@@ -1005,21 +987,19 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
 
 	err = p9_fd_open(client, opts.rfd, opts.wfd);
 	if (err < 0)
-		goto error;
+		return err;
 
 	p = (struct p9_trans_fd *) client->trans;
 	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
-		goto error;
+		fput(p->rd);
+		fput(p->wr);
+		return err;
 	}
 
 	return 0;
-
-error:
-	kfree(p);
-	return err;
 }
 
 static struct p9_trans_module p9_tcp_trans = {
-- 
cgit v1.2.2


From cc3808f8c354889982e7e323050f1e50ad99a009 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 6 Aug 2009 09:43:59 +0400
Subject: switch sock_alloc_file() to alloc_file()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/socket.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index dbb3802a7645..eaaba3510e81 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -366,16 +366,8 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	if (unlikely(fd < 0))
 		return fd;
 
-	file = get_empty_filp();
-
-	if (unlikely(!file)) {
-		put_unused_fd(fd);
-		return -ENFILE;
-	}
-
 	dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
 	if (unlikely(!dentry)) {
-		put_filp(file);
 		put_unused_fd(fd);
 		return -ENOMEM;
 	}
@@ -388,11 +380,19 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	 */
 	dentry->d_flags &= ~DCACHE_UNHASHED;
 	d_instantiate(dentry, SOCK_INODE(sock));
+	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
-	sock->file = file;
-	init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
+	file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
-	SOCK_INODE(sock)->i_fop = &socket_file_ops;
+	if (unlikely(!file)) {
+		/* drop dentry, keep inode */
+		atomic_inc(&path.dentry->d_inode->i_count);
+		dput(dentry);
+		put_unused_fd(fd);
+		return -ENFILE;
+	}
+
+	sock->file = file;
 	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->f_pos = 0;
 	file->private_data = sock;
-- 
cgit v1.2.2


From 2c48b9c45579a9b5e3e74694eebf3d2451f3dbd3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 9 Aug 2009 00:52:35 +0400
Subject: switch alloc_file() to passing struct path

... and have the caller grab both mnt and dentry; kill
leak in infiniband, while we are at it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/socket.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index eaaba3510e81..dbfdfa96d29b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -358,7 +358,7 @@ static const struct dentry_operations sockfs_dentry_operations = {
 static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 {
 	struct qstr name = { .name = "" };
-	struct dentry *dentry;
+	struct path path;
 	struct file *file;
 	int fd;
 
@@ -366,28 +366,29 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	if (unlikely(fd < 0))
 		return fd;
 
-	dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
-	if (unlikely(!dentry)) {
+	path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
+	if (unlikely(!path.dentry)) {
 		put_unused_fd(fd);
 		return -ENOMEM;
 	}
+	path.mnt = mntget(sock_mnt);
 
-	dentry->d_op = &sockfs_dentry_operations;
+	path.dentry->d_op = &sockfs_dentry_operations;
 	/*
 	 * We dont want to push this dentry into global dentry hash table.
 	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
 	 * This permits a working /proc/$pid/fd/XXX on sockets
 	 */
-	dentry->d_flags &= ~DCACHE_UNHASHED;
-	d_instantiate(dentry, SOCK_INODE(sock));
+	path.dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(path.dentry, SOCK_INODE(sock));
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
-	file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
+	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
 	if (unlikely(!file)) {
 		/* drop dentry, keep inode */
 		atomic_inc(&path.dentry->d_inode->i_count);
-		dput(dentry);
+		path_put(&path);
 		put_unused_fd(fd);
 		return -ENFILE;
 	}
-- 
cgit v1.2.2


From a3a065e3f13da8a3470ed09c7f38aad256083726 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 18 Nov 2009 05:30:19 +0100
Subject: fs: no games with DCACHE_UNHASHED

Filesystems outside the regular namespace do not have to clear DCACHE_UNHASHED
in order to have a working /proc/$pid/fd/XXX. Nothing in proc prevents the
fd link from being used if its dentry is not in the hash.

Also, it does not get put into the dcache hash if DCACHE_UNHASHED is clear;
that depends on the filesystem calling d_add or d_rehash.

So delete the misleading comments and needless code.

Acked-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/socket.c | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index dbfdfa96d29b..769c386bd428 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -312,18 +312,6 @@ static struct file_system_type sock_fs_type = {
 	.kill_sb =	kill_anon_super,
 };
 
-static int sockfs_delete_dentry(struct dentry *dentry)
-{
-	/*
-	 * At creation time, we pretended this dentry was hashed
-	 * (by clearing DCACHE_UNHASHED bit in d_flags)
-	 * At delete time, we restore the truth : not hashed.
-	 * (so that dput() can proceed correctly)
-	 */
-	dentry->d_flags |= DCACHE_UNHASHED;
-	return 0;
-}
-
 /*
  * sockfs_dname() is called from d_path().
  */
@@ -334,7 +322,6 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 }
 
 static const struct dentry_operations sockfs_dentry_operations = {
-	.d_delete = sockfs_delete_dentry,
 	.d_dname  = sockfs_dname,
 };
 
@@ -374,12 +361,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	path.mnt = mntget(sock_mnt);
 
 	path.dentry->d_op = &sockfs_dentry_operations;
-	/*
-	 * We dont want to push this dentry into global dentry hash table.
-	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
-	 * This permits a working /proc/$pid/fd/XXX on sockets
-	 */
-	path.dentry->d_flags &= ~DCACHE_UNHASHED;
 	d_instantiate(path.dentry, SOCK_INODE(sock));
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
-- 
cgit v1.2.2


From 971beb83aeb2a309175682cf5683d64fd4591841 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 7 Dec 2009 14:23:21 +0100
Subject: Bluetooth: Fix PTR_ERR return of wrong pointer in hidp_setup_hid()

Return the PTR_ERR of the correct pointer.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hidp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 569750010fd3..18e7f5a43dc4 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -770,7 +770,7 @@ static int hidp_setup_hid(struct hidp_session *session,
 
 	hid = hid_allocate_device();
 	if (IS_ERR(hid))
-		return PTR_ERR(session->hid);
+		return PTR_ERR(hid);
 
 	session->hid = hid;
 	session->req = req;
-- 
cgit v1.2.2


From 186de9a33803c7ee20d9af75c9049b50e68a3a08 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <gustavo@las.ic.unicamp.br>
Date: Tue, 15 Dec 2009 15:56:34 -0200
Subject: Bluetooth: Fix unset of RemoteBusy flag for L2CAP

RemoteBusy flag need to be unset before l2cap_ertm_send(), otherwise
l2cap_ertm_send() will return without sending packets because it checks
that flag before start sending.

Signed-off-by: Gustavo F. Padovan <gustavo@las.ic.unicamp.br>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 5129b88c8e5b..7db9a1f8f882 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3435,8 +3435,8 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 			    (pi->unacked_frames > 0))
 				__mod_retrans_timer();
 
-			l2cap_ertm_send(sk);
 			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+			l2cap_ertm_send(sk);
 		}
 		break;
 
-- 
cgit v1.2.2


From 186ee8cf0130993dea8ab8867ff1af8a148f9ae6 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <gustavo@las.ic.unicamp.br>
Date: Tue, 15 Dec 2009 20:13:27 -0200
Subject: Bluetooth: Ack L2CAP I-frames before retransmit missing packet

Moving the Ack to before l2cap_retransmit_frame() we can avoid the
case where txWindow is full and the packet can't be retransmited.

Signed-off-by: Gustavo F. Padovan <gustavo@las.ic.unicamp.br>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 7db9a1f8f882..fb0f81d99f96 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3471,9 +3471,9 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
 
 		if (rx_control & L2CAP_CTRL_POLL) {
-			l2cap_retransmit_frame(sk, tx_seq);
 			pi->expected_ack_seq = tx_seq;
 			l2cap_drop_acked_frames(sk);
+			l2cap_retransmit_frame(sk, tx_seq);
 			l2cap_ertm_send(sk);
 			if (pi->conn_state & L2CAP_CONN_WAIT_F) {
 				pi->srej_save_reqseq = tx_seq;
-- 
cgit v1.2.2


From b13f5860447a98daf0358a51fbff66154ac0663a Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Tue, 15 Dec 2009 11:38:04 +0200
Subject: Bluetooth: Fix L2CAP locking scheme regression

When locking was introduced the error path branch was not taken
into account. Error was found in sparse code checking. Kudos to
Jani Nikula.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Acked-by: Gustavo F. Padovan <gustavo@las.ic.unicamp.br>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index fb0f81d99f96..1120cf14a548 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1212,6 +1212,7 @@ static void l2cap_monitor_timeout(unsigned long arg)
 	bh_lock_sock(sk);
 	if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) {
 		l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk);
+		bh_unlock_sock(sk);
 		return;
 	}
 
-- 
cgit v1.2.2


From 9c69fabe789b0eb468a0c7031ae7bb850760aea8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 18 Dec 2009 20:11:03 -0800
Subject: netns: fix net.ipv6.route.gc_min_interval_ms in netns

sysctl table was copied, all right, but ->data for net.ipv6.route.gc_min_interval_ms
was not reinitialized for "!= &init_net" case.

In init_net everthing works by accident due to correct ->data initialization
in source table.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index db3b27303890..c2bd74c5f8d9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2630,6 +2630,7 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net)
 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
+		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
 	}
 
 	return table;
-- 
cgit v1.2.2


From 3705e11a21bcdffe7422ee7870e1b23fe4ac70f4 Mon Sep 17 00:00:00 2001
From: Yang Hongyang <yanghy@cn.fujitsu.com>
Date: Fri, 18 Dec 2009 20:25:13 -0800
Subject: ipv6: fix an oops when force unload ipv6 module

When I do an ipv6 module force unload,I got the following oops:
#rmmod -f ipv6
------------[ cut here ]------------
kernel BUG at mm/slub.c:2969!
invalid opcode: 0000 [#1] SMP
last sysfs file: /sys/devices/pci0000:00/0000:00:11.0/0000:02:03.0/net/eth2/ifindex
Modules linked in: ipv6(-) dm_multipath uinput ppdev tpm_tis tpm tpm_bios pcspkr pcnet32 mii parport_pc i2c_piix4 parport i2c_core floppy mptspi mptscsih mptbase scsi_transport_spi

Pid: 2530, comm: rmmod Tainted: G  R        2.6.32 #2 440BX Desktop Reference Platform/VMware Virtual Platform
EIP: 0060:[<c04b73f2>] EFLAGS: 00010246 CPU: 0
EIP is at kfree+0x6a/0xdd
EAX: 00000000 EBX: c09e86bc ECX: c043e4dd EDX: c14293e0
ESI: e141f1d8 EDI: e140fc31 EBP: dec58ef0 ESP: dec58ed0
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process rmmod (pid: 2530, ti=dec58000 task=decb1940 task.ti=dec58000)
Stack:
 c14293e0 00000282 df624240 c0897d08 c09e86bc c09e86bc e141f1d8 dec58f1c
<0> dec58f00 e140fc31 c09e84c4 e141f1bc dec58f14 c0689d21 dec58f1c e141f1bc
<0> 00000000 dec58f2c c0689eff c09e84d8 c09e84d8 e141f1bc bff33a90 dec58f38
Call Trace:
 [<e140fc31>] ? ipv6_frags_exit_net+0x22/0x32 [ipv6]
 [<c0689d21>] ? ops_exit_list+0x19/0x3d
 [<c0689eff>] ? unregister_pernet_operations+0x2a/0x51
 [<c0689f70>] ? unregister_pernet_subsys+0x17/0x24
 [<e140fbfe>] ? ipv6_frag_exit+0x21/0x32 [ipv6]
 [<e141a361>] ? inet6_exit+0x47/0x122 [ipv6]
 [<c045f5de>] ? sys_delete_module+0x198/0x1f6
 [<c04a8acf>] ? remove_vma+0x57/0x5d
 [<c070f63f>] ? do_page_fault+0x2e7/0x315
 [<c0403218>] ? sysenter_do_call+0x12/0x28
Code: 86 00 00 00 40 c1 e8 0c c1 e0 05 01 d0 89 45 e0 66 83 38 00 79 06 8b 40 0c 89 45 e0 8b 55 e0 8b 02 84 c0 78 14 66 a9 00 c0 75 04 <0f> 0b eb fe 8b 45 e0 e8 35 15 fe ff eb 5d 8b 45 04 8b 55 e0 89
EIP: [<c04b73f2>] kfree+0x6a/0xdd SS:ESP 0068:dec58ed0
---[ end trace 4475d1a5b0afa7e5 ]---

It's because in ip6_frags_ns_sysctl_register,
"table" only alloced when "net" is not equals
to "init_net".So when we free "table" in
ip6_frags_ns_sysctl_unregister,we should check
this first.

This patch fix the problem.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3b3a95607125..2cddea3bd6be 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -708,7 +708,8 @@ static void ip6_frags_ns_sysctl_unregister(struct net *net)
 
 	table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
-	kfree(table);
+	if (!net_eq(net, &init_net))
+		kfree(table);
 }
 
 static struct ctl_table_header *ip6_ctl_header;
-- 
cgit v1.2.2


From 9a418af5df03ad133cd8c8f6742b75e542db6392 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 17 Dec 2009 13:55:48 +0100
Subject: mac80211: fix peer HT capabilities

I noticed yesterday, because Jeff had noticed
a speed regression, cf. bug
http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2138
that the SM PS settings for peers were wrong.
Instead of overwriting the SM PS settings with
the local bits, we need to keep the remote bits.

The bug was part of the original HT code from
over two years ago, but unfortunately nobody
noticed that it makes no sense -- we shouldn't
be overwriting the peer's setting with our own
but rather keep it intact when masking the peer
capabilities with our own.

While fixing that, I noticed that the masking of
capabilities is completely useless for most of
the bits, so also fix those other bits.

Finally, I also noticed that PSMP_SUPPORT no
longer exists in the final 802.11n version, so
also remove that.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ht.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 3787455fb696..d7dcee680728 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -34,9 +34,28 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
 
 	ht_cap->ht_supported = true;
 
-	ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap;
-	ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS;
-	ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS;
+	/*
+	 * The bits listed in this expression should be
+	 * the same for the peer and us, if the station
+	 * advertises more then we can't use those thus
+	 * we mask them out.
+	 */
+	ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) &
+		(sband->ht_cap.cap |
+		 ~(IEEE80211_HT_CAP_LDPC_CODING |
+		   IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		   IEEE80211_HT_CAP_GRN_FLD |
+		   IEEE80211_HT_CAP_SGI_20 |
+		   IEEE80211_HT_CAP_SGI_40 |
+		   IEEE80211_HT_CAP_DSSSCCK40));
+	/*
+	 * The STBC bits are asymmetric -- if we don't have
+	 * TX then mask out the peer's RX and vice versa.
+	 */
+	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC))
+		ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC;
+	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC))
+		ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC;
 
 	ampdu_info = ht_cap_ie->ampdu_params_info;
 	ht_cap->ampdu_factor =
-- 
cgit v1.2.2


From 0183826b58a2712ffe608bc3302447be3e6a3ab8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 17 Dec 2009 16:16:53 +0100
Subject: mac80211: fix WMM AP settings application

My
  commit 77fdaa12cea26c204cc12c312fe40bc0f3dcdfd8
  Author: Johannes Berg <johannes@sipsolutions.net>
  Date:   Tue Jul 7 03:45:17 2009 +0200

      mac80211: rework MLME for multiple authentications

inadvertedly broke WMM because it removed, along with
a bunch of other now useless initialisations, the line
initialising sdata->u.mgd.wmm_last_param_set to -1
which would make it adopt any WMM parameter set. If,
as is usually the case, the AP uses WMM parameter set
sequence number zero, we'd never update it until the
AP changes the sequence number.

Add the missing initialisation back to get the WMM
settings from the AP applied locally.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: stable@kernel.org [2.6.31+]
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d8d50fb5e823..c79e59f82fd9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -915,6 +915,14 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
 				IEEE80211_STA_BEACON_POLL);
 
+	/*
+	 * Always handle WMM once after association regardless
+	 * of the first value the AP uses. Setting -1 here has
+	 * that effect because the AP values is an unsigned
+	 * 4-bit value.
+	 */
+	sdata->u.mgd.wmm_last_param_set = -1;
+
 	ieee80211_led_assoc(local, 1);
 
 	sdata->vif.bss_conf.assoc = 1;
-- 
cgit v1.2.2


From 45465487897a1c6d508b14b904dc5777f7ec7e04 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:26 -0800
Subject: kfifo: move struct kfifo in place

This is a new generic kernel FIFO implementation.

The current kernel fifo API is not very widely used, because it has to
many constrains.  Only 17 files in the current 2.6.31-rc5 used it.
FIFO's are like list's a very basic thing and a kfifo API which handles
the most use case would save a lot of development time and memory
resources.

I think this are the reasons why kfifo is not in use:

 - The API is to simple, important functions are missing
 - A fifo can be only allocated dynamically
 - There is a requirement of a spinlock whether you need it or not
 - There is no support for data records inside a fifo

So I decided to extend the kfifo in a more generic way without blowing up
the API to much.  The new API has the following benefits:

 - Generic usage: For kernel internal use and/or device driver.
 - Provide an API for the most use case.
 - Slim API: The whole API provides 25 functions.
 - Linux style habit.
 - DECLARE_KFIFO, DEFINE_KFIFO and INIT_KFIFO Macros
 - Direct copy_to_user from the fifo and copy_from_user into the fifo.
 - The kfifo itself is an in place member of the using data structure, this save an
   indirection access and does not waste the kernel allocator.
 - Lockless access: if only one reader and one writer is active on the fifo,
   which is the common use case, no additional locking is necessary.
 - Remove spinlock - give the user the freedom of choice what kind of locking to use if
   one is required.
 - Ability to handle records. Three type of records are supported:
   - Variable length records between 0-255 bytes, with a record size
     field of 1 bytes.
   - Variable length records between 0-65535 bytes, with a record size
     field of 2 bytes.
   - Fixed size records, which no record size field.
 - Preserve memory resource.
 - Performance!
 - Easy to use!

This patch:

Since most users want to have the kfifo as part of another object,
reorganize the code to allow including struct kfifo in another data
structure.  This requires changing the kfifo_alloc and kfifo_init
prototypes so that we pass an existing kfifo pointer into them.  This
patch changes the implementation and all existing users.

[akpm@linux-foundation.org: fix warning]
Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/dccp/probe.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index dc328425fa20..6230ceb0823e 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -43,7 +43,7 @@ static int bufsize = 64 * 1024;
 static const char procname[] = "dccpprobe";
 
 static struct {
-	struct kfifo	  *fifo;
+	struct kfifo	  fifo;
 	spinlock_t	  lock;
 	wait_queue_head_t wait;
 	struct timespec	  tstart;
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put(dccpw.fifo, tbuf, len);
+	kfifo_put(&dccpw.fifo, tbuf, len);
 	wake_up(&dccpw.wait);
 }
 
@@ -109,7 +109,7 @@ static struct jprobe dccp_send_probe = {
 
 static int dccpprobe_open(struct inode *inode, struct file *file)
 {
-	kfifo_reset(dccpw.fifo);
+	kfifo_reset(&dccpw.fifo);
 	getnstimeofday(&dccpw.tstart);
 	return 0;
 }
@@ -131,11 +131,11 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 		return -ENOMEM;
 
 	error = wait_event_interruptible(dccpw.wait,
-					 __kfifo_len(dccpw.fifo) != 0);
+					 __kfifo_len(&dccpw.fifo) != 0);
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get(dccpw.fifo, tbuf, len);
+	cnt = kfifo_get(&dccpw.fifo, tbuf, len);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
@@ -156,10 +156,8 @@ static __init int dccpprobe_init(void)
 
 	init_waitqueue_head(&dccpw.wait);
 	spin_lock_init(&dccpw.lock);
-	dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
-	if (IS_ERR(dccpw.fifo))
-		return PTR_ERR(dccpw.fifo);
-
+	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock))
+		return ret;
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
 		goto err0;
 
@@ -172,14 +170,14 @@ static __init int dccpprobe_init(void)
 err1:
 	proc_net_remove(&init_net, procname);
 err0:
-	kfifo_free(dccpw.fifo);
+	kfifo_free(&dccpw.fifo);
 	return ret;
 }
 module_init(dccpprobe_init);
 
 static __exit void dccpprobe_exit(void)
 {
-	kfifo_free(dccpw.fifo);
+	kfifo_free(&dccpw.fifo);
 	proc_net_remove(&init_net, procname);
 	unregister_jprobe(&dccp_send_probe);
 
-- 
cgit v1.2.2


From c1e13f25674ed564948ecb7dfe5f83e578892896 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:27 -0800
Subject: kfifo: move out spinlock

Move the pointer to the spinlock out of struct kfifo.  Most users in
tree do not actually use a spinlock, so the few exceptions now have to
call kfifo_{get,put}_locked, which takes an extra argument to a
spinlock.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/dccp/probe.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 6230ceb0823e..c6b50351aa78 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put(&dccpw.fifo, tbuf, len);
+	kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	wake_up(&dccpw.wait);
 }
 
@@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get(&dccpw.fifo, tbuf, len);
+	cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
@@ -156,7 +156,7 @@ static __init int dccpprobe_init(void)
 
 	init_waitqueue_head(&dccpw.wait);
 	spin_lock_init(&dccpw.lock);
-	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock))
+	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL))
 		return ret;
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
 		goto err0;
-- 
cgit v1.2.2


From e64c026dd09b73faf20707711402fc5ed55a8e70 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:28 -0800
Subject: kfifo: cleanup namespace

change name of __kfifo_* functions to kfifo_*, because the prefix __kfifo
should be reserved for internal functions only.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/dccp/probe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index c6b50351aa78..9ef36849edd7 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -131,7 +131,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 		return -ENOMEM;
 
 	error = wait_event_interruptible(dccpw.wait,
-					 __kfifo_len(&dccpw.fifo) != 0);
+					 kfifo_len(&dccpw.fifo) != 0);
 	if (error)
 		goto out_free;
 
-- 
cgit v1.2.2


From 7acd72eb85f1c7a15e8b5eb554994949241737f1 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:28 -0800
Subject: kfifo: rename kfifo_put... into kfifo_in... and kfifo_get... into
 kfifo_out...

rename kfifo_put...  into kfifo_in...  to prevent miss use of old non in
kernel-tree drivers

ditto for kfifo_get...  -> kfifo_out...

Improve the prototypes of kfifo_in and kfifo_out to make the kerneldoc
annotations more readable.

Add mini "howto porting to the new API" in kfifo.h

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/dccp/probe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 9ef36849edd7..a1362dc8abb0 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
+	kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	wake_up(&dccpw.wait);
 }
 
@@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
+	cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
-- 
cgit v1.2.2


From f466dba1832f05006cf6caa9be41fb98d11cb848 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 23 Dec 2009 22:02:57 -0800
Subject: pktgen: ndo_start_xmit can return NET_XMIT_xxx values

This updates pktgen so that it does not decrement skb->users
when it receives valid NET_XMIT_xxx values.  These are now
valid return values from ndo_start_xmit in net-next-2.6.
They also indicate the skb has been consumed.

This fixes pktgen to work correctly with vlan devices.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a23b45f08ec9..de0c2c726420 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -250,8 +250,7 @@ struct pktgen_dev {
 	__u64 count;		/* Default No packets to send */
 	__u64 sofar;		/* How many pkts we've sent so far */
 	__u64 tx_bytes;		/* How many bytes we've transmitted */
-	__u64 errors;		/* Errors when trying to transmit,
-				   pkts will be re-sent */
+	__u64 errors;		/* Errors when trying to transmit, */
 
 	/* runtime counters relating to clone_skb */
 
@@ -3465,6 +3464,12 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		pkt_dev->seq_num++;
 		pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
 		break;
+	case NET_XMIT_DROP:
+	case NET_XMIT_CN:
+	case NET_XMIT_POLICED:
+		/* skb has been consumed */
+		pkt_dev->errors++;
+		break;
 	default: /* Drivers are not supposed to return other values! */
 		if (net_ratelimit())
 			pr_info("pktgen: %s xmit error: %d\n",
-- 
cgit v1.2.2


From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Fri, 25 Dec 2009 17:30:22 -0800
Subject: net: restore ip source validation

when using policy routing and the skb mark:
there are cases where a back path validation requires us
to use a different routing table for src ip validation than
the one used for mapping ingress dst ip.
One such a case is transparent proxying where we pretend to be
the destination system and therefore the local table
is used for incoming packets but possibly a main table would
be used on outbound.
Make the default behavior to allow the above and if users
need to turn on the symmetry via sysctl src_valid_mark

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c      | 1 +
 net/ipv4/fib_frontend.c | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5cdbc102a418..040c4f05b653 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
 					"accept_source_route"),
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
+		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3323168ee52d..82dbf711d6d0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 		no_addr = in_dev->ifa_list == NULL;
 		rpf = IN_DEV_RPFILTER(in_dev);
 		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
+		if (mark && !IN_DEV_SRC_VMARK(in_dev))
+			fl.mark = 0;
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.2


From 96c5340147584481ef0c0afbb5423f7563c1d24a Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 25 Dec 2009 23:30:02 +0000
Subject: NET: XFRM: Fix spelling of neighbour.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

 net/xfrm/xfrm_policy.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cb81ca35b0d6..4725a549ad4d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1445,7 +1445,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 	if (!dev)
 		goto free_dst;
 
-	/* Copy neighbout for reachability confirmation */
+	/* Copy neighbour for reachability confirmation */
 	dst0->neighbour = neigh_clone(dst->neighbour);
 
 	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
-- 
cgit v1.2.2


From 2e10d330f8d5f039fa1e00baf59435ab0f11c722 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 20 Dec 2009 19:07:09 +0100
Subject: mac80211: fix ibss join with fixed-bssid

When fixed bssid is requested when joining an ibss network, incoming
beacons that match the configured bssid cause mac80211 to create new
sta entries, even before the ibss interface is in joined state.
When that happens, it fails to bring up the interface entirely, because
it checks for existing sta entries before joining.
This patch fixes this bug by refusing to create sta info entries before
the interface is fully operational.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 10d13856f86c..1f2db647bb5c 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -382,6 +382,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 					u8 *bssid,u8 *addr, u32 supp_rates)
 {
+	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	int band = local->hw.conf.channel->band;
@@ -397,6 +398,9 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 		return NULL;
 	}
 
+	if (ifibss->state == IEEE80211_IBSS_MLME_SEARCH)
+		return NULL;
+
 	if (compare_ether_addr(bssid, sdata->u.ibss.bssid))
 		return NULL;
 
-- 
cgit v1.2.2


From 3bdb2d48c5f58c781a4099c99044384a23620884 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 23 Dec 2009 13:12:05 +0100
Subject: cfg80211: fix race between deauth and assoc response

Joseph Nahmias reported, in http://bugs.debian.org/562016,
that he was getting the following warning (with some log
around the issue):

  ath0: direct probe to AP 00:11:95:77:e0:b0 (try 1)
  ath0: direct probe responded
  ath0: authenticate with AP 00:11:95:77:e0:b0 (try 1)
  ath0: authenticated
  ath0: associate with AP 00:11:95:77:e0:b0 (try 1)
  ath0: deauthenticating from 00:11:95:77:e0:b0 by local choice (reason=3)
  ath0: direct probe to AP 00:11:95:77:e0:b0 (try 1)
  ath0: RX AssocResp from 00:11:95:77:e0:b0 (capab=0x421 status=0 aid=2)
  ath0: associated
  ------------[ cut here ]------------
  WARNING: at net/wireless/mlme.c:97 cfg80211_send_rx_assoc+0x14d/0x152 [cfg80211]()
  Hardware name: 7658CTO
  ...
  Pid: 761, comm: phy0 Not tainted 2.6.32-trunk-686 #1
  Call Trace:
   [<c1030a5d>] ? warn_slowpath_common+0x5e/0x8a
   [<c1030a93>] ? warn_slowpath_null+0xa/0xc
   [<f86cafc7>] ? cfg80211_send_rx_assoc+0x14d/0x152
  ...
  ath0: link becomes ready
  ath0: deauthenticating from 00:11:95:77:e0:b0 by local choice (reason=3)
  ath0: no IPv6 routers present
  ath0: link is not ready
  ath0: direct probe to AP 00:11:95:77:e0:b0 (try 1)
  ath0: direct probe responded
  ath0: authenticate with AP 00:11:95:77:e0:b0 (try 1)
  ath0: authenticated
  ath0: associate with AP 00:11:95:77:e0:b0 (try 1)
  ath0: RX ReassocResp from 00:11:95:77:e0:b0 (capab=0x421 status=0 aid=2)
  ath0: associated

It is not clear to me how the first "direct probe" here
happens, but this seems to be a race condition, if the
user requests to deauth after requesting assoc, but before
the assoc response is received. In that case, it may
happen that mac80211 tries to report the assoc success to
cfg80211, but gets blocked on the wdev lock that is held
because the user is requesting the deauth.

The result is that we run into a warning. This is mostly
harmless, but maybe cause an unexpected event to be sent
to userspace; we'd send an assoc success event although
userspace was no longer expecting that.

To fix this, remove the warning and check whether the
race happened and in that case abort processing.

Reported-by: Joseph Nahmias <joe@nahmias.net>
Cc: stable@kernel.org
Cc: 562016-quiet@bugs.debian.org
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/mlme.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 1001db4912f7..82e6002c8d67 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -93,7 +93,18 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len)
 			}
 		}
 
-		WARN_ON(!bss);
+		/*
+		 * We might be coming here because the driver reported
+		 * a successful association at the same time as the
+		 * user requested a deauth. In that case, we will have
+		 * removed the BSS from the auth_bsses list due to the
+		 * deauth request when the assoc response makes it. If
+		 * the two code paths acquire the lock the other way
+		 * around, that's just the standard situation of a
+		 * deauth being requested while connected.
+		 */
+		if (!bss)
+			goto out;
 	} else if (wdev->conn) {
 		cfg80211_sme_failed_assoc(wdev);
 		/*
-- 
cgit v1.2.2


From 65486c8b30498dd274eea2c542696f22b63fe5b8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 23 Dec 2009 15:33:35 +0100
Subject: cfg80211: fix error path in cfg80211_wext_siwscan

If there's an invalid channel or SSID, the code leaks
the scan request. Always free the scan request, unless
it was successfully given to the driver.

Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/scan.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 12dfa62aad18..0c2cbbebca95 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -601,7 +601,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
 	struct cfg80211_registered_device *rdev;
 	struct wiphy *wiphy;
 	struct iw_scan_req *wreq = NULL;
-	struct cfg80211_scan_request *creq;
+	struct cfg80211_scan_request *creq = NULL;
 	int i, err, n_channels = 0;
 	enum ieee80211_band band;
 
@@ -694,8 +694,10 @@ int cfg80211_wext_siwscan(struct net_device *dev,
 	/* translate "Scan for SSID" request */
 	if (wreq) {
 		if (wrqu->data.flags & IW_SCAN_THIS_ESSID) {
-			if (wreq->essid_len > IEEE80211_MAX_SSID_LEN)
-				return -EINVAL;
+			if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) {
+				err = -EINVAL;
+				goto out;
+			}
 			memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len);
 			creq->ssids[0].ssid_len = wreq->essid_len;
 		}
@@ -707,12 +709,15 @@ int cfg80211_wext_siwscan(struct net_device *dev,
 	err = rdev->ops->scan(wiphy, dev, creq);
 	if (err) {
 		rdev->scan_req = NULL;
-		kfree(creq);
+		/* creq will be freed below */
 	} else {
 		nl80211_send_scan_start(rdev, dev);
+		/* creq now owned by driver */
+		creq = NULL;
 		dev_hold(dev);
 	}
  out:
+	kfree(creq);
 	cfg80211_unlock_rdev(rdev);
 	return err;
 }
-- 
cgit v1.2.2


From b98c06b6debfe84c90200143bb1102f312f50a33 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 24 Dec 2009 15:26:09 -0500
Subject: mac80211: fix race with suspend and dynamic_ps_disable_work

When mac80211 suspends it calls a driver's suspend callback
as a last step and after that the driver assumes no calls will
be made to it until we resume and its start callback is kicked.
If such calls are made, however, suspend can end up throwing
hardware in an unexpected state and making the device unusable
upon resume.

Fix this by preventing mac80211 to schedule dynamic_ps_disable_work
by checking for when mac80211 starts to suspend and starts
quiescing. Frames should be allowed to go through though as
that is part of the quiescing steps and we do not flush the
mac80211 workqueue since it was already done towards the
beginning of suspend cycle.

The other mac80211 issue will be hanled in the next patch.

For further details see refer to the thread:

http://marc.info/?t=126144866100001&r=1&w=2

Cc: stable@kernel.org
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8834cc93c716..27ceaefd7bc8 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1419,6 +1419,10 @@ static bool need_dynamic_ps(struct ieee80211_local *local)
 	if (!local->ps_sdata)
 		return false;
 
+	/* No point if we're going to suspend */
+	if (local->quiescing)
+		return false;
+
 	return true;
 }
 
-- 
cgit v1.2.2


From 24feda0084722189468a65e20019cdd8ef99702b Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 24 Dec 2009 15:38:22 -0500
Subject: mac80211: fix propagation of failed hardware reconfigurations

mac80211 does not propagate failed hardware reconfiguration
requests. For suspend and resume this is important due to all
the possible issues that can come out of the suspend <-> resume
cycle. Not propagating the error means cfg80211 will assume
the resume for the device went through fine and mac80211 will
continue on trying to poke at the hardware, enable timers,
queue work, and so on for a device which is completley
unfunctional.

The least we can do is to propagate device start issues and
warn when this occurs upon resume. A side effect of this patch
is we also now propagate the start errors upon harware
reconfigurations (non-suspend), but this should also be desirable
anyway, there is not point in continuing to reconfigure a
device if mac80211 was unable to start the device.

For further details refer to the thread:

http://marc.info/?t=126151038700001&r=1&w=2

Cc: stable@kernel.org
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/util.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 78a6e924c7e1..dc76267e436e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1039,7 +1039,19 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 
 	/* restart hardware */
 	if (local->open_count) {
+		/*
+		 * Upon resume hardware can sometimes be goofy due to
+		 * various platform / driver / bus issues, so restarting
+		 * the device may at times not work immediately. Propagate
+		 * the error.
+		 */
 		res = drv_start(local);
+		if (res) {
+			WARN(local->suspended, "Harware became unavailable "
+			     "upon resume. This is could be a software issue"
+			     "prior to suspend or a harware issue\n");
+			return res;
+		}
 
 		ieee80211_led_radio(local, true);
 	}
-- 
cgit v1.2.2


From b292cf9ce70d221c3f04ff62db5ab13d9a249ca8 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Thu, 31 Dec 2009 10:52:36 +0800
Subject: sunrpc: fix peername failed on closed listener

There're some warnings of "nfsd: peername failed (err 107)!"
socket error -107 means Transport endpoint is not connected.
This warning message was outputed by svc_tcp_accept() [net/sunrpc/svcsock.c],
when kernel_getpeername returns -107. This means socket might be CLOSED.

And svc_tcp_accept was called by svc_recv() [net/sunrpc/svc_xprt.c]

        if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
        <snip>
                newxpt = xprt->xpt_ops->xpo_accept(xprt);
        <snip>

So this might happen when xprt->xpt_flags has both XPT_LISTENER and XPT_CLOSE.

Let's take a look at commit b0401d72, this commit has moved the close
processing after do recvfrom method, but this commit also introduces this
warnings, if the xpt_flags has both XPT_LISTENER and XPT_CLOSED, we should
close it, not accpet then close.

Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/svc_xprt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2c58b75a236f..810ffe8a636b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -699,7 +699,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	spin_unlock_bh(&pool->sp_lock);
 
 	len = 0;
-	if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
+	if (test_bit(XPT_LISTENER, &xprt->xpt_flags) &&
+	    !test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
 		struct svc_xprt *newxpt;
 		newxpt = xprt->xpt_ops->xpo_accept(xprt);
 		if (newxpt) {
-- 
cgit v1.2.2