From 62bac4af3d778f6d06d351c0442008967c512588 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 25 Oct 2010 12:50:15 -0400
Subject: svcrpc: don't set then immediately clear XPT_DEFERRED

There's no harm to doing this, since the only caller will immediately
call svc_enqueue() afterwards, ensuring we don't miss the remaining
deferred requests just because XPT_DEFERRED was briefly cleared.

But why not just do this the simple way?

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c82fe739fbdc..a74cb67f15bf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1059,14 +1059,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
 	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
 		return NULL;
 	spin_lock(&xprt->xpt_lock);
-	clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
 	if (!list_empty(&xprt->xpt_deferred)) {
 		dr = list_entry(xprt->xpt_deferred.next,
 				struct svc_deferred_req,
 				handle.recent);
 		list_del_init(&dr->handle.recent);
-		set_bit(XPT_DEFERRED, &xprt->xpt_flags);
-	}
+	} else
+		clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
 	spin_unlock(&xprt->xpt_lock);
 	return dr;
 }
-- 
cgit v1.2.2


From ca7896cd83456082b1e78816cdf7e41658ef7bcd Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 25 Oct 2010 14:12:40 -0400
Subject: nfsd4: centralize more calls to svc_xprt_received

Follow up on b48fa6b99100dc7772af3cd276035fcec9719ceb by moving all the
svc_xprt_received() calls for the main xprt to one place.  The clearing
of XPT_BUSY here is critical to the correctness of the server, so I'd
prefer it to be obvious where we do it.

The only substantive result is moving svc_xprt_received() after
svc_receive_deferred().  Other than a (likely insignificant) delay
waking up the next thread, that should be harmless.

Also reshuffle the exit code a little to skip a few other steps that we
don't care about the in the svc_delete_xprt() case.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index a74cb67f15bf..dd61cd02461f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -716,7 +716,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
 		dprintk("svc_recv: found XPT_CLOSE\n");
 		svc_delete_xprt(xprt);
-	} else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
+		/* Leave XPT_BUSY set on the dead xprt: */
+		goto out;
+	}
+	if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
 		struct svc_xprt *newxpt;
 		newxpt = xprt->xpt_ops->xpo_accept(xprt);
 		if (newxpt) {
@@ -741,28 +744,23 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 			spin_unlock_bh(&serv->sv_lock);
 			svc_xprt_received(newxpt);
 		}
-		svc_xprt_received(xprt);
 	} else {
 		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
 			rqstp, pool->sp_id, xprt,
 			atomic_read(&xprt->xpt_ref.refcount));
 		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
-		if (rqstp->rq_deferred) {
-			svc_xprt_received(xprt);
+		if (rqstp->rq_deferred)
 			len = svc_deferred_recv(rqstp);
-		} else {
+		else
 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
-			svc_xprt_received(xprt);
-		}
 		dprintk("svc: got len=%d\n", len);
 	}
+	svc_xprt_received(xprt);
 
 	/* No data, incomplete (TCP) read, or accept() */
-	if (len == 0 || len == -EAGAIN) {
-		rqstp->rq_res.len = 0;
-		svc_xprt_release(rqstp);
-		return -EAGAIN;
-	}
+	if (len == 0 || len == -EAGAIN)
+		goto out;
+
 	clear_bit(XPT_OLD, &xprt->xpt_flags);
 
 	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
@@ -771,6 +769,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (serv->sv_stats)
 		serv->sv_stats->netcnt++;
 	return len;
+out:
+	rqstp->rq_res.len = 0;
+	svc_xprt_release(rqstp);
+	return -EAGAIN;
 }
 EXPORT_SYMBOL_GPL(svc_recv);
 
-- 
cgit v1.2.2


From f8c0d226fef05226ff1a85055c8ed663022f40c1 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 25 Oct 2010 18:11:21 -0400
Subject: svcrpc: simplify svc_close_all

There's no need to be fooling with XPT_BUSY now that all the threads
are gone.

The list_del_init() here could execute at the same time as the
svc_xprt_enqueue()'s list_add_tail(), with undefined results.  We don't
really care at this point, but it might result in a spurious
list-corruption warning or something.

And svc_close() isn't adding any value; just call svc_delete_xprt()
directly.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index dd61cd02461f..8c018df80692 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -941,16 +941,16 @@ void svc_close_all(struct list_head *xprt_list)
 	struct svc_xprt *xprt;
 	struct svc_xprt *tmp;
 
+	/*
+	 * The server is shutting down, and no more threads are running.
+	 * svc_xprt_enqueue() might still be running, but at worst it
+	 * will re-add the xprt to sp_sockets, which will soon get
+	 * freed.  So we don't bother with any more locking, and don't
+	 * leave the close to the (nonexistent) server threads:
+	 */
 	list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
 		set_bit(XPT_CLOSE, &xprt->xpt_flags);
-		if (test_bit(XPT_BUSY, &xprt->xpt_flags)) {
-			/* Waiting to be processed, but no threads left,
-			 * So just remove it from the waiting list
-			 */
-			list_del_init(&xprt->xpt_ready);
-			clear_bit(XPT_BUSY, &xprt->xpt_flags);
-		}
-		svc_close_xprt(xprt);
+		svc_delete_xprt(xprt);
 	}
 }
 
-- 
cgit v1.2.2


From b176331627fccc726d28f4fc4a357d1f3c19dbf0 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 25 Oct 2010 20:24:48 -0400
Subject: svcrpc: svc_close_xprt comment

Neil Brown had to explain to me why we do this here; record the answer
for posterity.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 8c018df80692..bfbda676574a 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -931,7 +931,12 @@ void svc_close_xprt(struct svc_xprt *xprt)
 	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
 		/* someone else will have to effect the close */
 		return;
-
+	/*
+	 * We expect svc_close_xprt() to work even when no threads are
+	 * running (e.g., while configuring the server before starting
+	 * any threads), so if the transport isn't busy, we delete
+	 * it ourself:
+	 */
 	svc_delete_xprt(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
-- 
cgit v1.2.2


From 9c335c0b8daf56b9f73479d00b1dd726e1fcca09 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 26 Oct 2010 11:32:03 -0400
Subject: svcrpc: fix wspace-checking race

We call svc_xprt_enqueue() after something happens which we think may
require handling from a server thread.  To avoid such events being lost,
svc_xprt_enqueue() must guarantee that there will be a svc_serv() call
from a server thread following any such event.  It does that by either
waking up a server thread itself, or checking that XPT_BUSY is set (in
which case somebody else is doing it).

But the check of XPT_BUSY could occur just as someone finishes
processing some other event, and just before they clear XPT_BUSY.

Therefore it's important not to clear XPT_BUSY without subsequently
doing another svc_export_enqueue() to check whether the xprt should be
requeued.

The xpo_wspace() check in svc_xprt_enqueue() breaks this rule, allowing
an event to be missed in situations like:

	data arrives
	call svc_tcp_data_ready():
	call svc_xprt_enqueue():
	set BUSY
	find no write space
				svc_reserve():
				free up write space
				call svc_enqueue():
				test BUSY
	clear BUSY

So, instead, check wspace in the same places that the state flags are
checked: before taking BUSY, and in svc_receive().

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index bfbda676574a..b6f47da170b3 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -302,6 +302,15 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 	list_del(&rqstp->rq_list);
 }
 
+static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
+{
+	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
+		return true;
+	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED)))
+		return xprt->xpt_ops->xpo_has_wspace(xprt);
+	return false;
+}
+
 /*
  * Queue up a transport with data pending. If there are idle nfsd
  * processes, wake 'em up.
@@ -314,8 +323,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	struct svc_rqst	*rqstp;
 	int cpu;
 
-	if (!(xprt->xpt_flags &
-	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
+	if (!svc_xprt_has_something_to_do(xprt))
 		return;
 
 	cpu = get_cpu();
@@ -345,25 +353,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	BUG_ON(xprt->xpt_pool != NULL);
 	xprt->xpt_pool = pool;
 
-	/* Handle pending connection */
-	if (test_bit(XPT_CONN, &xprt->xpt_flags))
-		goto process;
-
-	/* Handle close in-progress */
-	if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
-		goto process;
-
-	/* Check if we have space to reply to a request */
-	if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
-		/* Don't enqueue while not enough space for reply */
-		dprintk("svc: no write space, transport %p  not enqueued\n",
-			xprt);
-		xprt->xpt_pool = NULL;
-		clear_bit(XPT_BUSY, &xprt->xpt_flags);
-		goto out_unlock;
-	}
-
- process:
 	if (!list_empty(&pool->sp_threads)) {
 		rqstp = list_entry(pool->sp_threads.next,
 				   struct svc_rqst,
@@ -744,7 +733,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 			spin_unlock_bh(&serv->sv_lock);
 			svc_xprt_received(newxpt);
 		}
-	} else {
+	} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
 		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
 			rqstp, pool->sp_id, xprt,
 			atomic_read(&xprt->xpt_ref.refcount));
-- 
cgit v1.2.2


From 7c96aef75949a56ec427fc6a2522dace2af33605 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 15 Nov 2010 11:27:01 +1100
Subject: sunrpc: remove xpt_pool

The xpt_pool field is only used for reporting BUGs.
And it isn't used correctly.

In particular, when it is cleared in svc_xprt_received before
XPT_BUSY is cleared, there is no guarantee that either the
compiler or the CPU might not re-order to two assignments, just
setting xpt_pool to NULL after XPT_BUSY is cleared.

If a different cpu were running svc_xprt_enqueue at this moment,
it might see XPT_BUSY clear and then xpt_pool non-NULL, and
so BUG.

This could be fixed by calling
  smp_mb__before_clear_bit()
before the clear_bit.  However as xpt_pool isn't really used,
it seems safest to simply remove xpt_pool.

Another alternate would be to change the clear_bit to
clear_bit_unlock, and the test_and_set_bit to test_and_set_bit_lock.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 5a75d23645c8..5eae53b1e306 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -351,8 +351,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 		dprintk("svc: transport %p busy, not enqueued\n", xprt);
 		goto out_unlock;
 	}
-	BUG_ON(xprt->xpt_pool != NULL);
-	xprt->xpt_pool = pool;
 
 	if (!list_empty(&pool->sp_threads)) {
 		rqstp = list_entry(pool->sp_threads.next,
@@ -370,13 +368,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 		pool->sp_stats.threads_woken++;
-		BUG_ON(xprt->xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
 	} else {
 		dprintk("svc: transport %p put into queue\n", xprt);
 		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
 		pool->sp_stats.sockets_queued++;
-		BUG_ON(xprt->xpt_pool != pool);
 	}
 
 out_unlock:
@@ -415,7 +411,6 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
 void svc_xprt_received(struct svc_xprt *xprt)
 {
 	BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
-	xprt->xpt_pool = NULL;
 	/* As soon as we clear busy, the xprt could be closed and
 	 * 'put', so we need a reference to call svc_xprt_enqueue with:
 	 */
-- 
cgit v1.2.2


From 3942302ea9e1dffa933021b20bf1642046e7641b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 15 Nov 2010 11:27:01 +1100
Subject: sunrpc: svc_sock_names should hold ref to socket being closed.

Currently svc_sock_names calls svc_close_xprt on a svc_sock to
which it does not own a reference.
As soon as svc_close_xprt sets XPT_CLOSE, the socket could be
freed by a separate thread (though this is a very unlikely race).

It is safer to hold a reference while calling svc_close_xprt.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 07919e16be3e..52bd1130e83a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -324,19 +324,21 @@ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen,
 			len = onelen;
 			break;
 		}
-		if (toclose && strcmp(toclose, buf + len) == 0)
+		if (toclose && strcmp(toclose, buf + len) == 0) {
 			closesk = svsk;
-		else
+			svc_xprt_get(&closesk->sk_xprt);
+		} else
 			len += onelen;
 	}
 	spin_unlock_bh(&serv->sv_lock);
 
-	if (closesk)
+	if (closesk) {
 		/* Should unregister with portmap, but you cannot
 		 * unregister just one protocol...
 		 */
 		svc_close_xprt(&closesk->sk_xprt);
-	else if (toclose)
+		svc_xprt_put(&closesk->sk_xprt);
+	} else if (toclose)
 		return -ENOENT;
 	return len;
 }
-- 
cgit v1.2.2


From 66c941f4aa8aef397a757001af61073db23b39e5 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Wed, 15 Dec 2010 14:47:20 +0800
Subject: net: sunrpc: kill unused macros

These macros never be used for several years.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/svcauth_gss.c | 2 --
 net/sunrpc/svcauth.c              | 1 -
 net/sunrpc/svcauth_unix.c         | 2 --
 3 files changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index dec2a6fc7c12..bcdae78fdfc6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -67,7 +67,6 @@ static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b)
 
 #define	RSI_HASHBITS	6
 #define	RSI_HASHMAX	(1<<RSI_HASHBITS)
-#define	RSI_HASHMASK	(RSI_HASHMAX-1)
 
 struct rsi {
 	struct cache_head	h;
@@ -319,7 +318,6 @@ static struct rsi *rsi_update(struct rsi *new, struct rsi *old)
 
 #define	RSC_HASHBITS	10
 #define	RSC_HASHMAX	(1<<RSC_HASHBITS)
-#define	RSC_HASHMASK	(RSC_HASHMAX-1)
 
 #define GSS_SEQ_WIN	128
 
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 4e9393c24687..7963569fc04f 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -118,7 +118,6 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister);
 
 #define	DN_HASHBITS	6
 #define	DN_HASHMAX	(1<<DN_HASHBITS)
-#define	DN_HASHMASK	(DN_HASHMAX-1)
 
 static struct hlist_head	auth_domain_table[DN_HASHMAX];
 static spinlock_t	auth_domain_lock =
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 560677d187f1..a04ac9193d59 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -85,7 +85,6 @@ static void svcauth_unix_domain_release(struct auth_domain *dom)
  */
 #define	IP_HASHBITS	8
 #define	IP_HASHMAX	(1<<IP_HASHBITS)
-#define	IP_HASHMASK	(IP_HASHMAX-1)
 
 struct ip_map {
 	struct cache_head	h;
@@ -497,7 +496,6 @@ svcauth_unix_info_release(struct svc_xprt *xpt)
  */
 #define	GID_HASHBITS	8
 #define	GID_HASHMAX	(1<<GID_HASHBITS)
-#define	GID_HASHMASK	(GID_HASHMAX - 1)
 
 struct unix_gid {
 	struct cache_head	h;
-- 
cgit v1.2.2


From f3c0ceea83ba3741226ea04eb1804d254da2642f Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 14 Nov 2010 18:08:11 -0800
Subject: net/sunrpc/auth_gss/gss_krb5_crypto.c: Use normal negative error
 value return

And remove unnecessary double semicolon too.

No effect to code, as test is != 0.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 75ee993ea057..9576f35ab701 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -137,7 +137,7 @@ arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
 		ms_usage = 13;
 		break;
 	default:
-		return EINVAL;;
+		return -EINVAL;
 	}
 	salt[0] = (ms_usage >> 0) & 0xff;
 	salt[1] = (ms_usage >> 8) & 0xff;
-- 
cgit v1.2.2


From 31f7aa65f536995c6d933c57230919ae408952a5 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 24 Dec 2010 14:03:38 -0500
Subject: svcrpc: modifying valid sunrpc cache entries is racy

Once a sunrpc cache entry is VALID, we should be replacing it (and
allowing any concurrent users to destroy it on last put) instead of
trying to update it in place.

Otherwise someone referencing the ip_map we're modifying here could try
to use the m_client just as we're putting the last reference.

The bug should only be seen by users of the legacy nfsd interfaces.

(Thanks to Neil for suggestion to use sunrpc_invalidate.)

Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcauth_unix.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index a04ac9193d59..59a7c524a8b1 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -401,8 +401,7 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr)
 		return NULL;
 
 	if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
-		if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0)
-			auth_domain_put(&ipm->m_client->h);
+		sunrpc_invalidate(&ipm->h, sn->ip_map_cache);
 		rv = NULL;
 	} else {
 		rv = &ipm->m_client->h;
-- 
cgit v1.2.2


From bdd5f05d91e8ae68075b812ce244c918d3d752cd Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 4 Jan 2011 13:31:45 -0500
Subject: SUNRPC: Remove more code when NFSD_DEPRECATED is not configured

Signed-off-by: NeilBrown <neilb@suse.de>
[bfields@redhat.com: moved svcauth_unix_purge outside ifdef's.]
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcauth_unix.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 59a7c524a8b1..30916b06c12b 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -30,7 +30,9 @@
 
 struct unix_domain {
 	struct auth_domain	h;
+#ifdef CONFIG_NFSD_DEPRECATED
 	int	addr_changes;
+#endif /* CONFIG_NFSD_DEPRECATED */
 	/* other stuff later */
 };
 
@@ -64,7 +66,9 @@ struct auth_domain *unix_domain_find(char *name)
 			return NULL;
 		}
 		new->h.flavour = &svcauth_unix;
+#ifdef CONFIG_NFSD_DEPRECATED
 		new->addr_changes = 0;
+#endif /* CONFIG_NFSD_DEPRECATED */
 		rv = auth_domain_lookup(name, &new->h);
 	}
 }
@@ -91,7 +95,9 @@ struct ip_map {
 	char			m_class[8]; /* e.g. "nfsd" */
 	struct in6_addr		m_addr;
 	struct unix_domain	*m_client;
+#ifdef CONFIG_NFSD_DEPRECATED
 	int			m_add_change;
+#endif /* CONFIG_NFSD_DEPRECATED */
 };
 
 static void ip_map_put(struct kref *kref)
@@ -145,7 +151,9 @@ static void update(struct cache_head *cnew, struct cache_head *citem)
 
 	kref_get(&item->m_client->h.ref);
 	new->m_client = item->m_client;
+#ifdef CONFIG_NFSD_DEPRECATED
 	new->m_add_change = item->m_add_change;
+#endif /* CONFIG_NFSD_DEPRECATED */
 }
 static struct cache_head *ip_map_alloc(void)
 {
@@ -330,6 +338,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
 	ip.h.flags = 0;
 	if (!udom)
 		set_bit(CACHE_NEGATIVE, &ip.h.flags);
+#ifdef CONFIG_NFSD_DEPRECATED
 	else {
 		ip.m_add_change = udom->addr_changes;
 		/* if this is from the legacy set_client system call,
@@ -338,6 +347,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
 		if (expiry == NEVER)
 			ip.m_add_change++;
 	}
+#endif /* CONFIG_NFSD_DEPRECATED */
 	ip.h.expiry_time = expiry;
 	ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
 				 hash_str(ipm->m_class, IP_HASHBITS) ^
@@ -357,6 +367,7 @@ static inline int ip_map_update(struct net *net, struct ip_map *ipm,
 	return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
 }
 
+#ifdef CONFIG_NFSD_DEPRECATED
 int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom)
 {
 	struct unix_domain *udom;
@@ -411,6 +422,7 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr)
 	return rv;
 }
 EXPORT_SYMBOL_GPL(auth_unix_lookup);
+#endif /* CONFIG_NFSD_DEPRECATED */
 
 void svcauth_unix_purge(void)
 {
-- 
cgit v1.2.2


From d76d1815f3e72fb627ad7f95ef63120b0a557c9c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sun, 2 Jan 2011 21:28:34 -0500
Subject: svcrpc: avoid double reply caused by deferral race

Commit d29068c431599fa "sunrpc: Simplify cache_defer_req and related
functions." asserted that cache_check() could determine success or
failure of cache_defer_req() by checking the CACHE_PENDING bit.

This isn't quite right.

We need to know whether cache_defer_req() created a deferred request,
in which case sending an rpc reply has become the responsibility of the
deferred request, and it is important that we not send our own reply,
resulting in two different replies to the same request.

And the CACHE_PENDING bit doesn't tell us that; we could have
succesfully created a deferred request at the same time as another
thread cleared the CACHE_PENDING bit.

So, partially revert that commit, to ensure that cache_check() returns
-EAGAIN if and only if a deferred request has been created.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Acked-by: NeilBrown <neilb@suse.de>
---
 net/sunrpc/cache.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index e433e7580e27..0d6002f26d82 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -37,7 +37,7 @@
 
 #define	 RPCDBG_FACILITY RPCDBG_CACHE
 
-static void cache_defer_req(struct cache_req *req, struct cache_head *item);
+static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
 static void cache_revisit_request(struct cache_head *item);
 
 static void cache_init(struct cache_head *h)
@@ -268,9 +268,11 @@ int cache_check(struct cache_detail *detail,
 	}
 
 	if (rv == -EAGAIN) {
-		cache_defer_req(rqstp, h);
-		if (!test_bit(CACHE_PENDING, &h->flags)) {
-			/* Request is not deferred */
+		if (!cache_defer_req(rqstp, h)) {
+			/*
+			 * Request was not deferred; handle it as best
+			 * we can ourselves:
+			 */
 			rv = cache_is_valid(detail, h);
 			if (rv == -EAGAIN)
 				rv = -ETIMEDOUT;
@@ -618,18 +620,19 @@ static void cache_limit_defers(void)
 		discard->revisit(discard, 1);
 }
 
-static void cache_defer_req(struct cache_req *req, struct cache_head *item)
+/* Return true if and only if a deferred request is queued. */
+static bool cache_defer_req(struct cache_req *req, struct cache_head *item)
 {
 	struct cache_deferred_req *dreq;
 
 	if (req->thread_wait) {
 		cache_wait_req(req, item);
 		if (!test_bit(CACHE_PENDING, &item->flags))
-			return;
+			return false;
 	}
 	dreq = req->defer(req);
 	if (dreq == NULL)
-		return;
+		return false;
 	setup_deferral(dreq, item, 1);
 	if (!test_bit(CACHE_PENDING, &item->flags))
 		/* Bit could have been cleared before we managed to
@@ -638,6 +641,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item)
 		cache_revisit_request(item);
 
 	cache_limit_defers();
+	return true;
 }
 
 static void cache_revisit_request(struct cache_head *item)
-- 
cgit v1.2.2


From 9e701c610923aaeac8b38b9202a686d1cc9ee35d Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sun, 2 Jan 2011 21:56:36 -0500
Subject: svcrpc: simpler request dropping

Currently we use -EAGAIN returns to determine when to drop a deferred
request.  On its own, that is error-prone, as it makes us treat -EAGAIN
returns from other functions specially to prevent inadvertent dropping.

So, use a flag on the request instead.

Returning an error on request deferral is still required, to prevent
further processing, but we no longer need worry that an error return on
its own could result in a drop.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc.c      | 3 ++-
 net/sunrpc/svc_xprt.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6359c42c4941..df1931f8ae98 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1005,6 +1005,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	rqstp->rq_splice_ok = 1;
 	/* Will be turned off only when NFSv4 Sessions are used */
 	rqstp->rq_usedeferral = 1;
+	rqstp->rq_dropme = false;
 
 	/* Setup reply header */
 	rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
@@ -1106,7 +1107,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
 
 		/* Encode reply */
-		if (*statp == rpc_drop_reply) {
+		if (rqstp->rq_dropme) {
 			if (procp->pc_release)
 				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
 			goto dropit;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 5eae53b1e306..173f3b975d49 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1019,6 +1019,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
 	}
 	svc_xprt_get(rqstp->rq_xprt);
 	dr->xprt = rqstp->rq_xprt;
+	rqstp->rq_dropme = true;
 
 	dr->handle.revisit = svc_revisit;
 	return &dr->handle;
-- 
cgit v1.2.2


From 6bab93f87ec703bf6650875881b11f9f27d7da56 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Jan 2011 15:10:27 -0500
Subject: svcrpc: take lock on turning entry NEGATIVE in cache_check

We attempt to turn a cache entry negative in place.  But that entry may
already have been filled in by some other task since we last checked
whether it was valid, so we could be modifying an already-valid entry.
If nothing else there's a likely leak in such a case when the entry is
eventually put() and contents are not freed because it has
CACHE_NEGATIVE set.

So, take the cache_lock just as sunrpc_cache_update() does.

Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 0d6002f26d82..a6c57334fa13 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -213,6 +213,23 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head
 	}
 }
 
+static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h)
+{
+	int rv;
+
+	write_lock(&detail->hash_lock);
+	rv = cache_is_valid(detail, h);
+	if (rv != -EAGAIN) {
+		write_unlock(&detail->hash_lock);
+		return rv;
+	}
+	set_bit(CACHE_NEGATIVE, &h->flags);
+	cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
+	write_unlock(&detail->hash_lock);
+	cache_fresh_unlocked(h, detail);
+	return -ENOENT;
+}
+
 /*
  * This is the generic cache management routine for all
  * the authentication caches.
@@ -251,14 +268,8 @@ int cache_check(struct cache_detail *detail,
 			case -EINVAL:
 				clear_bit(CACHE_PENDING, &h->flags);
 				cache_revisit_request(h);
-				if (rv == -EAGAIN) {
-					set_bit(CACHE_NEGATIVE, &h->flags);
-					cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
-					cache_fresh_unlocked(h, detail);
-					rv = -ENOENT;
-				}
+				rv = try_to_negate_entry(detail, h);
 				break;
-
 			case -EAGAIN:
 				clear_bit(CACHE_PENDING, &h->flags);
 				cache_revisit_request(h);
-- 
cgit v1.2.2


From fdef7aa5d4020fd94ffcbf0078d6bd9e5a111e19 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 4 Jan 2011 14:12:47 -0500
Subject: svcrpc: ensure cache_check caller sees updated entry

Supposes cache_check runs simultaneously with an update on a different
CPU:

	cache_check			task doing update
	^^^^^^^^^^^			^^^^^^^^^^^^^^^^^

	1. test for CACHE_VALID		1'. set entry->data
	   & !CACHE_NEGATIVE

	2. use entry->data		2'. set CACHE_VALID

If the two memory writes performed in step 1' and 2' appear misordered
with respect to the reads in step 1 and 2, then the caller could get
stale data at step 2 even though it saw CACHE_VALID set on the cache
entry.

Add memory barriers to prevent this.

Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index a6c57334fa13..72ad836e4fe0 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -128,6 +128,7 @@ static void cache_fresh_locked(struct cache_head *head, time_t expiry)
 {
 	head->expiry_time = expiry;
 	head->last_refresh = seconds_since_boot();
+	smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
 	set_bit(CACHE_VALID, &head->flags);
 }
 
@@ -208,8 +209,16 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head
 		/* entry is valid */
 		if (test_bit(CACHE_NEGATIVE, &h->flags))
 			return -ENOENT;
-		else
+		else {
+			/*
+			 * In combination with write barrier in
+			 * sunrpc_cache_update, ensures that anyone
+			 * using the cache entry after this sees the
+			 * updated contents:
+			 */
+			smp_rmb();
 			return 0;
+		}
 	}
 }
 
-- 
cgit v1.2.2


From d75faea330dbd1873c9094e9926ae306590c0998 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 30 Nov 2010 19:15:01 -0500
Subject: rpc: move sk_bc_xprt to svc_xprt

This seems obviously transport-level information even if it's currently
used only by the server socket code.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c  | 10 ++++++----
 net/sunrpc/xprtsock.c |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 52bd1130e83a..3bb400f86eae 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -987,15 +987,17 @@ static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
 		vec[0] = rqstp->rq_arg.head[0];
 	} else {
 		/* REPLY */
-		if (svsk->sk_bc_xprt)
-			req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid);
+		struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
+
+		if (bc_xprt)
+			req = xprt_lookup_rqst(bc_xprt, xid);
 
 		if (!req) {
 			printk(KERN_NOTICE
 				"%s: Got unrecognized reply: "
-				"calldir 0x%x sk_bc_xprt %p xid %08x\n",
+				"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
 				__func__, ntohl(calldir),
-				svsk->sk_bc_xprt, xid);
+				bc_xprt, xid);
 			vec[0] = rqstp->rq_arg.head[0];
 			goto out;
 		}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index dfcab5ac65af..18dc42eb5597 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2379,9 +2379,9 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 	 * The backchannel uses the same socket connection as the
 	 * forechannel
 	 */
+	args->bc_xprt->xpt_bc_xprt = xprt;
 	xprt->bc_xprt = args->bc_xprt;
 	bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
-	bc_sock->sk_bc_xprt = xprt;
 	transport->sock = bc_sock->sk_sock;
 	transport->inet = bc_sock->sk_sk;
 
-- 
cgit v1.2.2


From 99de8ea962bbc11a51ad4c52e3dc93bee5f6ba70 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 8 Dec 2010 12:45:44 -0500
Subject: rpc: keep backchannel xprt as long as server connection

Multiple backchannels can share the same tcp connection; from rfc 5661 section
2.10.3.1:

	A connection's association with a session is not exclusive.  A
	connection associated with the channel(s) of one session may be
	simultaneously associated with the channel(s) of other sessions
	including sessions associated with other client IDs.

However, multiple backchannels share a connection, they must all share
the same xid stream (hence the same rpc_xprt); the only way we have to
match replies with calls at the rpc layer is using the xid.

So, keep the rpc_xprt around as long as the connection lasts, in case
we're asked to use the connection as a backchannel again.

Requests to create new backchannel clients over a given server
connection should results in creating new clients that reuse the
existing rpc_xprt.

But to start, just reject attempts to associate multiple rpc_xprt's with
the same underlying bc_xprt.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c |  4 ++++
 net/sunrpc/xprt.c     |  2 +-
 net/sunrpc/xprtsock.c | 34 ++++++++++++++++++++++++----------
 3 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 173f3b975d49..ab86b7927f84 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -13,6 +13,7 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/xprt.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
@@ -128,6 +129,9 @@ static void svc_xprt_free(struct kref *kref)
 	if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
 		svcauth_unix_info_release(xprt);
 	put_net(xprt->xpt_net);
+	/* See comment on corresponding get in xs_setup_bc_tcp(): */
+	if (xprt->xpt_bc_xprt)
+		xprt_put(xprt->xpt_bc_xprt);
 	xprt->xpt_ops->xpo_free(xprt);
 	module_put(owner);
 }
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 4c8f18aff7c3..749ad15ae305 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -965,6 +965,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
 	xprt = kzalloc(size, GFP_KERNEL);
 	if (xprt == NULL)
 		goto out;
+	kref_init(&xprt->kref);
 
 	xprt->max_reqs = max_req;
 	xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
@@ -1102,7 +1103,6 @@ found:
 		return xprt;
 	}
 
-	kref_init(&xprt->kref);
 	spin_lock_init(&xprt->transport_lock);
 	spin_lock_init(&xprt->reserve_lock);
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 18dc42eb5597..0ef4dd4414ec 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2375,16 +2375,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 	xprt->reestablish_timeout = 0;
 	xprt->idle_timeout = 0;
 
-	/*
-	 * The backchannel uses the same socket connection as the
-	 * forechannel
-	 */
-	args->bc_xprt->xpt_bc_xprt = xprt;
-	xprt->bc_xprt = args->bc_xprt;
-	bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
-	transport->sock = bc_sock->sk_sock;
-	transport->inet = bc_sock->sk_sk;
-
 	xprt->ops = &bc_tcp_ops;
 
 	switch (addr->sa_family) {
@@ -2406,6 +2396,29 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 			xprt->address_strings[RPC_DISPLAY_PORT],
 			xprt->address_strings[RPC_DISPLAY_PROTO]);
 
+	/*
+	 * The backchannel uses the same socket connection as the
+	 * forechannel
+	 */
+	if (args->bc_xprt->xpt_bc_xprt) {
+		/* XXX: actually, want to catch this case... */
+		ret = ERR_PTR(-EINVAL);
+		goto out_err;
+	}
+	/*
+	 * Once we've associated a backchannel xprt with a connection,
+	 * we want to keep it around as long as long as the connection
+	 * lasts, in case we need to start using it for a backchannel
+	 * again; this reference won't be dropped until bc_xprt is
+	 * destroyed.
+	 */
+	xprt_get(xprt);
+	args->bc_xprt->xpt_bc_xprt = xprt;
+	xprt->bc_xprt = args->bc_xprt;
+	bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
+	transport->sock = bc_sock->sk_sock;
+	transport->inet = bc_sock->sk_sk;
+
 	/*
 	 * Since we don't want connections for the backchannel, we set
 	 * the xprt status to connected
@@ -2415,6 +2428,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 
 	if (try_module_get(THIS_MODULE))
 		return xprt;
+	xprt_put(xprt);
 	ret = ERR_PTR(-EINVAL);
 out_err:
 	xprt_free(xprt);
-- 
cgit v1.2.2


From f0418aa4b1103f959d64dc18273efa04ee0140e9 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 8 Dec 2010 13:48:19 -0500
Subject: rpc: allow xprt_class->setup to return a preexisting xprt

This allows us to reuse the xprt associated with a server connection if
one has already been set up.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprt.c     |  3 +++
 net/sunrpc/xprtsock.c | 18 +++++++++---------
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 749ad15ae305..856274d7e85c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1102,6 +1102,9 @@ found:
 				-PTR_ERR(xprt));
 		return xprt;
 	}
+	if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state))
+		/* ->setup returned a pre-initialized xprt: */
+		return xprt;
 
 	spin_lock_init(&xprt->transport_lock);
 	spin_lock_init(&xprt->reserve_lock);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0ef4dd4414ec..ee091c869fcd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2359,6 +2359,15 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 	struct svc_sock *bc_sock;
 	struct rpc_xprt *ret;
 
+	if (args->bc_xprt->xpt_bc_xprt) {
+		/*
+		 * This server connection already has a backchannel
+		 * export; we can't create a new one, as we wouldn't be
+		 * able to match replies based on xid any more.  So,
+		 * reuse the already-existing one:
+		 */
+		 return args->bc_xprt->xpt_bc_xprt;
+	}
 	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
 	if (IS_ERR(xprt))
 		return xprt;
@@ -2396,15 +2405,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 			xprt->address_strings[RPC_DISPLAY_PORT],
 			xprt->address_strings[RPC_DISPLAY_PROTO]);
 
-	/*
-	 * The backchannel uses the same socket connection as the
-	 * forechannel
-	 */
-	if (args->bc_xprt->xpt_bc_xprt) {
-		/* XXX: actually, want to catch this case... */
-		ret = ERR_PTR(-EINVAL);
-		goto out_err;
-	}
 	/*
 	 * Once we've associated a backchannel xprt with a connection,
 	 * we want to keep it around as long as long as the connection
-- 
cgit v1.2.2


From 5b919f833d9d60588d026ad82d17f17e8872c7a9 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees.cook@canonical.com>
Date: Wed, 12 Jan 2011 00:34:49 -0800
Subject: net: ax25: fix information leak to userland harder

Commit fe10ae53384e48c51996941b7720ee16995cbcb7 adds a memset() to clear
the structure being sent back to userspace, but accidentally used the
wrong size.

Reported-by: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Kees Cook <kees.cook@canonical.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index bb86d2932394..6da5daeebab7 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1392,7 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 	ax25_cb *ax25;
 	int err = 0;
 
-	memset(fsa, 0, sizeof(fsa));
+	memset(fsa, 0, sizeof(*fsa));
 	lock_sock(sk);
 	ax25 = ax25_sk(sk);
 
-- 
cgit v1.2.2


From 2fc72c7b84002ffb3c66918e2a7b0ee607d8b5aa Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@balabit.hu>
Date: Wed, 12 Jan 2011 20:25:08 +0100
Subject: netfilter: fix compilation when conntrack is disabled but tproxy is
 enabled

The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but
failed to update the #ifdef stanzas guarding the defragmentation related
fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c.

This patch adds the required #ifdefs so that IPv6 tproxy can truly be used
without connection tracking.

Original report:
http://marc.info/?l=linux-netdev&m=129010118516341&w=2

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/core/skbuff.c                         | 2 ++
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 19d6c21220fd..d31bb36ae0dc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff *skb)
 	}
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 99abfb53bab9..97c5b21b9674 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,13 +19,15 @@
 
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_bridge.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
 static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 {
 	u16 zone = NF_CT_DEFAULT_ZONE;
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	if (skb->nfct)
 		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
 
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge &&
@@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 {
 	struct sk_buff *reasm;
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	/* Previously seen (loopback)?	*/
 	if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
 		return NF_ACCEPT;
+#endif
 
 	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
 	/* queued */
-- 
cgit v1.2.2


From 72b43d0898e97f588293b4a24b33c58c46633d81 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Wed, 12 Jan 2011 08:34:08 +0000
Subject: inet6: prevent network storms caused by linux IPv6 routers

Linux IPv6 forwards unicast packets, which are link layer multicasts...
The hole was present since day one. I was 100% this check is there, but it is not.

The problem shows itself, f.e. when Microsoft Network Load Balancer runs on a network.
This software resolves IPv6 unicast addresses to multicast MAC addresses.

Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 94b5bf132b2e..5f8d242be3f3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -401,6 +401,9 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 	}
 
+	if (skb->pkt_type != PACKET_HOST)
+		goto drop;
+
 	skb_forward_csum(skb);
 
 	/*
-- 
cgit v1.2.2


From 3806b4f3b6115ce324b7125844f9e6acc80d34ec Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 12 Jan 2011 14:50:51 +0000
Subject: eth: fix new kernel-doc warning

Fix new kernel-doc warning (copy-paste typo):

Warning(net/ethernet/eth.c:366): No description found for parameter 'rxqs'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f9d7ac924f15..44d2b42fda56 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -351,7 +351,7 @@ EXPORT_SYMBOL(ether_setup);
  * @sizeof_priv: Size of additional driver-private structure to be allocated
  *	for this Ethernet device
  * @txqs: The number of TX queues this device has.
- * @txqs: The number of RX queues this device has.
+ * @rxqs: The number of RX queues this device has.
  *
  * Fill in the fields of the device structure with Ethernet-generic
  * values. Basically does everything except registering the device.
-- 
cgit v1.2.2


From f31e8d4982653b39fe312f9938be0f49dd9ab5fa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 13 Jan 2011 14:19:55 +0100
Subject: netfilter: ctnetlink: fix loop in ctnetlink_get_conntrack()

This patch fixes a loop in ctnetlink_get_conntrack() that can be
triggered if you use the same socket to receive events and to
perform a GET operation. Under heavy load, netlink_unicast()
may return -EAGAIN, this error code is reserved in nfnetlink for
the module load-on-demand. Instead, we return -ENOBUFS which is
the appropriate error code that has to be propagated to
user-space.

Reported-by: Holger Eitzenberger <holger@eitzenberger.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5cb8d3027b18..2b7eef37875c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -972,7 +972,8 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 free:
 	kfree_skb(skb2);
 out:
-	return err;
+	/* this avoids a loop in nfnetlink. */
+	return err == -EAGAIN ? -ENOBUFS : err;
 }
 
 #ifdef CONFIG_NF_NAT_NEEDED
-- 
cgit v1.2.2


From 681c4d07dd5b2ce2ad9f6dbbf7841e479fbc7754 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 12 Jan 2011 13:40:33 +0100
Subject: mac80211: fix lockdep warning

Since the introduction of the fixes for the
reorder timer, mac80211 will cause lockdep
warnings because lockdep confuses
local->skb_queue and local->rx_skb_queue
and treats their lock as the same.

However, their locks are different, and are
valid in different contexts (the former is
used in IRQ context, the latter in BH only)
and the only thing to be done is mark the
former as a different lock class so that
lockdep can tell the difference.

Reported-by: Larry Finger <Larry.Finger@lwfinger.net>
Reported-by: Sujith <m.sujith@gmail.com>
Reported-by: Miles Lane <miles.lane@gmail.com>
Tested-by: Sujith <m.sujith@gmail.com>
Tested-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 485d36bc9a46..a46ff06d7cb8 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -39,6 +39,8 @@ module_param(ieee80211_disable_40mhz_24ghz, bool, 0644);
 MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz,
 		 "Disable 40MHz support in the 2.4GHz band");
 
+static struct lock_class_key ieee80211_rx_skb_queue_class;
+
 void ieee80211_configure_filter(struct ieee80211_local *local)
 {
 	u64 mc;
@@ -569,7 +571,15 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	spin_lock_init(&local->filter_lock);
 	spin_lock_init(&local->queue_stop_reason_lock);
 
-	skb_queue_head_init(&local->rx_skb_queue);
+	/*
+	 * The rx_skb_queue is only accessed from tasklets,
+	 * but other SKB queues are used from within IRQ
+	 * context. Therefore, this one needs a different
+	 * locking class so our direct, non-irq-safe use of
+	 * the queue's lock doesn't throw lockdep warnings.
+	 */
+	skb_queue_head_init_class(&local->rx_skb_queue,
+				  &ieee80211_rx_skb_queue_class);
 
 	INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work);
 
-- 
cgit v1.2.2


From 82694f764dad783a123394e2220b92b9be721b43 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <coelho@ti.com>
Date: Wed, 12 Jan 2011 15:18:11 +0200
Subject: mac80211: use maximum number of AMPDU frames as default in BA RX

When the buffer size is set to zero in the block ack parameter set
field, we should use the maximum supported number of subframes.  The
existing code was bogus and was doing some unnecessary calculations
that lead to wrong values.

Thanks Johannes for helping me figure this one out.

Cc: stable@kernel.org
Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Luciano Coelho <coelho@ti.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-rx.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index f138b195d657..227ca82eef72 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -185,8 +185,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 				     struct ieee80211_mgmt *mgmt,
 				     size_t len)
 {
-	struct ieee80211_hw *hw = &local->hw;
-	struct ieee80211_conf *conf = &hw->conf;
 	struct tid_ampdu_rx *tid_agg_rx;
 	u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
 	u8 dialog_token;
@@ -231,13 +229,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 		goto end_no_lock;
 	}
 	/* determine default buffer size */
-	if (buf_size == 0) {
-		struct ieee80211_supported_band *sband;
-
-		sband = local->hw.wiphy->bands[conf->channel->band];
-		buf_size = IEEE80211_MIN_AMPDU_BUF;
-		buf_size = buf_size << sband->ht_cap.ampdu_factor;
-	}
+	if (buf_size == 0)
+		buf_size = IEEE80211_MAX_AMPDU_BUF;
 
 
 	/* examine state machine */
-- 
cgit v1.2.2


From ed7809d9c41b514115ddffaa860694393c2016b3 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Thu, 13 Jan 2011 21:53:38 +0100
Subject: batman-adv: Even Batman should not dereference NULL pointers

There's a problem in net/batman-adv/unicast.c::frag_send_skb().
dev_alloc_skb() allocates memory and may fail, thus returning NULL. If
this happens we'll pass a NULL pointer on to skb_split() which in turn
hands it to skb_split_inside_header() from where it gets passed to
skb_put() that lets skb_tail_pointer() play with it and that function
dereferences it. And thus the bat dies.

While I was at it I also moved the call to dev_alloc_skb() above the
assignment to 'unicast_packet' since there's no reason to do that
assignment if the memory allocation fails.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/unicast.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index dc2e28bed844..ee41fef04b21 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -229,10 +229,12 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 	if (!bat_priv->primary_if)
 		goto dropped;
 
-	unicast_packet = (struct unicast_packet *) skb->data;
+	frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
+	if (!frag_skb)
+		goto dropped;
 
+	unicast_packet = (struct unicast_packet *) skb->data;
 	memcpy(&tmp_uc, unicast_packet, uc_hdr_len);
-	frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
 	skb_split(skb, frag_skb, data_len / 2);
 
 	if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 ||
-- 
cgit v1.2.2


From 1ac9ad1394fa542ac7ae0dc943ee3cda678799fa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 12 Jan 2011 12:13:14 +0000
Subject: net: remove dev_txq_stats_fold()

After recent changes, (percpu stats on vlan/tunnels...), we dont need
anymore per struct netdev_queue tx_bytes/tx_packets/tx_dropped counters.

Only remaining users are ixgbe, sch_teql, gianfar & macvlan :

1) ixgbe can be converted to use existing tx_ring counters.

2) macvlan incremented txq->tx_dropped, it can use the
dev->stats.tx_dropped counter.

3) sch_teql : almost revert ab35cd4b8f42 (Use net_device internal stats)
    Now we have ndo_get_stats64(), use it, even for "unsigned long"
fields (No need to bring back a struct net_device_stats)

4) gianfar adds a stats structure per tx queue to hold
tx_bytes/tx_packets

This removes a lockdep warning (and possible lockup) in rndis gadget,
calling dev_get_stats() from hard IRQ context.

Ref: http://www.spinics.net/lists/netdev/msg149202.html

Reported-by: Neil Jones <neiljay@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
CC: Sandeep Gopalpet <sandeep.kumar@freescale.com>
CC: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c       | 29 -----------------------------
 net/sched/sch_teql.c | 26 +++++++++++++++++++++-----
 2 files changed, 21 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a3ef808b5e36..83507c265e48 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5523,34 +5523,6 @@ void netdev_run_todo(void)
 	}
 }
 
-/**
- *	dev_txq_stats_fold - fold tx_queues stats
- *	@dev: device to get statistics from
- *	@stats: struct rtnl_link_stats64 to hold results
- */
-void dev_txq_stats_fold(const struct net_device *dev,
-			struct rtnl_link_stats64 *stats)
-{
-	u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-	unsigned int i;
-	struct netdev_queue *txq;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		txq = netdev_get_tx_queue(dev, i);
-		spin_lock_bh(&txq->_xmit_lock);
-		tx_bytes   += txq->tx_bytes;
-		tx_packets += txq->tx_packets;
-		tx_dropped += txq->tx_dropped;
-		spin_unlock_bh(&txq->_xmit_lock);
-	}
-	if (tx_bytes || tx_packets || tx_dropped) {
-		stats->tx_bytes   = tx_bytes;
-		stats->tx_packets = tx_packets;
-		stats->tx_dropped = tx_dropped;
-	}
-}
-EXPORT_SYMBOL(dev_txq_stats_fold);
-
 /* Convert net_device_stats to rtnl_link_stats64.  They have the same
  * fields in the same order, with only the type differing.
  */
@@ -5594,7 +5566,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
 	} else {
 		netdev_stats_to_stats64(storage, &dev->stats);
-		dev_txq_stats_fold(dev, storage);
 	}
 	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
 	return storage;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index af9360d1f6eb..84ce48eadff4 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -59,6 +59,10 @@ struct teql_master
 	struct net_device *dev;
 	struct Qdisc *slaves;
 	struct list_head master_list;
+	unsigned long	tx_bytes;
+	unsigned long	tx_packets;
+	unsigned long	tx_errors;
+	unsigned long	tx_dropped;
 };
 
 struct teql_sched_data
@@ -274,7 +278,6 @@ static inline int teql_resolve(struct sk_buff *skb,
 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct teql_master *master = netdev_priv(dev);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 	struct Qdisc *start, *q;
 	int busy;
 	int nores;
@@ -314,8 +317,8 @@ restart:
 					__netif_tx_unlock(slave_txq);
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
-					txq->tx_packets++;
-					txq->tx_bytes += length;
+					master->tx_packets++;
+					master->tx_bytes += length;
 					return NETDEV_TX_OK;
 				}
 				__netif_tx_unlock(slave_txq);
@@ -342,10 +345,10 @@ restart:
 		netif_stop_queue(dev);
 		return NETDEV_TX_BUSY;
 	}
-	dev->stats.tx_errors++;
+	master->tx_errors++;
 
 drop:
-	txq->tx_dropped++;
+	master->tx_dropped++;
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
@@ -398,6 +401,18 @@ static int teql_master_close(struct net_device *dev)
 	return 0;
 }
 
+static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
+						     struct rtnl_link_stats64 *stats)
+{
+	struct teql_master *m = netdev_priv(dev);
+
+	stats->tx_packets	= m->tx_packets;
+	stats->tx_bytes		= m->tx_bytes;
+	stats->tx_errors	= m->tx_errors;
+	stats->tx_dropped	= m->tx_dropped;
+	return stats;
+}
+
 static int teql_master_mtu(struct net_device *dev, int new_mtu)
 {
 	struct teql_master *m = netdev_priv(dev);
@@ -422,6 +437,7 @@ static const struct net_device_ops teql_netdev_ops = {
 	.ndo_open	= teql_master_open,
 	.ndo_stop	= teql_master_close,
 	.ndo_start_xmit	= teql_master_xmit,
+	.ndo_get_stats64 = teql_master_stats64,
 	.ndo_change_mtu	= teql_master_mtu,
 };
 
-- 
cgit v1.2.2


From e1fcc7e2a719d139322fab3f47cfbd4340cf3d82 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 14 Jan 2011 15:56:31 +0000
Subject: rxrpc: rxrpc_workqueue isn't used during memory reclaim

rxrpc_workqueue isn't depended upon while reclaiming memory.  Convert
to alloc_workqueue() without WQ_MEM_RECLAIM.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: linux-afs@lists.infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/rxrpc/af_rxrpc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0b9bb2085ce4..74c064c0dfdd 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -808,7 +808,7 @@ static int __init af_rxrpc_init(void)
 		goto error_call_jar;
 	}
 
-	rxrpc_workqueue = create_workqueue("krxrpcd");
+	rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1);
 	if (!rxrpc_workqueue) {
 		printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n");
 		goto error_work_queue;
-- 
cgit v1.2.2


From aa0adb1a85e159cf57f0e11282bc6c9e3606a5f3 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 15 Jan 2011 14:39:43 +0000
Subject: batman-adv: Use "__attribute__" shortcut macros

Linux 2.6.21 defines different macros for __attribute__ which are also
used inside batman-adv. The next version of checkpatch.pl warns about
the usage of __attribute__((packed))).

Linux 2.6.33 defines an extra macro __always_unused which is used to
assist source code analyzers and can be used to removed the last
existing __attribute__ inside the source code.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/main.h   |  6 +++---
 net/batman-adv/packet.h | 14 +++++++-------
 net/batman-adv/types.h  |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index d4d9926c2201..65106fb61b8f 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -151,9 +151,9 @@ int debug_log(struct bat_priv *bat_priv, char *fmt, ...);
 	}							\
 	while (0)
 #else /* !CONFIG_BATMAN_ADV_DEBUG */
-static inline void bat_dbg(char type __attribute__((unused)),
-			   struct bat_priv *bat_priv __attribute__((unused)),
-			   char *fmt __attribute__((unused)), ...)
+static inline void bat_dbg(char type __always_unused,
+			   struct bat_priv *bat_priv __always_unused,
+			   char *fmt __always_unused, ...)
 {
 }
 #endif
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index b49fdf70a6d5..2284e8129cb2 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -63,7 +63,7 @@ struct batman_packet {
 	uint8_t  num_hna;
 	uint8_t  gw_flags;  /* flags related to gateway class */
 	uint8_t  align;
-} __attribute__((packed));
+} __packed;
 
 #define BAT_PACKET_LEN sizeof(struct batman_packet)
 
@@ -76,7 +76,7 @@ struct icmp_packet {
 	uint8_t  orig[6];
 	uint16_t seqno;
 	uint8_t  uid;
-} __attribute__((packed));
+} __packed;
 
 #define BAT_RR_LEN 16
 
@@ -93,14 +93,14 @@ struct icmp_packet_rr {
 	uint8_t  uid;
 	uint8_t  rr_cur;
 	uint8_t  rr[BAT_RR_LEN][ETH_ALEN];
-} __attribute__((packed));
+} __packed;
 
 struct unicast_packet {
 	uint8_t  packet_type;
 	uint8_t  version;  /* batman version field */
 	uint8_t  dest[6];
 	uint8_t  ttl;
-} __attribute__((packed));
+} __packed;
 
 struct unicast_frag_packet {
 	uint8_t  packet_type;
@@ -110,7 +110,7 @@ struct unicast_frag_packet {
 	uint8_t  flags;
 	uint8_t  orig[6];
 	uint16_t seqno;
-} __attribute__((packed));
+} __packed;
 
 struct bcast_packet {
 	uint8_t  packet_type;
@@ -118,7 +118,7 @@ struct bcast_packet {
 	uint8_t  orig[6];
 	uint8_t  ttl;
 	uint32_t seqno;
-} __attribute__((packed));
+} __packed;
 
 struct vis_packet {
 	uint8_t  packet_type;
@@ -131,6 +131,6 @@ struct vis_packet {
 				  * neighbors */
 	uint8_t  target_orig[6]; /* who should receive this packet */
 	uint8_t  sender_orig[6]; /* who sent or rebroadcasted this packet */
-} __attribute__((packed));
+} __packed;
 
 #endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 97cb23dd3e69..bf3f6f5a12c4 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -246,13 +246,13 @@ struct vis_info {
 	/* this packet might be part of the vis send queue. */
 	struct sk_buff *skb_packet;
 	/* vis_info may follow here*/
-} __attribute__((packed));
+} __packed;
 
 struct vis_info_entry {
 	uint8_t  src[ETH_ALEN];
 	uint8_t  dest[ETH_ALEN];
 	uint8_t  quality;	/* quality = 0 means HNA */
-} __attribute__((packed));
+} __packed;
 
 struct recvlist_node {
 	struct list_head list;
-- 
cgit v1.2.2


From 5e5073280379d38e86ade471daa7443b553fc839 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <kurt.van.dijck@eia.be>
Date: Sat, 15 Jan 2011 20:56:42 -0800
Subject: can: test size of struct sockaddr in sendmsg

This patch makes the CAN socket code conform to the manpage of sendmsg.

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/bcm.c | 3 +++
 net/can/raw.c | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9d5e8accfab1..092dc88a7c64 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1256,6 +1256,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
 		struct sockaddr_can *addr =
 			(struct sockaddr_can *)msg->msg_name;
 
+		if (msg->msg_namelen < sizeof(*addr))
+			return -EINVAL;
+
 		if (addr->can_family != AF_CAN)
 			return -EINVAL;
 
diff --git a/net/can/raw.c b/net/can/raw.c
index e88f610fdb7b..883e9d74fddf 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -649,6 +649,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 		struct sockaddr_can *addr =
 			(struct sockaddr_can *)msg->msg_name;
 
+		if (msg->msg_namelen < sizeof(*addr))
+			return -EINVAL;
+
 		if (addr->can_family != AF_CAN)
 			return -EINVAL;
 
-- 
cgit v1.2.2


From 01a859014b35deb6cc63b1dc2808ca7a0e10a4de Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 15 Jan 2011 03:06:39 +0000
Subject: caif: checking the wrong variable

In the original code we check if (servl == NULL) twice.  The first time
should print the message that cfmuxl_remove_uplayer() failed and set
"ret" correctly, but instead it just returns success.  The second check
should be checking the value of "ret" instead of "servl".

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfcnfg.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 21ede141018a..c665de778b60 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -191,6 +191,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
 	struct cflayer *servl = NULL;
 	struct cfcnfg_phyinfo *phyinfo = NULL;
 	u8 phyid = 0;
+
 	caif_assert(adap_layer != NULL);
 	channel_id = adap_layer->id;
 	if (adap_layer->dn == NULL || channel_id == 0) {
@@ -199,16 +200,16 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
 		goto end;
 	}
 	servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id);
-	if (servl == NULL)
-		goto end;
-	layer_set_up(servl, NULL);
-	ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
 	if (servl == NULL) {
 		pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)",
 		       channel_id);
 		ret = -EINVAL;
 		goto end;
 	}
+	layer_set_up(servl, NULL);
+	ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
+	if (ret)
+		goto end;
 	caif_assert(channel_id == servl->id);
 	if (adap_layer->dn != NULL) {
 		phyid = cfsrvl_getphyid(adap_layer->dn);
-- 
cgit v1.2.2


From 2fdc1c8093255f9da877d7b9ce3f46c2098377dc Mon Sep 17 00:00:00 2001
From: Romain Francoise <romain@orebokech.com>
Date: Mon, 17 Jan 2011 07:59:18 +0000
Subject: ipv6: Silence privacy extensions initialization

When a network namespace is created (via CLONE_NEWNET), the loopback
interface is automatically added to the new namespace, triggering a
printk in ipv6_add_dev() if CONFIG_IPV6_PRIVACY is set.

This is problematic for applications which use CLONE_NEWNET as
part of a sandbox, like Chromium's suid sandbox or recent versions of
vsftpd. On a busy machine, it can lead to thousands of useless
"lo: Disabled Privacy Extensions" messages appearing in dmesg.

It's easy enough to check the status of privacy extensions via the
use_tempaddr sysctl, so just removing the printk seems like the most
sensible solution.

Signed-off-by: Romain Francoise <romain@orebokech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5b189c97c2fc..24a1cf110d80 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -420,9 +420,6 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	    dev->type == ARPHRD_TUNNEL6 ||
 	    dev->type == ARPHRD_SIT ||
 	    dev->type == ARPHRD_NONE) {
-		printk(KERN_INFO
-		       "%s: Disabled Privacy Extensions\n",
-		       dev->name);
 		ndev->cnf.use_tempaddr = -1;
 	} else {
 		in6_dev_hold(ndev);
-- 
cgit v1.2.2


From 6ee400aafb60289b78fcde5ebccd8c4973fc53f4 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Mon, 17 Jan 2011 20:46:00 +0000
Subject: net offloading: Do not mask out NETIF_F_HW_VLAN_TX for vlan.

In netif_skb_features() we return only the features that are valid for vlans
if we have a vlan packet.  However, we should not mask out NETIF_F_HW_VLAN_TX
since it enables transmission of vlan tags and is obviously valid.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 83507c265e48..4c58d11d3b68 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2023,13 +2023,13 @@ int netif_skb_features(struct sk_buff *skb)
 		return harmonize_features(skb, protocol, features);
 	}
 
-	features &= skb->dev->vlan_features;
+	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
 
 	if (protocol != htons(ETH_P_8021Q)) {
 		return harmonize_features(skb, protocol, features);
 	} else {
 		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
-				NETIF_F_GEN_CSUM;
+				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
 		return harmonize_features(skb, protocol, features);
 	}
 }
-- 
cgit v1.2.2


From 4571928fc73589e9c5217cd069d2c0b4ff1818a8 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Fri, 14 Jan 2011 14:59:44 +0100
Subject: Bluetooth: l2cap: fix misuse of logical operation in place of bitop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CC: Marcel Holtmann <marcel@holtmann.org>
CC: "Gustavo F. Padovan" <padovan@profusion.mobi>
CC: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c791fcda7b2d..4fd88eb0a464 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1893,8 +1893,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 		if (pi->mode == L2CAP_MODE_STREAMING) {
 			l2cap_streaming_send(sk);
 		} else {
-			if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY &&
-					pi->conn_state && L2CAP_CONN_WAIT_F) {
+			if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
+					(pi->conn_state & L2CAP_CONN_WAIT_F)) {
 				err = len;
 				break;
 			}
-- 
cgit v1.2.2


From e2e0cacbd4b0c7c69c7591d37c243f2363aeaa71 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Tue, 4 Jan 2011 12:08:50 +0200
Subject: Bluetooth: Fix leaking blacklist when unregistering a hci device

The blacklist should be freed before the hci device gets unregistered.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_core.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 8b602d881fd7..9c4541bc488a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1011,6 +1011,10 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	destroy_workqueue(hdev->workqueue);
 
+	hci_dev_lock_bh(hdev);
+	hci_blacklist_clear(hdev);
+	hci_dev_unlock_bh(hdev);
+
 	__hci_dev_put(hdev);
 
 	return 0;
-- 
cgit v1.2.2


From 683d949a7fbf33c244670e34d35c460e0d6558cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Turek?= <8an@praha12.net>
Date: Wed, 5 Jan 2011 02:43:59 +0100
Subject: Bluetooth: Never deallocate a session when some DLC points to it

Fix a bug introduced in commit 9cf5b0ea3a7f1432c61029f7aaf4b8b338628884:
function rfcomm_recv_ua calls rfcomm_session_put without checking that
the session is not referenced by some DLC. If the session is freed, that
DLC would refer to deallocated memory, causing an oops later, as shown
in this bug report: https://bugzilla.kernel.org/show_bug.cgi?id=15994

Signed-off-by: Lukas Turek <8an@praha12.net>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/rfcomm/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index ff8aaa736650..6b83776534fb 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1164,7 +1164,8 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
 			 * initiator rfcomm_process_rx already calls
 			 * rfcomm_session_put() */
 			if (s->sock->sk->sk_state != BT_CLOSED)
-				rfcomm_session_put(s);
+				if (list_empty(&s->dlcs))
+					rfcomm_session_put(s);
 			break;
 		}
 	}
-- 
cgit v1.2.2


From 88644bb9fee591b2743a881923263bc28df4cded Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 19 Jan 2011 12:06:48 +0530
Subject: Revert "Bluetooth: Update sec_level/auth_type for already existing
 connections"

This reverts commit 045309820afe047920a50de25634dab46a1e851d. That
commit is wrong for two reasons:

- The conn->sec_level shouldn't be updated without performing
authentication first (as it's supposed to represent the level of
security that the existing connection has)

- A higher auth_type value doesn't mean "more secure" like the commit
seems to assume. E.g. dedicated bonding with MITM protection is 0x03
whereas general bonding without MITM protection is 0x04. hci_conn_auth
already takes care of updating conn->auth_type so hci_connect doesn't
need to do it.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_conn.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6b90a4191734..65a3fb5678eb 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -382,11 +382,6 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 		acl->sec_level = sec_level;
 		acl->auth_type = auth_type;
 		hci_acl_connect(acl);
-	} else {
-		if (acl->sec_level < sec_level)
-			acl->sec_level = sec_level;
-		if (acl->auth_type < auth_type)
-			acl->auth_type = auth_type;
 	}
 
 	if (type == ACL_LINK)
-- 
cgit v1.2.2


From 65cf686ee102b7eb0477a4bab82ff227071a0258 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 19 Jan 2011 12:06:49 +0530
Subject: Bluetooth: Fix MITM protection requirement preservation

If an existing connection has a MITM protection requirement (the first
bit of the auth_type) then that requirement should not be cleared by new
sockets that reuse the ACL but don't have that requirement.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_conn.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 65a3fb5678eb..fe712a89a856 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -442,6 +442,9 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 	else if (conn->link_mode & HCI_LM_AUTH)
 		return 1;
 
+	/* Make sure we preserve an existing MITM requirement*/
+	auth_type |= (conn->auth_type & 0x01);
+
 	conn->auth_type = auth_type;
 
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
-- 
cgit v1.2.2


From 8556edd32f01c50a3c99e44dc2c3b1252ea59605 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 19 Jan 2011 12:06:50 +0530
Subject: Bluetooth: Create a unified auth_type evaluation function

The logic for determining the needed auth_type for an L2CAP socket is
rather complicated and has so far been duplicated in
l2cap_check_security as well as l2cap_do_connect. Additionally the
l2cap_check_security code was completely missing the handling of
SOCK_RAW type sockets. This patch creates a unified function for the
evaluation and makes l2cap_do_connect and l2cap_check_security use that
function.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 77 +++++++++++++++++++--------------------------------
 1 file changed, 28 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 4fd88eb0a464..ae227bf25563 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -305,33 +305,44 @@ static void l2cap_chan_del(struct sock *sk, int err)
 	}
 }
 
-/* Service level security */
-static inline int l2cap_check_security(struct sock *sk)
+static inline u8 l2cap_get_auth_type(struct sock *sk)
 {
-	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
-	__u8 auth_type;
+	if (sk->sk_type == SOCK_RAW) {
+		switch (l2cap_pi(sk)->sec_level) {
+		case BT_SECURITY_HIGH:
+			return HCI_AT_DEDICATED_BONDING_MITM;
+		case BT_SECURITY_MEDIUM:
+			return HCI_AT_DEDICATED_BONDING;
+		default:
+			return HCI_AT_NO_BONDING;
+		}
+	} else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
+		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
 
-	if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
 		if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
-			auth_type = HCI_AT_NO_BONDING_MITM;
+			return HCI_AT_NO_BONDING_MITM;
 		else
-			auth_type = HCI_AT_NO_BONDING;
-
-		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
-			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
+			return HCI_AT_NO_BONDING;
 	} else {
 		switch (l2cap_pi(sk)->sec_level) {
 		case BT_SECURITY_HIGH:
-			auth_type = HCI_AT_GENERAL_BONDING_MITM;
-			break;
+			return HCI_AT_GENERAL_BONDING_MITM;
 		case BT_SECURITY_MEDIUM:
-			auth_type = HCI_AT_GENERAL_BONDING;
-			break;
+			return HCI_AT_GENERAL_BONDING;
 		default:
-			auth_type = HCI_AT_NO_BONDING;
-			break;
+			return HCI_AT_NO_BONDING;
 		}
 	}
+}
+
+/* Service level security */
+static inline int l2cap_check_security(struct sock *sk)
+{
+	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+	__u8 auth_type;
+
+	auth_type = l2cap_get_auth_type(sk);
 
 	return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level,
 								auth_type);
@@ -1068,39 +1079,7 @@ static int l2cap_do_connect(struct sock *sk)
 
 	err = -ENOMEM;
 
-	if (sk->sk_type == SOCK_RAW) {
-		switch (l2cap_pi(sk)->sec_level) {
-		case BT_SECURITY_HIGH:
-			auth_type = HCI_AT_DEDICATED_BONDING_MITM;
-			break;
-		case BT_SECURITY_MEDIUM:
-			auth_type = HCI_AT_DEDICATED_BONDING;
-			break;
-		default:
-			auth_type = HCI_AT_NO_BONDING;
-			break;
-		}
-	} else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
-		if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
-			auth_type = HCI_AT_NO_BONDING_MITM;
-		else
-			auth_type = HCI_AT_NO_BONDING;
-
-		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
-			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
-	} else {
-		switch (l2cap_pi(sk)->sec_level) {
-		case BT_SECURITY_HIGH:
-			auth_type = HCI_AT_GENERAL_BONDING_MITM;
-			break;
-		case BT_SECURITY_MEDIUM:
-			auth_type = HCI_AT_GENERAL_BONDING;
-			break;
-		default:
-			auth_type = HCI_AT_NO_BONDING;
-			break;
-		}
-	}
+	auth_type = l2cap_get_auth_type(sk);
 
 	hcon = hci_connect(hdev, ACL_LINK, dst,
 					l2cap_pi(sk)->sec_level, auth_type);
-- 
cgit v1.2.2


From d00ef24fc2923b65fdd440dc6445903e965841ac Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 19 Jan 2011 12:06:51 +0530
Subject: Bluetooth: Fix authentication request for L2CAP raw sockets

When there is an existing connection l2cap_check_security needs to be
called to ensure that the security level of the new socket is fulfilled.
Normally l2cap_do_start takes care of this, but that function doesn't
get called for SOCK_RAW type sockets. This patch adds the necessary
l2cap_check_security call to the appropriate branch in l2cap_do_connect.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ae227bf25563..7550abb0c96a 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1106,7 +1106,8 @@ static int l2cap_do_connect(struct sock *sk)
 		if (sk->sk_type != SOCK_SEQPACKET &&
 				sk->sk_type != SOCK_STREAM) {
 			l2cap_sock_clear_timer(sk);
-			sk->sk_state = BT_CONNECTED;
+			if (l2cap_check_security(sk))
+				sk->sk_state = BT_CONNECTED;
 		} else
 			l2cap_do_start(sk);
 	}
-- 
cgit v1.2.2


From 765c2a964b49bd06b61a52991519281c85d82b67 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 19 Jan 2011 12:06:52 +0530
Subject: Bluetooth: Fix race condition with conn->sec_level

The conn->sec_level value is supposed to represent the current level of
security that the connection has. However, by assigning to it before
requesting authentication it will have the wrong value during the
authentication procedure. To fix this a pending_sec_level variable is
added which is used to track the desired security level while making
sure that sec_level always represents the current level of security.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_conn.c  | 8 ++++++--
 net/bluetooth/hci_event.c | 9 +++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index fe712a89a856..99cd8d9d891b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -379,7 +379,8 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 	hci_conn_hold(acl);
 
 	if (acl->state == BT_OPEN || acl->state == BT_CLOSED) {
-		acl->sec_level = sec_level;
+		acl->sec_level = BT_SECURITY_LOW;
+		acl->pending_sec_level = sec_level;
 		acl->auth_type = auth_type;
 		hci_acl_connect(acl);
 	}
@@ -437,8 +438,11 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 {
 	BT_DBG("conn %p", conn);
 
+	if (conn->pending_sec_level > sec_level)
+		sec_level = conn->pending_sec_level;
+
 	if (sec_level > conn->sec_level)
-		conn->sec_level = sec_level;
+		conn->pending_sec_level = sec_level;
 	else if (conn->link_mode & HCI_LM_AUTH)
 		return 1;
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 38100170d380..a290854fdaa6 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -692,13 +692,13 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev,
 	if (conn->state != BT_CONFIG || !conn->out)
 		return 0;
 
-	if (conn->sec_level == BT_SECURITY_SDP)
+	if (conn->pending_sec_level == BT_SECURITY_SDP)
 		return 0;
 
 	/* Only request authentication for SSP connections or non-SSP
 	 * devices with sec_level HIGH */
 	if (!(hdev->ssp_mode > 0 && conn->ssp_mode > 0) &&
-					conn->sec_level != BT_SECURITY_HIGH)
+				conn->pending_sec_level != BT_SECURITY_HIGH)
 		return 0;
 
 	return 1;
@@ -1095,9 +1095,10 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
-		if (!ev->status)
+		if (!ev->status) {
 			conn->link_mode |= HCI_LM_AUTH;
-		else
+			conn->sec_level = conn->pending_sec_level;
+		} else
 			conn->sec_level = BT_SECURITY_LOW;
 
 		clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
-- 
cgit v1.2.2


From b8f3ab4290f1e720166e888ea2a1d1d44c4d15dd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 18 Jan 2011 12:40:38 -0800
Subject: Revert "netlink: test for all flags of the NLM_F_DUMP composite"

This reverts commit 0ab03c2b1478f2438d2c80204f7fef65b1bca9cf.

It breaks several things including the avahi daemon.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c                 | 2 +-
 net/ipv4/inet_diag.c                 | 2 +-
 net/netfilter/nf_conntrack_netlink.c | 4 ++--
 net/netlink/genetlink.c              | 2 +-
 net/xfrm/xfrm_user.c                 | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a5f7535aab5b..750db57f3bb3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1820,7 +1820,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
+	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
 		struct sock *rtnl;
 		rtnl_dumpit_func dumpit;
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2746c1fa6417..2ada17129fce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	    nlmsg_len(nlh) < hdrlen)
 		return -EINVAL;
 
-	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		if (nlmsg_attrlen(nlh, hdrlen)) {
 			struct nlattr *attr;
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2b7eef37875c..93297aaceb2b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -924,7 +924,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	u16 zone;
 	int err;
 
-	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP)
+	if (nlh->nlmsg_flags & NLM_F_DUMP)
 		return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
 					  ctnetlink_done);
 
@@ -1787,7 +1787,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	u16 zone;
 	int err;
 
-	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		return netlink_dump_start(ctnl, skb, nlh,
 					  ctnetlink_exp_dump_table,
 					  ctnetlink_exp_done);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f83cb370292b..1781d99145e2 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -519,7 +519,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	    security_netlink_recv(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		if (ops->dumpit == NULL)
 			return -EOPNOTSUPP;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d5e1e0b08890..61291965c5f6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2189,7 +2189,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
 	     type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
-	    (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
+	    (nlh->nlmsg_flags & NLM_F_DUMP)) {
 		if (link->dump == NULL)
 			return -EINVAL;
 
-- 
cgit v1.2.2


From d402786ea4f8433774a812d6b8635e737425cddd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 19 Jan 2011 00:51:36 +0000
Subject: net: fix can_checksum_protocol() arguments swap

commit 0363466866d901fbc (net offloading: Convert checksums to use
centrally computed features.) mistakenly swapped can_checksum_protocol()
arguments.

This broke IPv6 on bnx2 for instance, on NIC without TCPv6 checksum
offloads.

Reported-by: Hans de Bruin <jmdebruin@xmsnet.nl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4c58d11d3b68..8393ec408cd4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2001,7 +2001,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 
 static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
 {
-	if (!can_checksum_protocol(protocol, features)) {
+	if (!can_checksum_protocol(features, protocol)) {
 		features &= ~NETIF_F_ALL_CSUM;
 		features &= ~NETIF_F_SG;
 	} else if (illegal_highdma(skb->dev, skb)) {
-- 
cgit v1.2.2


From 4580ccc04ddd8c17a470573a7fdb8def2e036dfa Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Tue, 18 Jan 2011 22:39:00 +0000
Subject: sctp: user perfect name for Delayed SACK Timer option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The option name of Delayed SACK Timer should be SCTP_DELAYED_SACK,
not SCTP_DELAYED_ACK.

Left SCTP_DELAYED_ACK be concomitant with SCTP_DELAYED_SACK,
for making compatibility with existing applications.

Reference:
8.1.19.  Get or Set Delayed SACK Timer (SCTP_DELAYED_SACK)
（http://tools.ietf.org/html/draft-ietf-tsvwg-sctpsocket-25)

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Acked-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a09b0dd25f50..8e02550ff3e8 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3428,7 +3428,7 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen);
 		break;
 
-	case SCTP_DELAYED_ACK:
+	case SCTP_DELAYED_SACK:
 		retval = sctp_setsockopt_delayed_ack(sk, optval, optlen);
 		break;
 	case SCTP_PARTIAL_DELIVERY_POINT:
@@ -5333,7 +5333,7 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
 							  optlen);
 		break;
-	case SCTP_DELAYED_ACK:
+	case SCTP_DELAYED_SACK:
 		retval = sctp_getsockopt_delayed_ack(sk, len, optval,
 							  optlen);
 		break;
-- 
cgit v1.2.2


From 6a108a14fa356ef607be308b68337939e56ea94e Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 20 Jan 2011 14:44:16 -0800
Subject: kconfig: rename CONFIG_EMBEDDED to CONFIG_EXPERT

The meaning of CONFIG_EMBEDDED has long since been obsoleted; the option
is used to configure any non-standard kernel with a much larger scope than
only small devices.

This patch renames the option to CONFIG_EXPERT in init/Kconfig and fixes
references to the option throughout the kernel.  A new CONFIG_EMBEDDED
option is added that automatically selects CONFIG_EXPERT when enabled and
can be used in the future to isolate options that should only be
considered for embedded systems (RISC architectures, SLOB, etc).

Calling the option "EXPERT" more accurately represents its intention: only
expert users who understand the impact of the configuration changes they
are making should enable it.

Reviewed-by: Ingo Molnar <mingo@elte.hu>
Acked-by: David Woodhouse <david.woodhouse@intel.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Greg KH <gregkh@suse.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Robin Holt <holt@sgi.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/mac80211/Kconfig | 6 +++---
 net/rfkill/Kconfig   | 4 ++--
 net/wireless/Kconfig | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 9109262abd24..c766056d0488 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -20,7 +20,7 @@ config MAC80211_HAS_RC
 	def_bool n
 
 config MAC80211_RC_PID
-	bool "PID controller based rate control algorithm" if EMBEDDED
+	bool "PID controller based rate control algorithm" if EXPERT
 	select MAC80211_HAS_RC
 	---help---
 	  This option enables a TX rate control algorithm for
@@ -28,14 +28,14 @@ config MAC80211_RC_PID
 	  rate.
 
 config MAC80211_RC_MINSTREL
-	bool "Minstrel" if EMBEDDED
+	bool "Minstrel" if EXPERT
 	select MAC80211_HAS_RC
 	default y
 	---help---
 	  This option enables the 'minstrel' TX rate control algorithm
 
 config MAC80211_RC_MINSTREL_HT
-	bool "Minstrel 802.11n support" if EMBEDDED
+	bool "Minstrel 802.11n support" if EXPERT
 	depends on MAC80211_RC_MINSTREL
 	default y
 	---help---
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index eaf765876458..7fce6dfd2180 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -18,7 +18,7 @@ config RFKILL_LEDS
 	default y
 
 config RFKILL_INPUT
-	bool "RF switch input support" if EMBEDDED
+	bool "RF switch input support" if EXPERT
 	depends on RFKILL
 	depends on INPUT = y || RFKILL = INPUT
-	default y if !EMBEDDED
+	default y if !EXPERT
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index d0ee29063e5d..1f1ef70f34f2 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -95,7 +95,7 @@ config CFG80211_DEBUGFS
 	  If unsure, say N.
 
 config CFG80211_INTERNAL_REGDB
-	bool "use statically compiled regulatory rules database" if EMBEDDED
+	bool "use statically compiled regulatory rules database" if EXPERT
 	default n
 	depends on CFG80211
 	---help---
-- 
cgit v1.2.2


From 9190b3b3208d052d98cb601fcc192f3f71a5658b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 20 Jan 2011 23:31:33 -0800
Subject: net_sched: accurate bytes/packets stats/rates

In commit 44b8288308ac9d (net_sched: pfifo_head_drop problem), we fixed
a problem with pfifo_head drops that incorrectly decreased
sch->bstats.bytes and sch->bstats.packets

Several qdiscs (CHOKe, SFQ, pfifo_head, ...) are able to drop a
previously enqueued packet, and bstats cannot be changed, so
bstats/rates are not accurate (over estimated)

This patch changes the qdisc_bstats updates to be done at dequeue() time
instead of enqueue() time. bstats counters no longer account for dropped
frames, and rates are more correct, since enqueue() bursts dont have
effect on dequeue() rate.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c    |  3 +--
 net/sched/sch_drr.c    |  2 +-
 net/sched/sch_dsmark.c |  2 +-
 net/sched/sch_fifo.c   |  5 +----
 net/sched/sch_hfsc.c   |  2 +-
 net/sched/sch_htb.c    | 12 +++++-------
 net/sched/sch_multiq.c |  2 +-
 net/sched/sch_netem.c  |  3 +--
 net/sched/sch_prio.c   |  2 +-
 net/sched/sch_red.c    | 11 ++++++-----
 net/sched/sch_sfq.c    |  5 ++---
 net/sched/sch_tbf.c    |  2 +-
 net/sched/sch_teql.c   |  3 ++-
 13 files changed, 24 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index c80d1c210c5d..5f63ec58942c 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -390,7 +390,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	ret = qdisc_enqueue(skb, cl->q);
 	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
-		qdisc_bstats_update(sch, skb);
 		cbq_mark_toplevel(q, cl);
 		if (!cl->next_alive)
 			cbq_activate_class(cl);
@@ -649,7 +648,6 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 		ret = qdisc_enqueue(skb, cl->q);
 		if (ret == NET_XMIT_SUCCESS) {
 			sch->q.qlen++;
-			qdisc_bstats_update(sch, skb);
 			if (!cl->next_alive)
 				cbq_activate_class(cl);
 			return 0;
@@ -971,6 +969,7 @@ cbq_dequeue(struct Qdisc *sch)
 
 		skb = cbq_dequeue_1(sch);
 		if (skb) {
+			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
 			sch->flags &= ~TCQ_F_THROTTLED;
 			return skb;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index de55e642eafc..6b7fe4a84f13 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -376,7 +376,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	bstats_update(&cl->bstats, skb);
-	qdisc_bstats_update(sch, skb);
 
 	sch->q.qlen++;
 	return err;
@@ -403,6 +402,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
 			skb = qdisc_dequeue_peeked(cl->qdisc);
 			if (cl->qdisc->q.qlen == 0)
 				list_del(&cl->alist);
+			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
 			return skb;
 		}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 60f4bdd4408e..0f7bf3fdfea5 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -260,7 +260,6 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
-	qdisc_bstats_update(sch, skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -283,6 +282,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	if (skb == NULL)
 		return NULL;
 
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 
 	index = skb->tc_index & (p->indices - 1);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index aa4d6337e43c..d468b479aa93 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -46,17 +46,14 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 {
-	struct sk_buff *skb_head;
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
 	if (likely(skb_queue_len(&sch->q) < q->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	/* queue full, remove one skb to fulfill the limit */
-	skb_head = qdisc_dequeue_head(sch);
+	__qdisc_queue_drop_head(sch, &sch->q);
 	sch->qstats.drops++;
-	kfree_skb(skb_head);
-
 	qdisc_enqueue_tail(skb, sch);
 
 	return NET_XMIT_CN;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 2e45791d4f6c..14a799de1c35 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1600,7 +1600,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		set_active(cl, qdisc_pkt_len(skb));
 
 	bstats_update(&cl->bstats, skb);
-	qdisc_bstats_update(sch, skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -1666,6 +1665,7 @@ hfsc_dequeue(struct Qdisc *sch)
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 
 	return skb;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 984c1b0c6836..fc12fe6f5597 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -574,7 +574,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	sch->q.qlen++;
-	qdisc_bstats_update(sch, skb);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -842,7 +841,7 @@ next:
 
 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 {
-	struct sk_buff *skb = NULL;
+	struct sk_buff *skb;
 	struct htb_sched *q = qdisc_priv(sch);
 	int level;
 	psched_time_t next_event;
@@ -851,6 +850,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
 	skb = __skb_dequeue(&q->direct_queue);
 	if (skb != NULL) {
+ok:
+		qdisc_bstats_update(sch, skb);
 		sch->flags &= ~TCQ_F_THROTTLED;
 		sch->q.qlen--;
 		return skb;
@@ -884,11 +885,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 			int prio = ffz(m);
 			m |= 1 << prio;
 			skb = htb_dequeue_tree(q, prio, level);
-			if (likely(skb != NULL)) {
-				sch->q.qlen--;
-				sch->flags &= ~TCQ_F_THROTTLED;
-				goto fin;
-			}
+			if (likely(skb != NULL))
+				goto ok;
 		}
 	}
 	sch->qstats.overlimits++;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 21f13da24763..436a2e75b322 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -83,7 +83,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	ret = qdisc_enqueue(skb, qdisc);
 	if (ret == NET_XMIT_SUCCESS) {
-		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
@@ -112,6 +111,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 			qdisc = q->queues[q->curband];
 			skb = qdisc->dequeue(qdisc);
 			if (skb) {
+				qdisc_bstats_update(sch, skb);
 				sch->q.qlen--;
 				return skb;
 			}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1c4bce863479..6a3006b38dc5 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -240,7 +240,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
 		sch->q.qlen++;
-		qdisc_bstats_update(sch, skb);
 	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
 	}
@@ -289,6 +288,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 				skb->tstamp.tv64 = 0;
 #endif
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
+			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
 			return skb;
 		}
@@ -476,7 +476,6 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		__skb_queue_after(list, skb, nskb);
 
 		sch->qstats.backlog += qdisc_pkt_len(nskb);
-		qdisc_bstats_update(sch, nskb);
 
 		return NET_XMIT_SUCCESS;
 	}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 966158d49dd1..fbd710d619bf 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -84,7 +84,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	ret = qdisc_enqueue(skb, qdisc);
 	if (ret == NET_XMIT_SUCCESS) {
-		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
@@ -116,6 +115,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 		struct Qdisc *qdisc = q->queues[prio];
 		struct sk_buff *skb = qdisc->dequeue(qdisc);
 		if (skb) {
+			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
 			return skb;
 		}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index a6009c5a2c97..9f98dbd32d4c 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -94,7 +94,6 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	ret = qdisc_enqueue(skb, child);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
-		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 	} else if (net_xmit_drop_count(ret)) {
 		q->stats.pdrop++;
@@ -114,11 +113,13 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
 	struct Qdisc *child = q->qdisc;
 
 	skb = child->dequeue(child);
-	if (skb)
+	if (skb) {
+		qdisc_bstats_update(sch, skb);
 		sch->q.qlen--;
-	else if (!red_is_idling(&q->parms))
-		red_start_of_idle_period(&q->parms);
-
+	} else {
+		if (!red_is_idling(&q->parms))
+			red_start_of_idle_period(&q->parms);
+	}
 	return skb;
 }
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 239ec53a634d..edea8cefec6c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -402,10 +402,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->tail = slot;
 		slot->allot = q->scaled_quantum;
 	}
-	if (++sch->q.qlen <= q->limit) {
-		qdisc_bstats_update(sch, skb);
+	if (++sch->q.qlen <= q->limit)
 		return NET_XMIT_SUCCESS;
-	}
 
 	sfq_drop(sch);
 	return NET_XMIT_CN;
@@ -445,6 +443,7 @@ next_slot:
 	}
 	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 77565e721811..e93165820c3f 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -134,7 +134,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	}
 
 	sch->q.qlen++;
-	qdisc_bstats_update(sch, skb);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -187,6 +186,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 			q->ptokens = ptoks;
 			sch->q.qlen--;
 			sch->flags &= ~TCQ_F_THROTTLED;
+			qdisc_bstats_update(sch, skb);
 			return skb;
 		}
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 84ce48eadff4..d84e7329660f 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -87,7 +87,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	if (q->q.qlen < dev->tx_queue_len) {
 		__skb_queue_tail(&q->q, skb);
-		qdisc_bstats_update(sch, skb);
 		return NET_XMIT_SUCCESS;
 	}
 
@@ -111,6 +110,8 @@ teql_dequeue(struct Qdisc* sch)
 			dat->m->slaves = sch;
 			netif_wake_queue(m);
 		}
+	} else {
+		qdisc_bstats_update(sch, skb);
 	}
 	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 	return skb;
-- 
cgit v1.2.2


From 577d6a7c3a0e273e115c65a148b71be6c1950f69 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 24 Jan 2011 14:32:52 -0600
Subject: module: fix missing semicolons in MODULE macro usage

You always needed them when you were a module, but the builtin versions
of the macros used to be more lenient.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 net/dsa/dsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0c877a74e1f4..3fb14b7c13cf 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -428,7 +428,7 @@ static void __exit dsa_cleanup_module(void)
 }
 module_exit(dsa_cleanup_module);
 
-MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>")
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>");
 MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:dsa");
-- 
cgit v1.2.2


From c71caf4114a0e1da3451cc92fba6a152929cd4c2 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 24 Jan 2011 19:01:07 +0100
Subject: netfilter: ctnetlink: fix missing refcount increment during dumps

In 13ee6ac netfilter: fix race in conntrack between dump_table and
destroy, we recovered spinlocks to protect the dump of the conntrack
table according to reports from Stephen and acknowledgments on the
issue from Eric.

In that patch, the refcount bump that allows to keep a reference
to the current ct object was removed. However, we still decrement
the refcount for that object in the output path of
ctnetlink_dump_table():

        if (last)
                nf_ct_put(last)

Cc: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 93297aaceb2b..eead9db6f899 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -667,6 +667,7 @@ restart:
 			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
 						cb->nlh->nlmsg_seq,
 						IPCTNL_MSG_CT_NEW, ct) < 0) {
+				nf_conntrack_get(&ct->ct_general);
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
-- 
cgit v1.2.2


From 08b5194b5d6485d12ebf24cf6ee389fc55691122 Mon Sep 17 00:00:00 2001
From: Thomas Jacob <jacob@internet24.de>
Date: Mon, 24 Jan 2011 21:35:36 +0100
Subject: netfilter: xt_iprange: Incorrect xt_iprange boundary check for IPv6

iprange_ipv6_sub was substracting 2 unsigned ints and then casting
the result to int to find out whether they are lt, eq or gt each
other, this doesn't work if the full 32 bits of each part
can be used in IPv6 addresses. Patch should remedy that without
significant performance penalties. Also number of ntohl
calls can be reduced this way (Jozsef Kadlecsik).

Signed-off-by: Thomas Jacob <jacob@internet24.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_iprange.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 88f7c3511c72..73c33a42f87f 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -53,15 +53,13 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
 }
 
 static inline int
-iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
+iprange_ipv6_lt(const struct in6_addr *a, const struct in6_addr *b)
 {
 	unsigned int i;
-	int r;
 
 	for (i = 0; i < 4; ++i) {
-		r = ntohl(a->s6_addr32[i]) - ntohl(b->s6_addr32[i]);
-		if (r != 0)
-			return r;
+		if (a->s6_addr32[i] != b->s6_addr32[i])
+			return ntohl(a->s6_addr32[i]) < ntohl(b->s6_addr32[i]);
 	}
 
 	return 0;
@@ -75,15 +73,15 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	bool m;
 
 	if (info->flags & IPRANGE_SRC) {
-		m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
-		m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
+		m  = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6);
+		m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr);
 		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m)
 			return false;
 	}
 	if (info->flags & IPRANGE_DST) {
-		m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
-		m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
+		m  = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6);
+		m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr);
 		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m)
 			return false;
-- 
cgit v1.2.2


From c506653d35249bb4738bb139c24362e1ae724bc1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 24 Jan 2011 13:16:16 -0800
Subject: net: arp_ioctl() must hold RTNL

Commit 941666c2e3e0 "net: RCU conversion of dev_getbyhwaddr() and
arp_ioctl()" introduced a regression, reported by Jamie Heilman.
"arp -Ds 192.168.2.41 eth0 pub" triggered the ASSERT_RTNL() assert
in pneigh_lookup()

Removing RTNL requirement from arp_ioctl() was a mistake, just revert
that part.

Reported-by: Jamie Heilman <jamie@audible.transient.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c |  3 ++-
 net/ipv4/arp.c | 11 +++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8393ec408cd4..1e5077d2cca6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -749,7 +749,8 @@ EXPORT_SYMBOL(dev_get_by_index);
  *	@ha: hardware address
  *
  *	Search for an interface by MAC address. Returns NULL if the device
- *	is not found or a pointer to the device. The caller must hold RCU
+ *	is not found or a pointer to the device.
+ *	The caller must hold RCU or RTNL.
  *	The returned device has not had its ref count increased
  *	and the caller must therefore be careful about locking
  *
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 04c8b69fd426..7927589813b5 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1017,14 +1017,13 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
 		IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
 		return 0;
 	}
-	if (__in_dev_get_rcu(dev)) {
-		IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on);
+	if (__in_dev_get_rtnl(dev)) {
+		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
 		return 0;
 	}
 	return -ENXIO;
 }
 
-/* must be called with rcu_read_lock() */
 static int arp_req_set_public(struct net *net, struct arpreq *r,
 		struct net_device *dev)
 {
@@ -1233,10 +1232,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	if (!(r.arp_flags & ATF_NETMASK))
 		((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
 							   htonl(0xFFFFFFFFUL);
-	rcu_read_lock();
+	rtnl_lock();
 	if (r.arp_dev[0]) {
 		err = -ENODEV;
-		dev = dev_get_by_name_rcu(net, r.arp_dev);
+		dev = __dev_get_by_name(net, r.arp_dev);
 		if (dev == NULL)
 			goto out;
 
@@ -1263,7 +1262,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		break;
 	}
 out:
-	rcu_read_unlock();
+	rtnl_unlock();
 	if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
 		err = -EFAULT;
 	return err;
-- 
cgit v1.2.2


From d1dc7abf2fafa34b0ffcd070fd59405aa9c0a4d8 Mon Sep 17 00:00:00 2001
From: Michal Schmidt <mschmidt@redhat.com>
Date: Mon, 24 Jan 2011 12:08:48 +0000
Subject: GRO: fix merging a paged skb after non-paged skbs

Suppose that several linear skbs of the same flow were received by GRO. They
were thus merged into one skb with a frag_list. Then a new skb of the same flow
arrives, but it is a paged skb with data starting in its frags[].

Before adding the skb to the frag_list skb_gro_receive() will of course adjust
the skb to throw away the headers. It correctly modifies the page_offset and
size of the frag, but it leaves incorrect information in the skb:
 ->data_len is not decreased at all.
 ->len is decreased only by headlen, as if no change were done to the frag.
Later in a receiving process this causes skb_copy_datagram_iovec() to return
-EFAULT and this is seen in userspace as the result of the recv() syscall.

In practice the bug can be reproduced with the sfc driver. By default the
driver uses an adaptive scheme when it switches between using
napi_gro_receive() (with skbs) and napi_gro_frags() (with pages). The bug is
reproduced when under rx load with enough successful GRO merging the driver
decides to switch from the former to the latter.

Manual control is also possible, so reproducing this is easy with netcat:
 - on machine1 (with sfc): nc -l 12345 > /dev/null
 - on machine2: nc machine1 12345 < /dev/zero
 - on machine1:
   echo 1 > /sys/module/sfc/parameters/rx_alloc_method  # use skbs
   echo 2 > /sys/module/sfc/parameters/rx_alloc_method  # use pages
 - See that nc has quit suddenly.

[v2: Modified by Eric Dumazet to avoid advancing skb->data past the end
     and to use a temporary variable.]

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d31bb36ae0dc..7cd1bc86d591 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2744,8 +2744,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 merge:
 	if (offset > headlen) {
-		skbinfo->frags[0].page_offset += offset - headlen;
-		skbinfo->frags[0].size -= offset - headlen;
+		unsigned int eat = offset - headlen;
+
+		skbinfo->frags[0].page_offset += eat;
+		skbinfo->frags[0].size -= eat;
+		skb->data_len -= eat;
+		skb->len -= eat;
 		offset = headlen;
 	}
 
-- 
cgit v1.2.2


From 3408404a4c2a4eead9d73b0bbbfe3f225b65f492 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 24 Jan 2011 14:37:46 -0800
Subject: inetpeer: Use correct AVL tree base pointer in inet_getpeer().

Family was hard-coded to AF_INET but should be daddr->family.

This fixes crashes when unlinking ipv6 peer entries, since the
unlink code was looking up the base pointer properly.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d9bc85751c74..a96e65674ac3 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -475,7 +475,7 @@ static int cleanup_once(unsigned long ttl)
 struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer_base *base = family_to_base(AF_INET);
+	struct inet_peer_base *base = family_to_base(daddr->family);
 	struct inet_peer *p;
 
 	/* Look up for the address quickly, lockless.
-- 
cgit v1.2.2


From fd0273c5033630b8673554cd39660435d1ab2ac4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 24 Jan 2011 14:41:20 -0800
Subject: tcp: fix bug in listening_get_next()

commit a8b690f98baf9fb19 (tcp: Fix slowness in read /proc/net/tcp)
introduced a bug in handling of SYN_RECV sockets.

st->offset represents number of sockets found since beginning of
listening_hash[st->bucket].

We should not reset st->offset when iterating through
syn_table[st->sbucket], or else if more than ~25 sockets (if
PAGE_SIZE=4096) are in SYN_RECV state, we exit from listening_get_next()
with a too small st->offset

Next time we enter tcp_seek_last_pos(), we are not able to seek past
already found sockets.

Reported-by: PK <runningdoglackey@yahoo.com>
CC: Tom Herbert <therbert@google.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 856f68466d49..02f583b3744a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1994,7 +1994,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 				}
 				req = req->dl_next;
 			}
-			st->offset = 0;
 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
 				break;
 get_req:
-- 
cgit v1.2.2


From 3dce38a02d6370dca690cd923619d4b00024b723 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Fri, 21 Jan 2011 16:35:18 +0000
Subject: dcbnl: make get_app handling symmetric for IEEE and CEE DCBx

The IEEE get/set app handlers use generic routines and do not
require the net_device to implement the dcbnl_ops routines. This
patch makes it symmetric so user space and drivers do not have
to handle the CEE version and IEEE DCBx versions differently.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d900ab99814a..6b03f561caec 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -583,7 +583,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb,
 	u8 up, idtype;
 	int ret = -EINVAL;
 
-	if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->getapp)
+	if (!tb[DCB_ATTR_APP])
 		goto out;
 
 	ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
@@ -604,7 +604,16 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb,
 		goto out;
 
 	id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]);
-	up = netdev->dcbnl_ops->getapp(netdev, idtype, id);
+
+	if (netdev->dcbnl_ops->getapp) {
+		up = netdev->dcbnl_ops->getapp(netdev, idtype, id);
+	} else {
+		struct dcb_app app = {
+					.selector = idtype,
+					.protocol = id,
+				     };
+		up = dcb_getapp(netdev, &app);
+	}
 
 	/* send this back */
 	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-- 
cgit v1.2.2


From d80bc0fd262ef840ed4e82593ad6416fa1ba3fc4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 24 Jan 2011 16:01:58 -0800
Subject: ipv6: Always clone offlink routes.

Do not handle PMTU vs. route lookup creation any differently
wrt. offlink routes, always clone them.

Reported-by: PK <runningdoglackey@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 373bd0416f69..1534508f6c68 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -72,8 +72,6 @@
 #define RT6_TRACE(x...) do { ; } while (0)
 #endif
 
-#define CLONE_OFFLINK_ROUTE 0
-
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
@@ -738,13 +736,8 @@ restart:
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
-	else {
-#if CLONE_OFFLINK_ROUTE
+	else
 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
-#else
-		goto out2;
-#endif
-	}
 
 	dst_release(&rt->dst);
 	rt = nrt ? : net->ipv6.ip6_null_entry;
-- 
cgit v1.2.2


From b7c7d01aaed1f71d9afe815a569f0a81465a1744 Mon Sep 17 00:00:00 2001
From: Eugene Teo <eugeneteo@kernel.org>
Date: Mon, 24 Jan 2011 21:05:17 -0800
Subject: net: clear heap allocation for ethtool_get_regs()

There is a conflict between commit b00916b1 and a77f5db3. This patch resolves
the conflict by clearing the heap allocation in ethtool_get_regs().

Cc: stable@kernel.org
Signed-off-by: Eugene Teo <eugeneteo@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 17741782a345..ff2302910b5e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -817,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	if (regs.len > reglen)
 		regs.len = reglen;
 
-	regbuf = vmalloc(reglen);
+	regbuf = vzalloc(reglen);
 	if (!regbuf)
 		return -ENOMEM;
 
-- 
cgit v1.2.2


From 778be232a207e79088ba70d832ac25dfea6fbf1a Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Tue, 25 Jan 2011 15:38:01 +0000
Subject: NFS do not find client in NFSv4 pg_authenticate

The information required to find the nfs_client cooresponding to the incoming
back channel request is contained in the NFS layer. Perform minimal checking
in the RPC layer pg_authenticate method, and push more detailed checking into
the NFS layer where the nfs_client can be found.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/svcsock.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7bd3bbba4710..d802e941d365 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1609,9 +1609,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
  */
 static void svc_bc_sock_free(struct svc_xprt *xprt)
 {
-	if (xprt) {
-		kfree(xprt->xpt_bc_sid);
+	if (xprt)
 		kfree(container_of(xprt, struct svc_sock, sk_xprt));
-	}
 }
 #endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.2


From 73a8bd74e2618990dbb218c3d82f53e60acd9af0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 23 Jan 2011 23:27:15 -0800
Subject: ipv6: Revert 'administrative down' address handling changes.

This reverts the following set of commits:

d1ed113f1669390da9898da3beddcc058d938587 ("ipv6: remove duplicate neigh_ifdown")
29ba5fed1bbd09c2cba890798c8f9eaab251401d ("ipv6: don't flush routes when setting loopback down")
9d82ca98f71fd686ef2f3017c5e3e6a4871b6e46 ("ipv6: fix missing in6_ifa_put in addrconf")
2de795707294972f6c34bae9de713e502c431296 ("ipv6: addrconf: don't remove address state on ifdown if the address is being kept")
8595805aafc8b077e01804c9a3668e9aa3510e89 ("IPv6: only notify protocols if address is compeletely gone")
27bdb2abcc5edb3526e25407b74bf17d1872c329 ("IPv6: keep tentative addresses in hash table")
93fa159abe50d3c55c7f83622d3f5c09b6e06f4b ("IPv6: keep route for tentative address")
8f37ada5b5f6bfb4d251a7f510f249cb855b77b3 ("IPv6: fix race between cleanup and add/delete address")
84e8b803f1e16f3a2b8b80f80a63fa2f2f8a9be6 ("IPv6: addrconf notify when address is unavailable")
dc2b99f71ef477a31020511876ab4403fb7c4420 ("IPv6: keep permanent addresses on admin down")

because the core semantic change to ipv6 address handling on ifdown
has broken some things, in particular "disable_ipv6" sysctl handling.

Stephen has made several attempts to get things back in working order,
but nothing has restored disable_ipv6 fully yet.

Reported-by: Eric W. Biederman <ebiederm@xmission.com>
Tested-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 81 ++++++++++++++++++++++-------------------------------
 1 file changed, 33 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 24a1cf110d80..fd6782e3a038 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2661,14 +2661,12 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa;
-	LIST_HEAD(keep_list);
-	int state;
+	int state, i;
 
 	ASSERT_RTNL();
 
-	/* Flush routes if device is being removed or it is not loopback */
-	if (how || !(dev->flags & IFF_LOOPBACK))
-		rt6_ifdown(net, dev);
+	rt6_ifdown(net, dev);
+	neigh_ifdown(&nd_tbl, dev);
 
 	idev = __in6_dev_get(dev);
 	if (idev == NULL)
@@ -2689,6 +2687,23 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	}
 
+	/* Step 2: clear hash table */
+	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+		struct hlist_head *h = &inet6_addr_lst[i];
+		struct hlist_node *n;
+
+		spin_lock_bh(&addrconf_hash_lock);
+	restart:
+		hlist_for_each_entry_rcu(ifa, n, h, addr_lst) {
+			if (ifa->idev == idev) {
+				hlist_del_init_rcu(&ifa->addr_lst);
+				addrconf_del_timer(ifa);
+				goto restart;
+			}
+		}
+		spin_unlock_bh(&addrconf_hash_lock);
+	}
+
 	write_lock_bh(&idev->lock);
 
 	/* Step 2: clear flags for stateless addrconf */
@@ -2722,52 +2737,23 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 				       struct inet6_ifaddr, if_list);
 		addrconf_del_timer(ifa);
 
-		/* If just doing link down, and address is permanent
-		   and not link-local, then retain it. */
-		if (!how &&
-		    (ifa->flags&IFA_F_PERMANENT) &&
-		    !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
-			list_move_tail(&ifa->if_list, &keep_list);
-
-			/* If not doing DAD on this address, just keep it. */
-			if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
-			    idev->cnf.accept_dad <= 0 ||
-			    (ifa->flags & IFA_F_NODAD))
-				continue;
+		list_del(&ifa->if_list);
 
-			/* If it was tentative already, no need to notify */
-			if (ifa->flags & IFA_F_TENTATIVE)
-				continue;
+		write_unlock_bh(&idev->lock);
 
-			/* Flag it for later restoration when link comes up */
-			ifa->flags |= IFA_F_TENTATIVE;
-			ifa->state = INET6_IFADDR_STATE_DAD;
-		} else {
-			list_del(&ifa->if_list);
-
-			/* clear hash table */
-			spin_lock_bh(&addrconf_hash_lock);
-			hlist_del_init_rcu(&ifa->addr_lst);
-			spin_unlock_bh(&addrconf_hash_lock);
-
-			write_unlock_bh(&idev->lock);
-			spin_lock_bh(&ifa->state_lock);
-			state = ifa->state;
-			ifa->state = INET6_IFADDR_STATE_DEAD;
-			spin_unlock_bh(&ifa->state_lock);
-
-			if (state != INET6_IFADDR_STATE_DEAD) {
-				__ipv6_ifa_notify(RTM_DELADDR, ifa);
-				atomic_notifier_call_chain(&inet6addr_chain,
-							   NETDEV_DOWN, ifa);
-			}
+		spin_lock_bh(&ifa->state_lock);
+		state = ifa->state;
+		ifa->state = INET6_IFADDR_STATE_DEAD;
+		spin_unlock_bh(&ifa->state_lock);
 
-			in6_ifa_put(ifa);
-			write_lock_bh(&idev->lock);
+		if (state != INET6_IFADDR_STATE_DEAD) {
+			__ipv6_ifa_notify(RTM_DELADDR, ifa);
+			atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
 		}
-	}
+		in6_ifa_put(ifa);
 
-	list_splice(&keep_list, &idev->addr_list);
+		write_lock_bh(&idev->lock);
+	}
 
 	write_unlock_bh(&idev->lock);
 
@@ -4156,8 +4142,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		dst_hold(&ifp->rt->dst);
 
-		if (ifp->state == INET6_IFADDR_STATE_DEAD &&
-		    ip6_del_rt(ifp->rt))
+		if (ip6_del_rt(ifp->rt))
 			dst_free(&ifp->rt->dst);
 		break;
 	}
-- 
cgit v1.2.2


From eb3e554b4b3a56386ef5214dbe0e3935a350178b Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 24 Jan 2011 19:28:49 +0100
Subject: mac80211: fix a crash in ieee80211_beacon_get_tim on change_interface

Some drivers (e.g. ath9k) do not always disable beacons when they're
supposed to. When an interface is changed using the change_interface op,
the mode specific sdata part is in an undefined state and trying to
get a beacon at this point can produce weird crashes.

To fix this, add a check for ieee80211_sdata_running before using
anything from the sdata.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5950e3abead9..b64b42bc774b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2230,6 +2230,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 
 	sdata = vif_to_sdata(vif);
 
+	if (!ieee80211_sdata_running(sdata))
+		goto out;
+
 	if (tim_offset)
 		*tim_offset = 0;
 	if (tim_length)
-- 
cgit v1.2.2


From 44f5324b5d13ef2187729d949eca442689627f39 Mon Sep 17 00:00:00 2001
From: Jerry Chu <hkchu@google.com>
Date: Tue, 25 Jan 2011 13:46:30 -0800
Subject: TCP: fix a bug that triggers large number of TCP RST by mistake

This patch fixes a bug that causes TCP RST packets to be generated
on otherwise correctly behaved applications, e.g., no unread data
on close,..., etc. To trigger the bug, at least two conditions must
be met:

1. The FIN flag is set on the last data packet, i.e., it's not on a
separate, FIN only packet.
2. The size of the last data chunk on the receive side matches
exactly with the size of buffer posted by the receiver, and the
receiver closes the socket without any further read attempt.

This bug was first noticed on our netperf based testbed for our IW10
proposal to IETF where a large number of RST packets were observed.
netperf's read side code meets the condition 2 above 100%.

Before the fix, tcp_data_queue() will queue the last skb that meets
condition 1 to sk_receive_queue even though it has fully copied out
(skb_copy_datagram_iovec()) the data. Then if condition 2 is also met,
tcp_recvmsg() often returns all the copied out data successfully
without actually consuming the skb, due to a check
"if ((chunk = len - tp->ucopy.len) != 0) {"
and
"len -= chunk;"
after tcp_prequeue_process() that causes "len" to become 0 and an
early exit from the big while loop.

I don't see any reason not to free the skb whose data have been fully
consumed in tcp_data_queue(), regardless of the FIN flag.  We won't
get there if MSG_PEEK is on. Am I missing some arcane cases related
to urgent data?

Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2549b29b062d..eb7f82ebf4a3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4399,7 +4399,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
 				tp->ucopy.len -= chunk;
 				tp->copied_seq += chunk;
-				eaten = (chunk == skb->len && !th->fin);
+				eaten = (chunk == skb->len);
 				tcp_rcv_space_adjust(sk);
 			}
 			local_bh_disable();
-- 
cgit v1.2.2


From dd58ddc6928f711d8fb7101182215a0f23cf41f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Tue, 25 Jan 2011 21:56:16 +0000
Subject: batman-adv: Fix kernel panic when fetching vis data on a vis server
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hash_iterate removal introduced a bug leading to a kernel panic when
fetching the vis data on a vis server. That commit forgot to rename one
variable name, which this commit fixes now.

Reported-by: Russell Senior <russell@personaltelco.net>
Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/vis.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index cd4c4231fa48..f69a3748f8ae 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -268,10 +268,10 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				buff_pos += sprintf(buff + buff_pos, "%pM,",
 						entry->addr);
 
-				for (i = 0; i < packet->entries; i++)
+				for (j = 0; j < packet->entries; j++)
 					buff_pos += vis_data_read_entry(
 							buff + buff_pos,
-							&entries[i],
+							&entries[j],
 							entry->addr,
 							entry->primary);
 
-- 
cgit v1.2.2


From 7cc2edb83447775a34ed3bf9d29d8295a434b523 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 26 Jan 2011 13:41:03 -0800
Subject: xfrm6: Don't forget to propagate peer into ipsec route.

Like ipv4, we have to propagate the ipv6 route peer into
the ipsec top-level route during instantiation.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_policy.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 7e74023ea6e4..da87428681cc 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -98,6 +98,10 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	if (!xdst->u.rt6.rt6i_idev)
 		return -ENODEV;
 
+	xdst->u.rt6.rt6i_peer = rt->rt6i_peer;
+	if (rt->rt6i_peer)
+		atomic_inc(&rt->rt6i_peer->refcnt);
+
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
 	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
@@ -216,6 +220,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
 
 	if (likely(xdst->u.rt6.rt6i_idev))
 		in6_dev_put(xdst->u.rt6.rt6i_idev);
+	if (likely(xdst->u.rt6.rt6i_peer))
+		inet_putpeer(xdst->u.rt6.rt6i_peer);
 	xfrm_dst_destroy(xdst);
 }
 
-- 
cgit v1.2.2


From 389f2a18c6a2a5531ac5a155c3ba25784065b1cb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 26 Jan 2011 00:04:18 +0000
Subject: econet: remove compiler warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/econet/af_econet.c: In function ‘econet_sendmsg’:
net/econet/af_econet.c:494: warning: label ‘error’ defined but not used
net/econet/af_econet.c:268: warning: unused variable ‘sk’

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Phil Blundell <philb@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 15dcc1a586b4..0c2826337919 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -265,13 +265,13 @@ static void ec_tx_done(struct sk_buff *skb, int result)
 static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			  struct msghdr *msg, size_t len)
 {
-	struct sock *sk = sock->sk;
 	struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name;
 	struct net_device *dev;
 	struct ec_addr addr;
 	int err;
 	unsigned char port, cb;
 #if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
+	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	struct ec_cb *eb;
 #endif
@@ -488,10 +488,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 error_free_buf:
 	vfree(userbuf);
+error:
 #else
 	err = -EPROTOTYPE;
 #endif
-	error:
 	mutex_unlock(&econet_mutex);
 
 	return err;
-- 
cgit v1.2.2


From 6d3a9a685465986d7653c5abbc0b24681e7c44d7 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <kurt.van.dijck@eia.be>
Date: Wed, 26 Jan 2011 04:55:24 +0000
Subject: net: fix validate_link_af in rtnetlink core

I'm testing an API that uses IFLA_AF_SPEC attribute.
In the rtnetlink core , the set_link_af() member
of the rtnl_af_ops struct receives the nested attribute
(as I expected), but the validate_link_af() member
receives the parent attribute.
IMO, this patch fixes this.

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 750db57f3bb3..31459ef13ca2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1121,8 +1121,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 				return -EOPNOTSUPP;
 
 			if (af_ops->validate_link_af) {
-				err = af_ops->validate_link_af(dev,
-							tb[IFLA_AF_SPEC]);
+				err = af_ops->validate_link_af(dev, af);
 				if (err < 0)
 					return err;
 			}
-- 
cgit v1.2.2


From c2aa3665cf8510b1665ee2f5a9525cf7be6dec4f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 25 Jan 2011 23:18:38 +0000
Subject: net: add kmemcheck annotation in __alloc_skb()

pskb_expand_head() triggers a kmemcheck warning when copy of
skb_shared_info is done in pskb_expand_head()

This is because destructor_arg field is not necessarily initialized at
this point. Add kmemcheck_annotate_variable() call in __alloc_skb() to
instruct kmemcheck this is a normal situation.

Resolves bugzilla.kernel.org 27212

Reference: https://bugzilla.kernel.org/show_bug.cgi?id=27212
Reported-by: Christian Casteyde <casteyde.christian@free.fr>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7cd1bc86d591..d883dcc78b6b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -210,6 +210,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
+	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
-- 
cgit v1.2.2


From 8f2771f2b85aea4d0f9a0137ad3b63d1173c0962 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 27 Jan 2011 14:55:22 -0800
Subject: ipv6: Remove route peer binding assertions.

They are bogus.  The basic idea is that I wanted to make sure
that prefixed routes never bind to peers.

The test I used was whether RTF_CACHE was set.

But first of all, the RTF_CACHE flag is set at different spots
depending upon which ip6_rt_copy() caller you're talking about.

I've validated all of the code paths, and even in the future
where we bind peers more aggressively (for route metric COW'ing)
we never bind to prefix'd routes, only fully specified ones.
This even applies when addrconf or icmp6 routes are allocated.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1534508f6c68..28a85fc63cb8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -194,7 +194,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 		in6_dev_put(idev);
 	}
 	if (peer) {
-		BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
 		rt->rt6i_peer = NULL;
 		inet_putpeer(peer);
 	}
@@ -204,9 +203,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
 {
 	struct inet_peer *peer;
 
-	if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
-		return;
-
 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
-- 
cgit v1.2.2


From 66c46d741e2e60f0e8b625b80edb0ab820c46d7a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 29 Jan 2011 20:44:54 -0800
Subject: gro: Reset dev pointer on reuse

On older kernels the VLAN code may zero skb->dev before dropping
it and causing it to be reused by GRO.

Unfortunately we didn't reset skb->dev in that case which causes
the next GRO user to get a bogus skb->dev pointer.

This particular problem no longer happens with the current upstream
kernel due to changes in VLAN processing.

However, for correctness we should still reset the skb->dev pointer
in the GRO reuse function in case a future user does the same thing.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 24ea2d71e7ea..93e44dbf6793 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3424,6 +3424,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	__skb_pull(skb, skb_headlen(skb));
 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
 	skb->vlan_tci = 0;
+	skb->dev = napi->dev;
 
 	napi->skb = skb;
 }
-- 
cgit v1.2.2


From 13ad17745c2cbd437d9e24b2d97393e0be11c439 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 29 Jan 2011 14:57:22 +0000
Subject: net: Fix ip link add netns oops

Ed Swierk <eswierk@bigswitch.com> writes:
> On 2.6.35.7
>  ip link add link eth0 netns 9999 type macvlan
> where 9999 is a nonexistent PID triggers an oops and causes all network functions to hang:
> [10663.821898] BUG: unable to handle kernel NULL pointer dereference at 000000000000006d
>  [10663.821917] IP: [<ffffffff8149c2fa>] __dev_alloc_name+0x9a/0x170
>  [10663.821933] PGD 1d3927067 PUD 22f5c5067 PMD 0
>  [10663.821944] Oops: 0000 [#1] SMP
>  [10663.821953] last sysfs file: /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq
>  [10663.821959] CPU 3
>  [10663.821963] Modules linked in: macvlan ip6table_filter ip6_tables rfcomm ipt_MASQUERADE binfmt_misc iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack sco ipt_REJECT bnep l2cap xt_tcpudp iptable_filter ip_tables x_tables bridge stp vboxnetadp vboxnetflt vboxdrv kvm_intel kvm parport_pc ppdev snd_hda_codec_intelhdmi snd_hda_codec_conexant arc4 iwlagn iwlcore mac80211 snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_seq_midi snd_rawmidi i915 snd_seq_midi_event snd_seq thinkpad_acpi drm_kms_helper btusb tpm_tis nvram uvcvideo snd_timer snd_seq_device bluetooth videodev v4l1_compat v4l2_compat_ioctl32 tpm drm tpm_bios snd cfg80211 psmouse serio_raw intel_ips soundcore snd_page_alloc intel_agp i2c_algo_bit video output netconsole configfs lp parport usbhid hid e1000e sdhci_pci ahci libahci sdhci led_class
>  [10663.822155]
>  [10663.822161] Pid: 6000, comm: ip Not tainted 2.6.35-23-generic #41-Ubuntu 2901CTO/2901CTO
>  [10663.822167] RIP: 0010:[<ffffffff8149c2fa>] [<ffffffff8149c2fa>] __dev_alloc_name+0x9a/0x170
>  [10663.822177] RSP: 0018:ffff88014aebf7b8 EFLAGS: 00010286
>  [10663.822182] RAX: 00000000fffffff4 RBX: ffff8801ad900800 RCX: 0000000000000000
>  [10663.822187] RDX: ffff880000000000 RSI: 0000000000000000 RDI: ffff88014ad63000
>  [10663.822191] RBP: ffff88014aebf808 R08: 0000000000000041 R09: 0000000000000041
>  [10663.822196] R10: 0000000000000000 R11: dead000000200200 R12: ffff88014aebf818
>  [10663.822201] R13: fffffffffffffffd R14: ffff88014aebf918 R15: ffff88014ad62000
>  [10663.822207] FS: 00007f00c487f700(0000) GS:ffff880001f80000(0000) knlGS:0000000000000000
>  [10663.822212] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>  [10663.822216] CR2: 000000000000006d CR3: 0000000231f19000 CR4: 00000000000026e0
>  [10663.822221] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>  [10663.822226] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
>  [10663.822231] Process ip (pid: 6000, threadinfo ffff88014aebe000, task ffff88014afb16e0)
>  [10663.822236] Stack:
>  [10663.822240] ffff88014aebf808 ffffffff814a2bb5 ffff88014aebf7e8 00000000a00ee8d6
>  [10663.822251] <0> 0000000000000000 ffffffffa00ef940 ffff8801ad900800 ffff88014aebf818
>  [10663.822265] <0> ffff88014aebf918 ffff8801ad900800 ffff88014aebf858 ffffffff8149c413
>  [10663.822281] Call Trace:
>  [10663.822290] [<ffffffff814a2bb5>] ? dev_addr_init+0x75/0xb0
>  [10663.822298] [<ffffffff8149c413>] dev_alloc_name+0x43/0x90
>  [10663.822307] [<ffffffff814a85ee>] rtnl_create_link+0xbe/0x1b0
>  [10663.822314] [<ffffffff814ab2aa>] rtnl_newlink+0x48a/0x570
>  [10663.822321] [<ffffffff814aafcc>] ? rtnl_newlink+0x1ac/0x570
>  [10663.822332] [<ffffffff81030064>] ? native_x2apic_icr_read+0x4/0x20
>  [10663.822339] [<ffffffff814a8c17>] rtnetlink_rcv_msg+0x177/0x290
>  [10663.822346] [<ffffffff814a8aa0>] ? rtnetlink_rcv_msg+0x0/0x290
>  [10663.822354] [<ffffffff814c25d9>] netlink_rcv_skb+0xa9/0xd0
>  [10663.822360] [<ffffffff814a8a85>] rtnetlink_rcv+0x25/0x40
>  [10663.822367] [<ffffffff814c223e>] netlink_unicast+0x2de/0x2f0
>  [10663.822374] [<ffffffff814c303e>] netlink_sendmsg+0x1fe/0x2e0
>  [10663.822383] [<ffffffff81488533>] sock_sendmsg+0xf3/0x120
>  [10663.822391] [<ffffffff815899fe>] ? _raw_spin_lock+0xe/0x20
>  [10663.822400] [<ffffffff81168656>] ? __d_lookup+0x136/0x150
>  [10663.822406] [<ffffffff815899fe>] ? _raw_spin_lock+0xe/0x20
>  [10663.822414] [<ffffffff812b7a0d>] ? _atomic_dec_and_lock+0x4d/0x80
>  [10663.822422] [<ffffffff8116ea90>] ? mntput_no_expire+0x30/0x110
>  [10663.822429] [<ffffffff81486ff5>] ? move_addr_to_kernel+0x65/0x70
>  [10663.822435] [<ffffffff81493308>] ? verify_iovec+0x88/0xe0
>  [10663.822442] [<ffffffff81489020>] sys_sendmsg+0x240/0x3a0
> [10663.822450] [<ffffffff8111e2a9>] ? __do_fault+0x479/0x560
>  [10663.822457] [<ffffffff815899fe>] ? _raw_spin_lock+0xe/0x20
>  [10663.822465] [<ffffffff8116cf4a>] ? alloc_fd+0x10a/0x150
>  [10663.822473] [<ffffffff8158d76e>] ? do_page_fault+0x15e/0x350
>  [10663.822482] [<ffffffff8100a0f2>] system_call_fastpath+0x16/0x1b
>  [10663.822487] Code: 90 48 8d 78 02 be 25 00 00 00 e8 92 1d e2 ff 48 85 c0 75 cf bf 20 00 00 00 e8 c3 b1 c6 ff 49 89 c7 b8 f4 ff ff ff 4d 85 ff 74 bd <4d> 8b 75 70 49 8d 45 70 48 89 45 b8 49 83 ee 58 eb 28 48 8d 55
>  [10663.822618] RIP [<ffffffff8149c2fa>] __dev_alloc_name+0x9a/0x170
>  [10663.822627] RSP <ffff88014aebf7b8>
>  [10663.822631] CR2: 000000000000006d
>  [10663.822636] ---[ end trace 3dfd6c3ad5327ca7 ]---

This bug was introduced in:
commit 81adee47dfb608df3ad0b91d230fb3cef75f0060
Author: Eric W. Biederman <ebiederm@aristanetworks.com>
Date:   Sun Nov 8 00:53:51 2009 -0800

    net: Support specifying the network namespace upon device creation.

    There is no good reason to not support userspace specifying the
    network namespace during device creation, and it makes it easier
    to create a network device and pass it to a child network namespace
    with a well known name.

    We have to be careful to ensure that the target network namespace
    for the new device exists through the life of the call.  To keep
    that logic clear I have factored out the network namespace grabbing
    logic into rtnl_link_get_net.

    In addtion we need to continue to pass the source network namespace
    to the rtnl_link_ops.newlink method so that we can find the base
    device source network namespace.

    Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
    Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Where apparently I forgot to add error handling to the path where we create
a new network device in a new network namespace, and pass in an invalid pid.

Cc: stable@kernel.org
Reported-by: Ed Swierk <eswierk@bigswitch.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 31459ef13ca2..2d65c6bb24c1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1671,6 +1671,9 @@ replay:
 			snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
 
 		dest_net = rtnl_link_get_net(net, tb);
+		if (IS_ERR(dest_net))
+			return PTR_ERR(dest_net);
+
 		dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
 
 		if (IS_ERR(dev))
-- 
cgit v1.2.2


From 709b46e8d90badda1898caea50483c12af178e96 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 29 Jan 2011 16:15:56 +0000
Subject: net: Add compat ioctl support for the ipv4 multicast ioctl
 SIOCGETSGCNT

SIOCGETSGCNT is not a unique ioctl value as it it maps tio SIOCPROTOPRIVATE +1,
which unfortunately means the existing infrastructure for compat networking
ioctls is insufficient.  A trivial compact ioctl implementation would conflict
with:

SIOCAX25ADDUID
SIOCAIPXPRISLT
SIOCGETSGCNT_IN6
SIOCGETSGCNT
SIOCRSSCAUSE
SIOCX25SSUBSCRIP
SIOCX25SDTEFACILITIES

To make this work I have updated the compat_ioctl decode path to mirror the
the normal ioctl decode path.  I have added an ipv4 inet_compat_ioctl function
so that I can have ipv4 specific compat ioctls.   I have added a compat_ioctl
function into struct proto so I can break out ioctls by which kind of ip socket
I am using.  I have added a compat_raw_ioctl function because SIOCGETSGCNT only
works on raw sockets.  I have added a ipmr_compat_ioctl that mirrors the normal
ipmr_ioctl.

This was necessary because unfortunately the struct layout for the SIOCGETSGCNT
has unsigned longs in it so changes between 32bit and 64bit kernels.

This change was sufficient to run a 32bit ip multicast routing daemon on a
64bit kernel.

Reported-by: Bill Fenner <fenner@aristanetworks.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 16 ++++++++++++++++
 net/ipv4/ipmr.c    | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/raw.c     | 19 +++++++++++++++++++
 3 files changed, 81 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f2b61107df6c..45b89d7bda5a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -880,6 +880,19 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(inet_ioctl);
 
+#ifdef CONFIG_COMPAT
+int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	int err = -ENOIOCTLCMD;
+
+	if (sk->sk_prot->compat_ioctl)
+		err = sk->sk_prot->compat_ioctl(sk, cmd, arg);
+
+	return err;
+}
+#endif
+
 const struct proto_ops inet_stream_ops = {
 	.family		   = PF_INET,
 	.owner		   = THIS_MODULE,
@@ -903,6 +916,7 @@ const struct proto_ops inet_stream_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 EXPORT_SYMBOL(inet_stream_ops);
@@ -929,6 +943,7 @@ const struct proto_ops inet_dgram_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 EXPORT_SYMBOL(inet_dgram_ops);
@@ -959,6 +974,7 @@ static const struct proto_ops inet_sockraw_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3f3a9afd73e0..7e41ac0b9260 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -60,6 +60,7 @@
 #include <linux/notifier.h>
 #include <linux/if_arp.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/compat.h>
 #include <net/ipip.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
@@ -1434,6 +1435,51 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	}
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_sioc_sg_req {
+	struct in_addr src;
+	struct in_addr grp;
+	compat_ulong_t pktcnt;
+	compat_ulong_t bytecnt;
+	compat_ulong_t wrong_if;
+};
+
+int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+	struct sioc_sg_req sr;
+	struct mfc_cache *c;
+	struct net *net = sock_net(sk);
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
+
+	switch (cmd) {
+	case SIOCGETSGCNT:
+		if (copy_from_user(&sr, arg, sizeof(sr)))
+			return -EFAULT;
+
+		rcu_read_lock();
+		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
+		if (c) {
+			sr.pktcnt = c->mfc_un.res.pkt;
+			sr.bytecnt = c->mfc_un.res.bytes;
+			sr.wrong_if = c->mfc_un.res.wrong_if;
+			rcu_read_unlock();
+
+			if (copy_to_user(arg, &sr, sizeof(sr)))
+				return -EFAULT;
+			return 0;
+		}
+		rcu_read_unlock();
+		return -EADDRNOTAVAIL;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+#endif
+
 
 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a3d5ab786e81..6390ba299b3d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -76,6 +76,7 @@
 #include <linux/seq_file.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/compat.h>
 
 static struct raw_hashinfo raw_v4_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
@@ -838,6 +839,23 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	}
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case SIOCOUTQ:
+	case SIOCINQ:
+		return -ENOIOCTLCMD;
+	default:
+#ifdef CONFIG_IP_MROUTE
+		return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg));
+#else
+		return -ENOIOCTLCMD;
+#endif
+	}
+}
+#endif
+
 struct proto raw_prot = {
 	.name		   = "RAW",
 	.owner		   = THIS_MODULE,
@@ -860,6 +878,7 @@ struct proto raw_prot = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_raw_setsockopt,
 	.compat_getsockopt = compat_raw_getsockopt,
+	.compat_ioctl	   = compat_raw_ioctl,
 #endif
 };
 
-- 
cgit v1.2.2


From 2674c15870f888cb732a564fc504ce17654afc64 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 28 Jan 2011 18:34:05 +0100
Subject: batman-adv: Remove vis info on hashing errors

A newly created vis info object must be removed when it couldn't be
added to the hash. The old_info which has to be replaced was already
removed and isn't related to the hash anymore.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/vis.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index f69a3748f8ae..0be55beba054 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -444,7 +444,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
 			      info);
 	if (hash_added < 0) {
 		/* did not work (for some reason) */
-		kref_put(&old_info->refcount, free_info);
+		kref_put(&info->refcount, free_info);
 		info = NULL;
 	}
 
-- 
cgit v1.2.2


From dda9fc6b2c59f056e7a2b313b8423b14a4df25a9 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 28 Jan 2011 18:34:06 +0100
Subject: batman-adv: Remove vis info element in free_info

The free_info function will be called when no reference to the info
object exists anymore. It must be ensured that the allocated memory
gets freed and not only the elements which are managed by the info
object.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/vis.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 0be55beba054..988296cdf7c5 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -64,6 +64,7 @@ static void free_info(struct kref *ref)
 
 	spin_unlock_bh(&bat_priv->vis_list_lock);
 	kfree_skb(info->skb_packet);
+	kfree(info);
 }
 
 /* Compare two vis packets, used by the hashing algorithm */
-- 
cgit v1.2.2


From 1181e1daace88018b2ff66592aa10a4791d705ff Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 28 Jan 2011 18:34:07 +0100
Subject: batman-adv: Make vis info stack traversal threadsafe

The batman-adv vis server has to a stack which stores all information
about packets which should be send later. This stack is protected
with a spinlock that is used to prevent concurrent write access to it.

The send_vis_packets function has to take all elements from the stack
and send them to other hosts over the primary interface. The send will
be initiated without the lock which protects the stack.

The implementation using list_for_each_entry_safe has the problem that
it stores the next element as "safe ptr" to allow the deletion of the
current element in the list. The list may be modified during the
unlock/lock pair in the loop body which may make the safe pointer
not pointing to correct next element.

It is safer to remove and use the first element from the stack until no
elements are available. This does not need reduntant information which
would have to be validated each time the lock was removed.

Reported-by: Russell Senior <russell@personaltelco.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/vis.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 988296cdf7c5..de1022cacaf7 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -816,7 +816,7 @@ static void send_vis_packets(struct work_struct *work)
 		container_of(work, struct delayed_work, work);
 	struct bat_priv *bat_priv =
 		container_of(delayed_work, struct bat_priv, vis_work);
-	struct vis_info *info, *temp;
+	struct vis_info *info;
 
 	spin_lock_bh(&bat_priv->vis_hash_lock);
 	purge_vis_packets(bat_priv);
@@ -826,8 +826,9 @@ static void send_vis_packets(struct work_struct *work)
 		send_list_add(bat_priv, bat_priv->my_vis_info);
 	}
 
-	list_for_each_entry_safe(info, temp, &bat_priv->vis_send_list,
-				 send_list) {
+	while (!list_empty(&bat_priv->vis_send_list)) {
+		info = list_first_entry(&bat_priv->vis_send_list,
+					typeof(*info), send_list);
 
 		kref_get(&info->refcount);
 		spin_unlock_bh(&bat_priv->vis_hash_lock);
-- 
cgit v1.2.2


From ec831ea72ee5d7d473899e27a86bd659482c4d0d Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Mon, 31 Jan 2011 13:16:00 -0800
Subject: net: Add default_mtu() methods to blackhole dst_ops

When an IPSEC SA is still being set up, __xfrm_lookup() will return
-EREMOTE and so ip_route_output_flow() will return a blackhole route.
This can happen in a sndmsg call, and after d33e455337ea ("net: Abstract
default MTU metric calculation behind an accessor.") this leads to a
crash in ip_append_data() because the blackhole dst_ops have no
default_mtu() method and so dst_mtu() calls a NULL pointer.

Fix this by adding default_mtu() methods (that simply return 0, matching
the old behavior) to the blackhole dst_ops.

The IPv4 part of this patch fixes a crash that I saw when using an IPSEC
VPN; the IPv6 part is untested because I don't have an IPv6 VPN, but it
looks to be needed as well.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 6 ++++++
 net/ipv6/route.c | 6 ++++++
 2 files changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 351dc4e85242..788a3e74834e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2707,6 +2707,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
 	return NULL;
 }
 
+static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
+{
+	return 0;
+}
+
 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
@@ -2716,6 +2721,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.protocol		=	cpu_to_be16(ETH_P_IP),
 	.destroy		=	ipv4_dst_destroy,
 	.check			=	ipv4_blackhole_dst_check,
+	.default_mtu		=	ipv4_blackhole_default_mtu,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
 };
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 28a85fc63cb8..1c29f95695de 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -113,6 +113,11 @@ static struct dst_ops ip6_dst_ops_template = {
 	.local_out		=	__ip6_local_out,
 };
 
+static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
+{
+	return 0;
+}
+
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
@@ -122,6 +127,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 	.destroy		=	ip6_dst_destroy,
 	.check			=	ip6_dst_check,
+	.default_mtu		=	ip6_blackhole_default_mtu,
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 };
 
-- 
cgit v1.2.2


From 8587523640441a9ff2564ebc6efeb39497ad6709 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 31 Jan 2011 16:23:42 -0800
Subject: net: Check rps_flow_table when RPS map length is 1

In get_rps_cpu, add check that the rps_flow_table for the device is
NULL when trying to take fast path when RPS map length is one.
Without this, RFS is effectively disabled if map length is one which
is not correct.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 93e44dbf6793..4c907895876b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2563,7 +2563,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 
 	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
-		if (map->len == 1) {
+		if (map->len == 1 &&
+		    !rcu_dereference_raw(rxqueue->rps_flow_table)) {
 			tcpu = map->cpus[0];
 			if (cpu_online(tcpu))
 				cpu = tcpu;
-- 
cgit v1.2.2


From bf36076a67db6d7423d09d861a072337866f0dd9 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 31 Jan 2011 20:54:17 -0800
Subject: net: Fix ipv6 neighbour unregister_sysctl_table warning

In my testing of 2.6.37 I was occassionally getting a warning about
sysctl table entries being unregistered in the wrong order.  Digging
in it turns out this dates back to the last great sysctl reorg done
where Al Viro introduced the requirement that sysctl directories
needed to be created before and destroyed after the files in them.

It turns out that in that great reorg /proc/sys/net/ipv6/neigh was
overlooked.  So this patch fixes that oversight and makes an annoying
warning message go away.

>------------[ cut here ]------------
>WARNING: at kernel/sysctl.c:1992 unregister_sysctl_table+0x134/0x164()
>Pid: 23951, comm: kworker/u:3 Not tainted 2.6.37-350888.2010AroraKernelBeta.fc14.x86_64 #1
>Call Trace:
> [<ffffffff8103e034>] warn_slowpath_common+0x80/0x98
> [<ffffffff8103e061>] warn_slowpath_null+0x15/0x17
> [<ffffffff810452f8>] unregister_sysctl_table+0x134/0x164
> [<ffffffff810e7834>] ? kfree+0xc4/0xd1
> [<ffffffff813439b2>] neigh_sysctl_unregister+0x22/0x3a
> [<ffffffffa02cd14e>] addrconf_ifdown+0x33f/0x37b [ipv6]
> [<ffffffff81331ec2>] ? skb_dequeue+0x5f/0x6b
> [<ffffffffa02ce4a5>] addrconf_notify+0x69b/0x75c [ipv6]
> [<ffffffffa02eb953>] ? ip6mr_device_event+0x98/0xa9 [ipv6]
> [<ffffffff813d2413>] notifier_call_chain+0x32/0x5e
> [<ffffffff8105bdea>] raw_notifier_call_chain+0xf/0x11
> [<ffffffff8133cdac>] call_netdevice_notifiers+0x45/0x4a
> [<ffffffff8133d2b0>] rollback_registered_many+0x118/0x201
> [<ffffffff8133d3af>] unregister_netdevice_many+0x16/0x6d
> [<ffffffff8133d571>] default_device_exit_batch+0xa4/0xb8
> [<ffffffff81337c42>] ? cleanup_net+0x0/0x194
> [<ffffffff81337a2a>] ops_exit_list+0x4e/0x56
> [<ffffffff81337d36>] cleanup_net+0xf4/0x194
> [<ffffffff81053318>] process_one_work+0x187/0x280
> [<ffffffff8105441b>] worker_thread+0xff/0x19f
> [<ffffffff8105431c>] ? worker_thread+0x0/0x19f
> [<ffffffff8105776d>] kthread+0x7d/0x85
> [<ffffffff81003824>] kernel_thread_helper+0x4/0x10
> [<ffffffff810576f0>] ? kthread+0x0/0x85
> [<ffffffff81003820>] ? kernel_thread_helper+0x0/0x10
>---[ end trace 8a7e9310b35e9486 ]---

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index fa1d8f4e0051..7cb65ef79f9c 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -15,6 +15,8 @@
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
 
+static struct ctl_table empty[1];
+
 static ctl_table ipv6_table_template[] = {
 	{
 		.procname	= "route",
@@ -35,6 +37,12 @@ static ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "neigh",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= empty,
+	},
 	{ }
 };
 
@@ -152,7 +160,6 @@ static struct ctl_table_header *ip6_base;
 
 int ipv6_static_sysctl_register(void)
 {
-	static struct ctl_table empty[1];
 	ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty);
 	if (ip6_base == NULL)
 		return -ENOMEM;
-- 
cgit v1.2.2


From 9d0db8b6b1da9e3d4c696ef29449700c58d589db Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 1 Feb 2011 16:03:46 +0100
Subject: netfilter: arpt_mangle: fix return values of checkentry

In 135367b "netfilter: xtables: change xt_target.checkentry return type",
the type returned by checkentry was changed from boolean to int, but the
return values where not adjusted.

arptables: Input/output error

This broke arptables with the mangle target since it returns true
under success, which is interpreted by xtables as >0, thus
returning EIO.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/arpt_mangle.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index b8ddcc480ed9..a5e52a9f0a12 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -60,12 +60,12 @@ static int checkentry(const struct xt_tgchk_param *par)
 
 	if (mangle->flags & ~ARPT_MANGLE_MASK ||
 	    !(mangle->flags & ARPT_MANGLE_MASK))
-		return false;
+		return -EINVAL;
 
 	if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
 	   mangle->target != XT_CONTINUE)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target arpt_mangle_reg __read_mostly = {
-- 
cgit v1.2.2


From 3db7e93d3308fb882884b9f024235d6fbf542034 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 1 Feb 2011 16:06:30 +0100
Subject: netfilter: ecache: always set events bits, filter them later

For the following rule:

iptables -I PREROUTING -t raw -j CT --ctevents assured

The event delivered looks like the following:

 [UPDATE] tcp      6 src=192.168.0.2 dst=192.168.1.2 sport=37041 dport=80 src=192.168.1.2 dst=192.168.1.100 sport=80 dport=37041 [ASSURED]

Note that the TCP protocol state is not included. For that reason
the CT event filtering is not very useful for conntrackd.

To resolve this issue, instead of conditionally setting the CT events
bits based on the ctmask, we always set them and perform the filtering
in the late stage, just before the delivery.

Thus, the event delivered looks like the following:

 [UPDATE] tcp      6 432000 ESTABLISHED src=192.168.0.2 dst=192.168.1.2 sport=37041 dport=80 src=192.168.1.2 dst=192.168.1.100 sport=80 dport=37041 [ASSURED]

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_ecache.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 5702de35e2bb..63a1b915a7e4 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -63,6 +63,9 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
 		 * this does not harm and it happens very rarely. */
 		unsigned long missed = e->missed;
 
+		if (!((events | missed) & e->ctmask))
+			goto out_unlock;
+
 		ret = notify->fcn(events | missed, &item);
 		if (unlikely(ret < 0 || missed)) {
 			spin_lock_bh(&ct->lock);
-- 
cgit v1.2.2


From 4334ec8518cec3f7a4feeb3dacb46acfb24904d4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 2 Feb 2011 16:58:06 +0100
Subject: mac80211: fix TX status cookie in HW offload case

When the off-channel TX is done with remain-on-channel
offloaded to hardware, the reported cookie is wrong as
in that case we shouldn't use the SKB as the cookie but
need to instead use the corresponding r-o-c cookie
(XOR'ed with 2 to prevent API mismatches).

Fix this by keeping track of the hw_roc_skb pointer
just for the status processing and use the correct
cookie to report in this case. We can't use the
hw_roc_skb pointer itself because it is NULL'ed when
the frame is transmitted to prevent it being used
twice.

This fixes a bug where the P2P state machine in the
supplicant gets stuck because it never gets a correct
result for its transmitted frame.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c         | 2 ++
 net/mac80211/ieee80211_i.h | 2 +-
 net/mac80211/status.c      | 7 ++++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 4bc8a9250cfd..9cd73b11506e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1822,6 +1822,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 		*cookie ^= 2;
 		IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN;
 		local->hw_roc_skb = skb;
+		local->hw_roc_skb_for_status = skb;
 		mutex_unlock(&local->mtx);
 
 		return 0;
@@ -1875,6 +1876,7 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
 		if (ret == 0) {
 			kfree_skb(local->hw_roc_skb);
 			local->hw_roc_skb = NULL;
+			local->hw_roc_skb_for_status = NULL;
 		}
 
 		mutex_unlock(&local->mtx);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c47d7c0e48a4..533fd32f49ff 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -953,7 +953,7 @@ struct ieee80211_local {
 
 	struct ieee80211_channel *hw_roc_channel;
 	struct net_device *hw_roc_dev;
-	struct sk_buff *hw_roc_skb;
+	struct sk_buff *hw_roc_skb, *hw_roc_skb_for_status;
 	struct work_struct hw_roc_start, hw_roc_done;
 	enum nl80211_channel_type hw_roc_channel_type;
 	unsigned int hw_roc_duration;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 38a797217a91..071ac95c4aa0 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -323,6 +323,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) {
 		struct ieee80211_work *wk;
+		u64 cookie = (unsigned long)skb;
 
 		rcu_read_lock();
 		list_for_each_entry_rcu(wk, &local->work_list, list) {
@@ -334,8 +335,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			break;
 		}
 		rcu_read_unlock();
+		if (local->hw_roc_skb_for_status == skb) {
+			cookie = local->hw_roc_cookie ^ 2;
+			local->hw_roc_skb_for_status = NULL;
+		}
 		cfg80211_mgmt_tx_status(
-			skb->dev, (unsigned long) skb, skb->data, skb->len,
+			skb->dev, cookie, skb->data, skb->len,
 			!!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
 	}
 
-- 
cgit v1.2.2


From 6d152e23ad1a7a5b40fef1f42e017d66e6115159 Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <andy@greyhouse.net>
Date: Wed, 2 Feb 2011 14:53:25 -0800
Subject: gro: reset skb_iif on reuse

Like Herbert's change from a few days ago:

66c46d741e2e60f0e8b625b80edb0ab820c46d7a gro: Reset dev pointer on reuse

this may not be necessary at this point, but we should still clean up
the skb->skb_iif.  If not we may end up with an invalid valid for
skb->skb_iif when the skb is reused and the check is done in
__netif_receive_skb.

Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4c907895876b..b6d0bf875a8e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3426,6 +3426,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
 	skb->vlan_tci = 0;
 	skb->dev = napi->dev;
+	skb->skb_iif = 0;
 
 	napi->skb = skb;
 }
-- 
cgit v1.2.2


From 0033d5ad27a6db33a55ff39951d3ec61a8c13b89 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 3 Feb 2011 17:21:31 -0800
Subject: net: Fix bug in compat SIOCGETSGCNT handling.

Commit 709b46e8d90badda1898caea50483c12af178e96 ("net: Add compat
ioctl support for the ipv4 multicast ioctl SIOCGETSGCNT") added the
correct plumbing to handle SIOCGETSGCNT properly.

However, whilst definiting a proper "struct compat_sioc_sg_req" it
isn't actually used in ipmr_compat_ioctl().

Correct this oversight.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7e41ac0b9260..fce10a080bd6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1446,7 +1446,7 @@ struct compat_sioc_sg_req {
 
 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 {
-	struct sioc_sg_req sr;
+	struct compat_sioc_sg_req sr;
 	struct mfc_cache *c;
 	struct net *net = sock_net(sk);
 	struct mr_table *mrt;
-- 
cgit v1.2.2


From ca6b8bb097c8e0ab6bce4fa04584074dee17c0d9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 3 Feb 2011 17:24:28 -0800
Subject: net: Support compat SIOCGETVIFCNT ioctl in ipv4.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index fce10a080bd6..8b65a12654e7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1444,9 +1444,19 @@ struct compat_sioc_sg_req {
 	compat_ulong_t wrong_if;
 };
 
+struct compat_sioc_vif_req {
+	vifi_t	vifi;		/* Which iface */
+	compat_ulong_t icount;
+	compat_ulong_t ocount;
+	compat_ulong_t ibytes;
+	compat_ulong_t obytes;
+};
+
 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 {
 	struct compat_sioc_sg_req sr;
+	struct compat_sioc_vif_req vr;
+	struct vif_device *vif;
 	struct mfc_cache *c;
 	struct net *net = sock_net(sk);
 	struct mr_table *mrt;
@@ -1456,6 +1466,26 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 		return -ENOENT;
 
 	switch (cmd) {
+	case SIOCGETVIFCNT:
+		if (copy_from_user(&vr, arg, sizeof(vr)))
+			return -EFAULT;
+		if (vr.vifi >= mrt->maxvif)
+			return -EINVAL;
+		read_lock(&mrt_lock);
+		vif = &mrt->vif_table[vr.vifi];
+		if (VIF_EXISTS(mrt, vr.vifi)) {
+			vr.icount = vif->pkt_in;
+			vr.ocount = vif->pkt_out;
+			vr.ibytes = vif->bytes_in;
+			vr.obytes = vif->bytes_out;
+			read_unlock(&mrt_lock);
+
+			if (copy_to_user(arg, &vr, sizeof(vr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
 	case SIOCGETSGCNT:
 		if (copy_from_user(&sr, arg, sizeof(sr)))
 			return -EFAULT;
-- 
cgit v1.2.2


From e2d57766e6744f2956975dd2086d82957187b0f6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 3 Feb 2011 17:59:32 -0800
Subject: net: Provide compat support for SIOCGETMIFCNT_IN6 and
 SIOCGETSGCNT_IN6.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/raw.c   | 19 ++++++++++++++
 2 files changed, 94 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9fab274019c0..0e1d53bcf1e0 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -34,6 +34,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/compat.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -1804,6 +1805,80 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	}
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_sioc_sg_req6 {
+	struct sockaddr_in6 src;
+	struct sockaddr_in6 grp;
+	compat_ulong_t pktcnt;
+	compat_ulong_t bytecnt;
+	compat_ulong_t wrong_if;
+};
+
+struct compat_sioc_mif_req6 {
+	mifi_t	mifi;
+	compat_ulong_t icount;
+	compat_ulong_t ocount;
+	compat_ulong_t ibytes;
+	compat_ulong_t obytes;
+};
+
+int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+	struct compat_sioc_sg_req6 sr;
+	struct compat_sioc_mif_req6 vr;
+	struct mif_device *vif;
+	struct mfc6_cache *c;
+	struct net *net = sock_net(sk);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
+
+	switch (cmd) {
+	case SIOCGETMIFCNT_IN6:
+		if (copy_from_user(&vr, arg, sizeof(vr)))
+			return -EFAULT;
+		if (vr.mifi >= mrt->maxvif)
+			return -EINVAL;
+		read_lock(&mrt_lock);
+		vif = &mrt->vif6_table[vr.mifi];
+		if (MIF_EXISTS(mrt, vr.mifi)) {
+			vr.icount = vif->pkt_in;
+			vr.ocount = vif->pkt_out;
+			vr.ibytes = vif->bytes_in;
+			vr.obytes = vif->bytes_out;
+			read_unlock(&mrt_lock);
+
+			if (copy_to_user(arg, &vr, sizeof(vr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	case SIOCGETSGCNT_IN6:
+		if (copy_from_user(&sr, arg, sizeof(sr)))
+			return -EFAULT;
+
+		read_lock(&mrt_lock);
+		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+		if (c) {
+			sr.pktcnt = c->mfc_un.res.pkt;
+			sr.bytecnt = c->mfc_un.res.bytes;
+			sr.wrong_if = c->mfc_un.res.wrong_if;
+			read_unlock(&mrt_lock);
+
+			if (copy_to_user(arg, &sr, sizeof(sr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+#endif
 
 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
 {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 86c39526ba5e..c5b0915d106b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -31,6 +31,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/skbuff.h>
+#include <linux/compat.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
@@ -1157,6 +1158,23 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	}
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case SIOCOUTQ:
+	case SIOCINQ:
+		return -ENOIOCTLCMD;
+	default:
+#ifdef CONFIG_IPV6_MROUTE
+		return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg));
+#else
+		return -ENOIOCTLCMD;
+#endif
+	}
+}
+#endif
+
 static void rawv6_close(struct sock *sk, long timeout)
 {
 	if (inet_sk(sk)->inet_num == IPPROTO_RAW)
@@ -1215,6 +1233,7 @@ struct proto rawv6_prot = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_rawv6_setsockopt,
 	.compat_getsockopt = compat_rawv6_getsockopt,
+	.compat_ioctl	   = compat_rawv6_ioctl,
 #endif
 };
 
-- 
cgit v1.2.2


From 1158f762e57c1cdcda631c1c5f339e4853caa82b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 4 Feb 2011 13:02:36 -0800
Subject: bridge: Don't put partly initialized fdb into hash

The fdb_create() puts a new fdb into hash with only addr set. This is
not good, since there are callers, that search the hash w/o the lock
and access all the other its fields.

Applies to current netdev tree.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 2872393b2939..88485cc74dc3 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -328,12 +328,12 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 	fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
 	if (fdb) {
 		memcpy(fdb->addr.addr, addr, ETH_ALEN);
-		hlist_add_head_rcu(&fdb->hlist, head);
-
 		fdb->dst = source;
 		fdb->is_local = is_local;
 		fdb->is_static = is_local;
 		fdb->ageing_timer = jiffies;
+
+		hlist_add_head_rcu(&fdb->hlist, head);
 	}
 	return fdb;
 }
-- 
cgit v1.2.2


From fc7c976dc7a565213393ce700d4a6105f037bf20 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 7 Feb 2011 12:05:00 +0100
Subject: mac80211: fix the skb cloned check in the tx path

Using skb_header_cloned to check if it's safe to write to the skb is not
enough - mac80211 also touches the tailroom of the skb.
Initially this check was only used to increase a counter, however this
commit changed the code to also skip skb data reallocation if no extra
head/tailroom was needed:

commit 4cd06a344db752f513437138953af191cbe9a691
mac80211: skip unnecessary pskb_expand_head calls

It added a regression at least with iwl3945, which is fixed by this patch.

Reported-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Tested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b64b42bc774b..b0beaa58246b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1547,7 +1547,7 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
 		skb_orphan(skb);
 	}
 
-	if (skb_header_cloned(skb))
+	if (skb_cloned(skb))
 		I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
 	else if (head_need || tail_need)
 		I802_DEBUG_INC(local->tx_expand_skb_head);
-- 
cgit v1.2.2


From 95c3043008ca8449feb96aba5481fe31c2ea750b Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Mon, 7 Feb 2011 00:08:15 +0000
Subject: x25: possible skb leak on bad facilities

Originally x25_parse_facilities returned
-1 for an error
 0 meaning 0 length facilities
>0 the length of the facilities parsed.

5ef41308f94dc ("x25: Prevent crashing when parsing bad X.25 facilities") introduced more
error checking in x25_parse_facilities however used 0 to indicate bad parsing
a6331d6f9a429 ("memory corruption in X.25 facilities parsing") followed this further for
DTE facilities, again using 0 for bad parsing.

The meaning of 0 got confused in the callers.
If the facilities are messed up we can't determine where the data starts.
So patch makes all parsing errors return -1 and ensures callers close and don't use the skb further.

Reported-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_facilities.c | 28 +++++++++++++++++++---------
 net/x25/x25_in.c         | 14 +++++++++++---
 2 files changed, 30 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 55187c8f6420..406207515b5e 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -27,9 +27,19 @@
 #include <net/sock.h>
 #include <net/x25.h>
 
-/*
- * Parse a set of facilities into the facilities structures. Unrecognised
- *	facilities are written to the debug log file.
+/**
+ * x25_parse_facilities - Parse facilities from skb into the facilities structs
+ *
+ * @skb: sk_buff to parse
+ * @facilities: Regular facilites, updated as facilities are found
+ * @dte_facs: ITU DTE facilities, updated as DTE facilities are found
+ * @vc_fac_mask: mask is updated with all facilities found
+ *
+ * Return codes:
+ *  -1 - Parsing error, caller should drop call and clean up
+ *   0 - Parse OK, this skb has no facilities
+ *  >0 - Parse OK, returns the length of the facilities header
+ *
  */
 int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 		struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask)
@@ -62,7 +72,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 		switch (*p & X25_FAC_CLASS_MASK) {
 		case X25_FAC_CLASS_A:
 			if (len < 2)
-				return 0;
+				return -1;
 			switch (*p) {
 			case X25_FAC_REVERSE:
 				if((p[1] & 0x81) == 0x81) {
@@ -107,7 +117,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			break;
 		case X25_FAC_CLASS_B:
 			if (len < 3)
-				return 0;
+				return -1;
 			switch (*p) {
 			case X25_FAC_PACKET_SIZE:
 				facilities->pacsize_in  = p[1];
@@ -130,7 +140,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			break;
 		case X25_FAC_CLASS_C:
 			if (len < 4)
-				return 0;
+				return -1;
 			printk(KERN_DEBUG "X.25: unknown facility %02X, "
 			       "values %02X, %02X, %02X\n",
 			       p[0], p[1], p[2], p[3]);
@@ -139,18 +149,18 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 			break;
 		case X25_FAC_CLASS_D:
 			if (len < p[1] + 2)
-				return 0;
+				return -1;
 			switch (*p) {
 			case X25_FAC_CALLING_AE:
 				if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
-					return 0;
+					return -1;
 				dte_facs->calling_len = p[2];
 				memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
 				*vc_fac_mask |= X25_MASK_CALLING_AE;
 				break;
 			case X25_FAC_CALLED_AE:
 				if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
-					return 0;
+					return -1;
 				dte_facs->called_len = p[2];
 				memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
 				*vc_fac_mask |= X25_MASK_CALLED_AE;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index f729f022be69..15de65f04719 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -91,10 +91,10 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 {
 	struct x25_address source_addr, dest_addr;
 	int len;
+	struct x25_sock *x25 = x25_sk(sk);
 
 	switch (frametype) {
 		case X25_CALL_ACCEPTED: {
-			struct x25_sock *x25 = x25_sk(sk);
 
 			x25_stop_timer(sk);
 			x25->condition = 0x00;
@@ -113,14 +113,16 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 						&dest_addr);
 			if (len > 0)
 				skb_pull(skb, len);
+			else if (len < 0)
+				goto out_clear;
 
 			len = x25_parse_facilities(skb, &x25->facilities,
 						&x25->dte_facilities,
 						&x25->vc_facil_mask);
 			if (len > 0)
 				skb_pull(skb, len);
-			else
-				return -1;
+			else if (len < 0)
+				goto out_clear;
 			/*
 			 *	Copy any Call User Data.
 			 */
@@ -144,6 +146,12 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 	}
 
 	return 0;
+
+out_clear:
+	x25_write_internal(sk, X25_CLEAR_REQUEST);
+	x25->state = X25_STATE_2;
+	x25_start_t23timer(sk);
+	return 0;
 }
 
 /*
-- 
cgit v1.2.2


From 531c9da8c854c5b075383253a57fdd4e0be82e99 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 6 Feb 2011 23:26:43 +0000
Subject: batman-adv: Linearize fragment packets before merge

We access the data inside the skbs of two fragments directly using memmove
during the merge. The data of the skb could span over multiple skb pages. An
direct access without knowledge about the pages would lead to an invalid memory
access.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
[lindner_marek@yahoo.de: Move return from function to the end]
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/unicast.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index ee41fef04b21..d1a611322549 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -50,12 +50,12 @@ static struct sk_buff *frag_merge_packet(struct list_head *head,
 		skb = tfp->skb;
 	}
 
+	if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0)
+		goto err;
+
 	skb_pull(tmp_skb, sizeof(struct unicast_frag_packet));
-	if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) {
-		/* free buffered skb, skb will be freed later */
-		kfree_skb(tfp->skb);
-		return NULL;
-	}
+	if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0)
+		goto err;
 
 	/* move free entry to end */
 	tfp->skb = NULL;
@@ -70,6 +70,11 @@ static struct sk_buff *frag_merge_packet(struct list_head *head,
 	unicast_packet->packet_type = BAT_UNICAST;
 
 	return skb;
+
+err:
+	/* free buffered skb, skb will be freed later */
+	kfree_skb(tfp->skb);
+	return NULL;
 }
 
 static void frag_create_entry(struct list_head *head, struct sk_buff *skb)
-- 
cgit v1.2.2


From b2df5a8446e135f7648736b8bec8179c88ce360d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Feb 2011 14:31:31 -0800
Subject: net/caif: Fix dangling list pointer in freed object on error.

rtnl_link_ops->setup(), and the "setup" callback passed to alloc_netdev*(),
cannot make state changes which need to be undone on failure.  There is
no cleanup mechanism available at this point.

So we have to add the caif private instance to the global list once we
are sure that register_netdev() has succedded in ->newlink().

Otherwise, if register_netdev() fails, the caller will invoke free_netdev()
and we will have a reference to freed up memory on the chnl_net_list.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/chnl_net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index fa9dab372b68..6008d6dc18a0 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -394,9 +394,7 @@ static void ipcaif_net_setup(struct net_device *dev)
 	priv->conn_req.sockaddr.u.dgm.connection_id = -1;
 	priv->flowenabled = false;
 
-	ASSERT_RTNL();
 	init_waitqueue_head(&priv->netmgmt_wq);
-	list_add(&priv->list_field, &chnl_net_list);
 }
 
 
@@ -453,6 +451,8 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
 	ret = register_netdevice(dev);
 	if (ret)
 		pr_warn("device rtml registration failed\n");
+	else
+		list_add(&caifdev->list_field, &chnl_net_list);
 	return ret;
 }
 
-- 
cgit v1.2.2


From 8d3bdbd55a7e2a3f2c148a4830aa26dd682b21c4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Feb 2011 15:02:50 -0800
Subject: net: Fix lockdep regression caused by initializing netdev queues too
 early.

In commit aa9421041128abb4d269ee1dc502ff65fb3b7d69 ("net: init ingress
queue") we moved the allocation and lock initialization of the queues
into alloc_netdev_mq() since register_netdevice() is way too late.

The problem is that dev->type is not setup until the setup()
callback is invoked by alloc_netdev_mq(), and the dev->type is
what determines the lockdep class to use for the locks in the
queues.

Fix this by doing the queue allocation after the setup() callback
runs.

This is safe because the setup() callback is not allowed to make any
state changes that need to be undone on error (memory allocations,
etc.).  It may, however, make state changes that are undone by
free_netdev() (such as netif_napi_add(), which is done by the
ipoib driver's setup routine).

The previous code also leaked a reference to the &init_net namespace
object on RX/TX queue allocation failures.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index b6d0bf875a8e..8e726cb47ed7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5660,30 +5660,35 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev_net_set(dev, &init_net);
 
+	dev->gso_max_size = GSO_MAX_SIZE;
+
+	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
+	dev->ethtool_ntuple_list.count = 0;
+	INIT_LIST_HEAD(&dev->napi_list);
+	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->link_watch_list);
+	dev->priv_flags = IFF_XMIT_DST_RELEASE;
+	setup(dev);
+
 	dev->num_tx_queues = txqs;
 	dev->real_num_tx_queues = txqs;
 	if (netif_alloc_netdev_queues(dev))
-		goto free_pcpu;
+		goto free_all;
 
 #ifdef CONFIG_RPS
 	dev->num_rx_queues = rxqs;
 	dev->real_num_rx_queues = rxqs;
 	if (netif_alloc_rx_queues(dev))
-		goto free_pcpu;
+		goto free_all;
 #endif
 
-	dev->gso_max_size = GSO_MAX_SIZE;
-
-	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
-	dev->ethtool_ntuple_list.count = 0;
-	INIT_LIST_HEAD(&dev->napi_list);
-	INIT_LIST_HEAD(&dev->unreg_list);
-	INIT_LIST_HEAD(&dev->link_watch_list);
-	dev->priv_flags = IFF_XMIT_DST_RELEASE;
-	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
 
+free_all:
+	free_netdev(dev);
+	return NULL;
+
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
 	kfree(dev->_tx);
-- 
cgit v1.2.2


From c317428644c0af137d80069ab178cd797da3be45 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 9 Feb 2011 08:08:20 +0100
Subject: netfilter: nf_conntrack: set conntrack templates again if we return
 NF_REPEAT

The TCP tracking code has a special case that allows to return
NF_REPEAT if we receive a new SYN packet while in TIME_WAIT state.

In this situation, the TCP tracking code destroys the existing
conntrack to start a new clean session.

[DESTROY] tcp      6 src=192.168.0.2 dst=192.168.1.2 sport=38925 dport=8000 src=192.168.1.2 dst=192.168.1.100 sport=8000 dport=38925 [ASSURED]
    [NEW] tcp      6 120 SYN_SENT src=192.168.0.2 dst=192.168.1.2 sport=38925 dport=8000 [UNREPLIED] src=192.168.1.2 dst=192.168.1.100 sport=8000 dport=38925

However, this is a problem for the iptables' CT target event filtering
which will not work in this case since the conntrack template will not
be there for the new session. To fix this, we reassign the conntrack
template to the packet if we return NF_REPEAT.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e61511929c66..84f4fcc5884b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -942,8 +942,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
 		nf_conntrack_event_cache(IPCT_REPLY, ct);
 out:
-	if (tmpl)
-		nf_ct_put(tmpl);
+	if (tmpl) {
+		/* Special case: we have to repeat this hook, assign the
+		 * template again to this packet. We assume that this packet
+		 * has no conntrack assigned. This is used by nf_ct_tcp. */
+		if (ret == NF_REPEAT)
+			skb->nfct = (struct nf_conntrack *)tmpl;
+		else
+			nf_ct_put(tmpl);
+	}
 
 	return ret;
 }
-- 
cgit v1.2.2


From 96642d42f076101ba98866363d908cab706d156c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 9 Feb 2011 21:48:36 -0800
Subject: x25: Do not reference freed memory.

In x25_link_free(), we destroy 'nb' before dereferencing
'nb->dev'.  Don't do this, because 'nb' might be freed
by then.

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Tested-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_link.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 4cbc942f762a..21306928d47f 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -396,9 +396,12 @@ void __exit x25_link_free(void)
 	write_lock_bh(&x25_neigh_list_lock);
 
 	list_for_each_safe(entry, tmp, &x25_neigh_list) {
+		struct net_device *dev;
+
 		nb = list_entry(entry, struct x25_neigh, node);
+		dev = nb->dev;
 		__x25_remove_neigh(nb);
-		dev_put(nb->dev);
+		dev_put(dev);
 	}
 	write_unlock_bh(&x25_neigh_list_lock);
 }
-- 
cgit v1.2.2