From 62bac4af3d778f6d06d351c0442008967c512588 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 12:50:15 -0400 Subject: svcrpc: don't set then immediately clear XPT_DEFERRED There's no harm to doing this, since the only caller will immediately call svc_enqueue() afterwards, ensuring we don't miss the remaining deferred requests just because XPT_DEFERRED was briefly cleared. But why not just do this the simple way? Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c82fe739fbdc..a74cb67f15bf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1059,14 +1059,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags)) return NULL; spin_lock(&xprt->xpt_lock); - clear_bit(XPT_DEFERRED, &xprt->xpt_flags); if (!list_empty(&xprt->xpt_deferred)) { dr = list_entry(xprt->xpt_deferred.next, struct svc_deferred_req, handle.recent); list_del_init(&dr->handle.recent); - set_bit(XPT_DEFERRED, &xprt->xpt_flags); - } + } else + clear_bit(XPT_DEFERRED, &xprt->xpt_flags); spin_unlock(&xprt->xpt_lock); return dr; } -- cgit v1.2.2 From ca7896cd83456082b1e78816cdf7e41658ef7bcd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 14:12:40 -0400 Subject: nfsd4: centralize more calls to svc_xprt_received Follow up on b48fa6b99100dc7772af3cd276035fcec9719ceb by moving all the svc_xprt_received() calls for the main xprt to one place. The clearing of XPT_BUSY here is critical to the correctness of the server, so I'd prefer it to be obvious where we do it. The only substantive result is moving svc_xprt_received() after svc_receive_deferred(). Other than a (likely insignificant) delay waking up the next thread, that should be harmless. Also reshuffle the exit code a little to skip a few other steps that we don't care about the in the svc_delete_xprt() case. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a74cb67f15bf..dd61cd02461f 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -716,7 +716,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); svc_delete_xprt(xprt); - } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + /* Leave XPT_BUSY set on the dead xprt: */ + goto out; + } + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { @@ -741,28 +744,23 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&serv->sv_lock); svc_xprt_received(newxpt); } - svc_xprt_received(xprt); } else { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); - if (rqstp->rq_deferred) { - svc_xprt_received(xprt); + if (rqstp->rq_deferred) len = svc_deferred_recv(rqstp); - } else { + else len = xprt->xpt_ops->xpo_recvfrom(rqstp); - svc_xprt_received(xprt); - } dprintk("svc: got len=%d\n", len); } + svc_xprt_received(xprt); /* No data, incomplete (TCP) read, or accept() */ - if (len == 0 || len == -EAGAIN) { - rqstp->rq_res.len = 0; - svc_xprt_release(rqstp); - return -EAGAIN; - } + if (len == 0 || len == -EAGAIN) + goto out; + clear_bit(XPT_OLD, &xprt->xpt_flags); rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); @@ -771,6 +769,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; return len; +out: + rqstp->rq_res.len = 0; + svc_xprt_release(rqstp); + return -EAGAIN; } EXPORT_SYMBOL_GPL(svc_recv); -- cgit v1.2.2 From f8c0d226fef05226ff1a85055c8ed663022f40c1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 18:11:21 -0400 Subject: svcrpc: simplify svc_close_all There's no need to be fooling with XPT_BUSY now that all the threads are gone. The list_del_init() here could execute at the same time as the svc_xprt_enqueue()'s list_add_tail(), with undefined results. We don't really care at this point, but it might result in a spurious list-corruption warning or something. And svc_close() isn't adding any value; just call svc_delete_xprt() directly. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index dd61cd02461f..8c018df80692 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -941,16 +941,16 @@ void svc_close_all(struct list_head *xprt_list) struct svc_xprt *xprt; struct svc_xprt *tmp; + /* + * The server is shutting down, and no more threads are running. + * svc_xprt_enqueue() might still be running, but at worst it + * will re-add the xprt to sp_sockets, which will soon get + * freed. So we don't bother with any more locking, and don't + * leave the close to the (nonexistent) server threads: + */ list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { set_bit(XPT_CLOSE, &xprt->xpt_flags); - if (test_bit(XPT_BUSY, &xprt->xpt_flags)) { - /* Waiting to be processed, but no threads left, - * So just remove it from the waiting list - */ - list_del_init(&xprt->xpt_ready); - clear_bit(XPT_BUSY, &xprt->xpt_flags); - } - svc_close_xprt(xprt); + svc_delete_xprt(xprt); } } -- cgit v1.2.2 From b176331627fccc726d28f4fc4a357d1f3c19dbf0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 20:24:48 -0400 Subject: svcrpc: svc_close_xprt comment Neil Brown had to explain to me why we do this here; record the answer for posterity. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8c018df80692..bfbda676574a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -931,7 +931,12 @@ void svc_close_xprt(struct svc_xprt *xprt) if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) /* someone else will have to effect the close */ return; - + /* + * We expect svc_close_xprt() to work even when no threads are + * running (e.g., while configuring the server before starting + * any threads), so if the transport isn't busy, we delete + * it ourself: + */ svc_delete_xprt(xprt); } EXPORT_SYMBOL_GPL(svc_close_xprt); -- cgit v1.2.2 From 9c335c0b8daf56b9f73479d00b1dd726e1fcca09 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 26 Oct 2010 11:32:03 -0400 Subject: svcrpc: fix wspace-checking race We call svc_xprt_enqueue() after something happens which we think may require handling from a server thread. To avoid such events being lost, svc_xprt_enqueue() must guarantee that there will be a svc_serv() call from a server thread following any such event. It does that by either waking up a server thread itself, or checking that XPT_BUSY is set (in which case somebody else is doing it). But the check of XPT_BUSY could occur just as someone finishes processing some other event, and just before they clear XPT_BUSY. Therefore it's important not to clear XPT_BUSY without subsequently doing another svc_export_enqueue() to check whether the xprt should be requeued. The xpo_wspace() check in svc_xprt_enqueue() breaks this rule, allowing an event to be missed in situations like: data arrives call svc_tcp_data_ready(): call svc_xprt_enqueue(): set BUSY find no write space svc_reserve(): free up write space call svc_enqueue(): test BUSY clear BUSY So, instead, check wspace in the same places that the state flags are checked: before taking BUSY, and in svc_receive(). Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bfbda676574a..b6f47da170b3 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -302,6 +302,15 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) list_del(&rqstp->rq_list); } +static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) +{ + if (xprt->xpt_flags & ((1<xpt_flags & ((1<xpt_ops->xpo_has_wspace(xprt); + return false; +} + /* * Queue up a transport with data pending. If there are idle nfsd * processes, wake 'em up. @@ -314,8 +323,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_rqst *rqstp; int cpu; - if (!(xprt->xpt_flags & - ((1<xpt_pool != NULL); xprt->xpt_pool = pool; - /* Handle pending connection */ - if (test_bit(XPT_CONN, &xprt->xpt_flags)) - goto process; - - /* Handle close in-progress */ - if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) - goto process; - - /* Check if we have space to reply to a request */ - if (!xprt->xpt_ops->xpo_has_wspace(xprt)) { - /* Don't enqueue while not enough space for reply */ - dprintk("svc: no write space, transport %p not enqueued\n", - xprt); - xprt->xpt_pool = NULL; - clear_bit(XPT_BUSY, &xprt->xpt_flags); - goto out_unlock; - } - - process: if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, @@ -744,7 +733,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&serv->sv_lock); svc_xprt_received(newxpt); } - } else { + } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); -- cgit v1.2.2 From 7c96aef75949a56ec427fc6a2522dace2af33605 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Nov 2010 11:27:01 +1100 Subject: sunrpc: remove xpt_pool The xpt_pool field is only used for reporting BUGs. And it isn't used correctly. In particular, when it is cleared in svc_xprt_received before XPT_BUSY is cleared, there is no guarantee that either the compiler or the CPU might not re-order to two assignments, just setting xpt_pool to NULL after XPT_BUSY is cleared. If a different cpu were running svc_xprt_enqueue at this moment, it might see XPT_BUSY clear and then xpt_pool non-NULL, and so BUG. This could be fixed by calling smp_mb__before_clear_bit() before the clear_bit. However as xpt_pool isn't really used, it seems safest to simply remove xpt_pool. Another alternate would be to change the clear_bit to clear_bit_unlock, and the test_and_set_bit to test_and_set_bit_lock. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5a75d23645c8..5eae53b1e306 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -351,8 +351,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) dprintk("svc: transport %p busy, not enqueued\n", xprt); goto out_unlock; } - BUG_ON(xprt->xpt_pool != NULL); - xprt->xpt_pool = pool; if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, @@ -370,13 +368,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); pool->sp_stats.threads_woken++; - BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); pool->sp_stats.sockets_queued++; - BUG_ON(xprt->xpt_pool != pool); } out_unlock: @@ -415,7 +411,6 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) void svc_xprt_received(struct svc_xprt *xprt) { BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); - xprt->xpt_pool = NULL; /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_xprt_enqueue with: */ -- cgit v1.2.2 From 3942302ea9e1dffa933021b20bf1642046e7641b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Nov 2010 11:27:01 +1100 Subject: sunrpc: svc_sock_names should hold ref to socket being closed. Currently svc_sock_names calls svc_close_xprt on a svc_sock to which it does not own a reference. As soon as svc_close_xprt sets XPT_CLOSE, the socket could be freed by a separate thread (though this is a very unlikely race). It is safer to hold a reference while calling svc_close_xprt. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 07919e16be3e..52bd1130e83a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -324,19 +324,21 @@ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, len = onelen; break; } - if (toclose && strcmp(toclose, buf + len) == 0) + if (toclose && strcmp(toclose, buf + len) == 0) { closesk = svsk; - else + svc_xprt_get(&closesk->sk_xprt); + } else len += onelen; } spin_unlock_bh(&serv->sv_lock); - if (closesk) + if (closesk) { /* Should unregister with portmap, but you cannot * unregister just one protocol... */ svc_close_xprt(&closesk->sk_xprt); - else if (toclose) + svc_xprt_put(&closesk->sk_xprt); + } else if (toclose) return -ENOENT; return len; } -- cgit v1.2.2 From 66c941f4aa8aef397a757001af61073db23b39e5 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 15 Dec 2010 14:47:20 +0800 Subject: net: sunrpc: kill unused macros These macros never be used for several years. Signed-off-by: Shan Wei Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 -- net/sunrpc/svcauth.c | 1 - net/sunrpc/svcauth_unix.c | 2 -- 3 files changed, 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index dec2a6fc7c12..bcdae78fdfc6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -67,7 +67,6 @@ static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b) #define RSI_HASHBITS 6 #define RSI_HASHMAX (1< Date: Sun, 14 Nov 2010 18:08:11 -0800 Subject: net/sunrpc/auth_gss/gss_krb5_crypto.c: Use normal negative error value return And remove unnecessary double semicolon too. No effect to code, as test is != 0. Signed-off-by: Joe Perches Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 75ee993ea057..9576f35ab701 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -137,7 +137,7 @@ arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4]) ms_usage = 13; break; default: - return EINVAL;; + return -EINVAL; } salt[0] = (ms_usage >> 0) & 0xff; salt[1] = (ms_usage >> 8) & 0xff; -- cgit v1.2.2 From 31f7aa65f536995c6d933c57230919ae408952a5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 24 Dec 2010 14:03:38 -0500 Subject: svcrpc: modifying valid sunrpc cache entries is racy Once a sunrpc cache entry is VALID, we should be replacing it (and allowing any concurrent users to destroy it on last put) instead of trying to update it in place. Otherwise someone referencing the ip_map we're modifying here could try to use the m_client just as we're putting the last reference. The bug should only be seen by users of the legacy nfsd interfaces. (Thanks to Neil for suggestion to use sunrpc_invalidate.) Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a04ac9193d59..59a7c524a8b1 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -401,8 +401,7 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) return NULL; if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { - if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0) - auth_domain_put(&ipm->m_client->h); + sunrpc_invalidate(&ipm->h, sn->ip_map_cache); rv = NULL; } else { rv = &ipm->m_client->h; -- cgit v1.2.2 From bdd5f05d91e8ae68075b812ce244c918d3d752cd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 13:31:45 -0500 Subject: SUNRPC: Remove more code when NFSD_DEPRECATED is not configured Signed-off-by: NeilBrown [bfields@redhat.com: moved svcauth_unix_purge outside ifdef's.] Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 59a7c524a8b1..30916b06c12b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -30,7 +30,9 @@ struct unix_domain { struct auth_domain h; +#ifdef CONFIG_NFSD_DEPRECATED int addr_changes; +#endif /* CONFIG_NFSD_DEPRECATED */ /* other stuff later */ }; @@ -64,7 +66,9 @@ struct auth_domain *unix_domain_find(char *name) return NULL; } new->h.flavour = &svcauth_unix; +#ifdef CONFIG_NFSD_DEPRECATED new->addr_changes = 0; +#endif /* CONFIG_NFSD_DEPRECATED */ rv = auth_domain_lookup(name, &new->h); } } @@ -91,7 +95,9 @@ struct ip_map { char m_class[8]; /* e.g. "nfsd" */ struct in6_addr m_addr; struct unix_domain *m_client; +#ifdef CONFIG_NFSD_DEPRECATED int m_add_change; +#endif /* CONFIG_NFSD_DEPRECATED */ }; static void ip_map_put(struct kref *kref) @@ -145,7 +151,9 @@ static void update(struct cache_head *cnew, struct cache_head *citem) kref_get(&item->m_client->h.ref); new->m_client = item->m_client; +#ifdef CONFIG_NFSD_DEPRECATED new->m_add_change = item->m_add_change; +#endif /* CONFIG_NFSD_DEPRECATED */ } static struct cache_head *ip_map_alloc(void) { @@ -330,6 +338,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, ip.h.flags = 0; if (!udom) set_bit(CACHE_NEGATIVE, &ip.h.flags); +#ifdef CONFIG_NFSD_DEPRECATED else { ip.m_add_change = udom->addr_changes; /* if this is from the legacy set_client system call, @@ -338,6 +347,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, if (expiry == NEVER) ip.m_add_change++; } +#endif /* CONFIG_NFSD_DEPRECATED */ ip.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ @@ -357,6 +367,7 @@ static inline int ip_map_update(struct net *net, struct ip_map *ipm, return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry); } +#ifdef CONFIG_NFSD_DEPRECATED int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; @@ -411,6 +422,7 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) return rv; } EXPORT_SYMBOL_GPL(auth_unix_lookup); +#endif /* CONFIG_NFSD_DEPRECATED */ void svcauth_unix_purge(void) { -- cgit v1.2.2 From d76d1815f3e72fb627ad7f95ef63120b0a557c9c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 21:28:34 -0500 Subject: svcrpc: avoid double reply caused by deferral race Commit d29068c431599fa "sunrpc: Simplify cache_defer_req and related functions." asserted that cache_check() could determine success or failure of cache_defer_req() by checking the CACHE_PENDING bit. This isn't quite right. We need to know whether cache_defer_req() created a deferred request, in which case sending an rpc reply has become the responsibility of the deferred request, and it is important that we not send our own reply, resulting in two different replies to the same request. And the CACHE_PENDING bit doesn't tell us that; we could have succesfully created a deferred request at the same time as another thread cleared the CACHE_PENDING bit. So, partially revert that commit, to ensure that cache_check() returns -EAGAIN if and only if a deferred request has been created. Signed-off-by: J. Bruce Fields Acked-by: NeilBrown --- net/sunrpc/cache.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index e433e7580e27..0d6002f26d82 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -37,7 +37,7 @@ #define RPCDBG_FACILITY RPCDBG_CACHE -static void cache_defer_req(struct cache_req *req, struct cache_head *item); +static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) @@ -268,9 +268,11 @@ int cache_check(struct cache_detail *detail, } if (rv == -EAGAIN) { - cache_defer_req(rqstp, h); - if (!test_bit(CACHE_PENDING, &h->flags)) { - /* Request is not deferred */ + if (!cache_defer_req(rqstp, h)) { + /* + * Request was not deferred; handle it as best + * we can ourselves: + */ rv = cache_is_valid(detail, h); if (rv == -EAGAIN) rv = -ETIMEDOUT; @@ -618,18 +620,19 @@ static void cache_limit_defers(void) discard->revisit(discard, 1); } -static void cache_defer_req(struct cache_req *req, struct cache_head *item) +/* Return true if and only if a deferred request is queued. */ +static bool cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; if (req->thread_wait) { cache_wait_req(req, item); if (!test_bit(CACHE_PENDING, &item->flags)) - return; + return false; } dreq = req->defer(req); if (dreq == NULL) - return; + return false; setup_deferral(dreq, item, 1); if (!test_bit(CACHE_PENDING, &item->flags)) /* Bit could have been cleared before we managed to @@ -638,6 +641,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); cache_limit_defers(); + return true; } static void cache_revisit_request(struct cache_head *item) -- cgit v1.2.2 From 9e701c610923aaeac8b38b9202a686d1cc9ee35d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 21:56:36 -0500 Subject: svcrpc: simpler request dropping Currently we use -EAGAIN returns to determine when to drop a deferred request. On its own, that is error-prone, as it makes us treat -EAGAIN returns from other functions specially to prevent inadvertent dropping. So, use a flag on the request instead. Returning an error on request deferral is still required, to prevent further processing, but we no longer need worry that an error return on its own could result in a drop. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 3 ++- net/sunrpc/svc_xprt.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6359c42c4941..df1931f8ae98 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1005,6 +1005,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) rqstp->rq_splice_ok = 1; /* Will be turned off only when NFSv4 Sessions are used */ rqstp->rq_usedeferral = 1; + rqstp->rq_dropme = false; /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); @@ -1106,7 +1107,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); /* Encode reply */ - if (*statp == rpc_drop_reply) { + if (rqstp->rq_dropme) { if (procp->pc_release) procp->pc_release(rqstp, NULL, rqstp->rq_resp); goto dropit; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5eae53b1e306..173f3b975d49 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1019,6 +1019,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) } svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; + rqstp->rq_dropme = true; dr->handle.revisit = svc_revisit; return &dr->handle; -- cgit v1.2.2 From 6bab93f87ec703bf6650875881b11f9f27d7da56 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Jan 2011 15:10:27 -0500 Subject: svcrpc: take lock on turning entry NEGATIVE in cache_check We attempt to turn a cache entry negative in place. But that entry may already have been filled in by some other task since we last checked whether it was valid, so we could be modifying an already-valid entry. If nothing else there's a likely leak in such a case when the entry is eventually put() and contents are not freed because it has CACHE_NEGATIVE set. So, take the cache_lock just as sunrpc_cache_update() does. Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 0d6002f26d82..a6c57334fa13 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -213,6 +213,23 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head } } +static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h) +{ + int rv; + + write_lock(&detail->hash_lock); + rv = cache_is_valid(detail, h); + if (rv != -EAGAIN) { + write_unlock(&detail->hash_lock); + return rv; + } + set_bit(CACHE_NEGATIVE, &h->flags); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + write_unlock(&detail->hash_lock); + cache_fresh_unlocked(h, detail); + return -ENOENT; +} + /* * This is the generic cache management routine for all * the authentication caches. @@ -251,14 +268,8 @@ int cache_check(struct cache_detail *detail, case -EINVAL: clear_bit(CACHE_PENDING, &h->flags); cache_revisit_request(h); - if (rv == -EAGAIN) { - set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); - cache_fresh_unlocked(h, detail); - rv = -ENOENT; - } + rv = try_to_negate_entry(detail, h); break; - case -EAGAIN: clear_bit(CACHE_PENDING, &h->flags); cache_revisit_request(h); -- cgit v1.2.2 From fdef7aa5d4020fd94ffcbf0078d6bd9e5a111e19 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 14:12:47 -0500 Subject: svcrpc: ensure cache_check caller sees updated entry Supposes cache_check runs simultaneously with an update on a different CPU: cache_check task doing update ^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^ 1. test for CACHE_VALID 1'. set entry->data & !CACHE_NEGATIVE 2. use entry->data 2'. set CACHE_VALID If the two memory writes performed in step 1' and 2' appear misordered with respect to the reads in step 1 and 2, then the caller could get stale data at step 2 even though it saw CACHE_VALID set on the cache entry. Add memory barriers to prevent this. Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index a6c57334fa13..72ad836e4fe0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -128,6 +128,7 @@ static void cache_fresh_locked(struct cache_head *head, time_t expiry) { head->expiry_time = expiry; head->last_refresh = seconds_since_boot(); + smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ set_bit(CACHE_VALID, &head->flags); } @@ -208,8 +209,16 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head /* entry is valid */ if (test_bit(CACHE_NEGATIVE, &h->flags)) return -ENOENT; - else + else { + /* + * In combination with write barrier in + * sunrpc_cache_update, ensures that anyone + * using the cache entry after this sees the + * updated contents: + */ + smp_rmb(); return 0; + } } } -- cgit v1.2.2 From d75faea330dbd1873c9094e9926ae306590c0998 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 30 Nov 2010 19:15:01 -0500 Subject: rpc: move sk_bc_xprt to svc_xprt This seems obviously transport-level information even if it's currently used only by the server socket code. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 10 ++++++---- net/sunrpc/xprtsock.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 52bd1130e83a..3bb400f86eae 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -987,15 +987,17 @@ static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, vec[0] = rqstp->rq_arg.head[0]; } else { /* REPLY */ - if (svsk->sk_bc_xprt) - req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid); + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; + + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); if (!req) { printk(KERN_NOTICE "%s: Got unrecognized reply: " - "calldir 0x%x sk_bc_xprt %p xid %08x\n", + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", __func__, ntohl(calldir), - svsk->sk_bc_xprt, xid); + bc_xprt, xid); vec[0] = rqstp->rq_arg.head[0]; goto out; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dfcab5ac65af..18dc42eb5597 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2379,9 +2379,9 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) * The backchannel uses the same socket connection as the * forechannel */ + args->bc_xprt->xpt_bc_xprt = xprt; xprt->bc_xprt = args->bc_xprt; bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); - bc_sock->sk_bc_xprt = xprt; transport->sock = bc_sock->sk_sock; transport->inet = bc_sock->sk_sk; -- cgit v1.2.2 From 99de8ea962bbc11a51ad4c52e3dc93bee5f6ba70 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 Dec 2010 12:45:44 -0500 Subject: rpc: keep backchannel xprt as long as server connection Multiple backchannels can share the same tcp connection; from rfc 5661 section 2.10.3.1: A connection's association with a session is not exclusive. A connection associated with the channel(s) of one session may be simultaneously associated with the channel(s) of other sessions including sessions associated with other client IDs. However, multiple backchannels share a connection, they must all share the same xid stream (hence the same rpc_xprt); the only way we have to match replies with calls at the rpc layer is using the xid. So, keep the rpc_xprt around as long as the connection lasts, in case we're asked to use the connection as a backchannel again. Requests to create new backchannel clients over a given server connection should results in creating new clients that reuse the existing rpc_xprt. But to start, just reject attempts to associate multiple rpc_xprt's with the same underlying bc_xprt. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 4 ++++ net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 34 ++++++++++++++++++++++++---------- 3 files changed, 29 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 173f3b975d49..ab86b7927f84 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -13,6 +13,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -128,6 +129,9 @@ static void svc_xprt_free(struct kref *kref) if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) svcauth_unix_info_release(xprt); put_net(xprt->xpt_net); + /* See comment on corresponding get in xs_setup_bc_tcp(): */ + if (xprt->xpt_bc_xprt) + xprt_put(xprt->xpt_bc_xprt); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4c8f18aff7c3..749ad15ae305 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -965,6 +965,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) xprt = kzalloc(size, GFP_KERNEL); if (xprt == NULL) goto out; + kref_init(&xprt->kref); xprt->max_reqs = max_req; xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); @@ -1102,7 +1103,6 @@ found: return xprt; } - kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 18dc42eb5597..0ef4dd4414ec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2375,16 +2375,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->reestablish_timeout = 0; xprt->idle_timeout = 0; - /* - * The backchannel uses the same socket connection as the - * forechannel - */ - args->bc_xprt->xpt_bc_xprt = xprt; - xprt->bc_xprt = args->bc_xprt; - bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); - transport->sock = bc_sock->sk_sock; - transport->inet = bc_sock->sk_sk; - xprt->ops = &bc_tcp_ops; switch (addr->sa_family) { @@ -2406,6 +2396,29 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->address_strings[RPC_DISPLAY_PORT], xprt->address_strings[RPC_DISPLAY_PROTO]); + /* + * The backchannel uses the same socket connection as the + * forechannel + */ + if (args->bc_xprt->xpt_bc_xprt) { + /* XXX: actually, want to catch this case... */ + ret = ERR_PTR(-EINVAL); + goto out_err; + } + /* + * Once we've associated a backchannel xprt with a connection, + * we want to keep it around as long as long as the connection + * lasts, in case we need to start using it for a backchannel + * again; this reference won't be dropped until bc_xprt is + * destroyed. + */ + xprt_get(xprt); + args->bc_xprt->xpt_bc_xprt = xprt; + xprt->bc_xprt = args->bc_xprt; + bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); + transport->sock = bc_sock->sk_sock; + transport->inet = bc_sock->sk_sk; + /* * Since we don't want connections for the backchannel, we set * the xprt status to connected @@ -2415,6 +2428,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; + xprt_put(xprt); ret = ERR_PTR(-EINVAL); out_err: xprt_free(xprt); -- cgit v1.2.2 From f0418aa4b1103f959d64dc18273efa04ee0140e9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 Dec 2010 13:48:19 -0500 Subject: rpc: allow xprt_class->setup to return a preexisting xprt This allows us to reuse the xprt associated with a server connection if one has already been set up. Signed-off-by: J. Bruce Fields --- net/sunrpc/xprt.c | 3 +++ net/sunrpc/xprtsock.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 749ad15ae305..856274d7e85c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1102,6 +1102,9 @@ found: -PTR_ERR(xprt)); return xprt; } + if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) + /* ->setup returned a pre-initialized xprt: */ + return xprt; spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0ef4dd4414ec..ee091c869fcd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2359,6 +2359,15 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct svc_sock *bc_sock; struct rpc_xprt *ret; + if (args->bc_xprt->xpt_bc_xprt) { + /* + * This server connection already has a backchannel + * export; we can't create a new one, as we wouldn't be + * able to match replies based on xid any more. So, + * reuse the already-existing one: + */ + return args->bc_xprt->xpt_bc_xprt; + } xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; @@ -2396,15 +2405,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->address_strings[RPC_DISPLAY_PORT], xprt->address_strings[RPC_DISPLAY_PROTO]); - /* - * The backchannel uses the same socket connection as the - * forechannel - */ - if (args->bc_xprt->xpt_bc_xprt) { - /* XXX: actually, want to catch this case... */ - ret = ERR_PTR(-EINVAL); - goto out_err; - } /* * Once we've associated a backchannel xprt with a connection, * we want to keep it around as long as long as the connection -- cgit v1.2.2 From 5b919f833d9d60588d026ad82d17f17e8872c7a9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jan 2011 00:34:49 -0800 Subject: net: ax25: fix information leak to userland harder Commit fe10ae53384e48c51996941b7720ee16995cbcb7 adds a memset() to clear the structure being sent back to userspace, but accidentally used the wrong size. Reported-by: Brad Spengler Signed-off-by: Kees Cook Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index bb86d2932394..6da5daeebab7 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1392,7 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, ax25_cb *ax25; int err = 0; - memset(fsa, 0, sizeof(fsa)); + memset(fsa, 0, sizeof(*fsa)); lock_sock(sk); ax25 = ax25_sk(sk); -- cgit v1.2.2 From 2fc72c7b84002ffb3c66918e2a7b0ee607d8b5aa Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 12 Jan 2011 20:25:08 +0100 Subject: netfilter: fix compilation when conntrack is disabled but tproxy is enabled The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but failed to update the #ifdef stanzas guarding the defragmentation related fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c. This patch adds the required #ifdefs so that IPv6 tproxy can truly be used without connection tracking. Original report: http://marc.info/?l=linux-netdev&m=129010118516341&w=2 Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: KOVACS Krisztian Signed-off-by: Pablo Neira Ayuso --- net/core/skbuff.c | 2 ++ net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 19d6c21220fd..d31bb36ae0dc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff *skb) } #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb->nfct); +#endif +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED nf_conntrack_put_reasm(skb->nfct_reasm); #endif #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 99abfb53bab9..97c5b21b9674 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -19,13 +19,15 @@ #include #include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #include #include #include #include -#include #include +#endif +#include #include static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, @@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); +#endif #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && @@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum, { struct sk_buff *reasm; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) /* Previously seen (loopback)? */ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; +#endif reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); /* queued */ -- cgit v1.2.2 From 72b43d0898e97f588293b4a24b33c58c46633d81 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Wed, 12 Jan 2011 08:34:08 +0000 Subject: inet6: prevent network storms caused by linux IPv6 routers Linux IPv6 forwards unicast packets, which are link layer multicasts... The hole was present since day one. I was 100% this check is there, but it is not. The problem shows itself, f.e. when Microsoft Network Load Balancer runs on a network. This software resolves IPv6 unicast addresses to multicast MAC addresses. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 94b5bf132b2e..5f8d242be3f3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -401,6 +401,9 @@ int ip6_forward(struct sk_buff *skb) goto drop; } + if (skb->pkt_type != PACKET_HOST) + goto drop; + skb_forward_csum(skb); /* -- cgit v1.2.2 From 3806b4f3b6115ce324b7125844f9e6acc80d34ec Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 12 Jan 2011 14:50:51 +0000 Subject: eth: fix new kernel-doc warning Fix new kernel-doc warning (copy-paste typo): Warning(net/ethernet/eth.c:366): No description found for parameter 'rxqs' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/ethernet/eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f9d7ac924f15..44d2b42fda56 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -351,7 +351,7 @@ EXPORT_SYMBOL(ether_setup); * @sizeof_priv: Size of additional driver-private structure to be allocated * for this Ethernet device * @txqs: The number of TX queues this device has. - * @txqs: The number of RX queues this device has. + * @rxqs: The number of RX queues this device has. * * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. -- cgit v1.2.2 From f31e8d4982653b39fe312f9938be0f49dd9ab5fa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jan 2011 14:19:55 +0100 Subject: netfilter: ctnetlink: fix loop in ctnetlink_get_conntrack() This patch fixes a loop in ctnetlink_get_conntrack() that can be triggered if you use the same socket to receive events and to perform a GET operation. Under heavy load, netlink_unicast() may return -EAGAIN, this error code is reserved in nfnetlink for the module load-on-demand. Instead, we return -ENOBUFS which is the appropriate error code that has to be propagated to user-space. Reported-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5cb8d3027b18..2b7eef37875c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -972,7 +972,8 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, free: kfree_skb(skb2); out: - return err; + /* this avoids a loop in nfnetlink. */ + return err == -EAGAIN ? -ENOBUFS : err; } #ifdef CONFIG_NF_NAT_NEEDED -- cgit v1.2.2 From 681c4d07dd5b2ce2ad9f6dbbf7841e479fbc7754 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jan 2011 13:40:33 +0100 Subject: mac80211: fix lockdep warning Since the introduction of the fixes for the reorder timer, mac80211 will cause lockdep warnings because lockdep confuses local->skb_queue and local->rx_skb_queue and treats their lock as the same. However, their locks are different, and are valid in different contexts (the former is used in IRQ context, the latter in BH only) and the only thing to be done is mark the former as a different lock class so that lockdep can tell the difference. Reported-by: Larry Finger Reported-by: Sujith Reported-by: Miles Lane Tested-by: Sujith Tested-by: Johannes Berg Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 485d36bc9a46..a46ff06d7cb8 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -39,6 +39,8 @@ module_param(ieee80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); +static struct lock_class_key ieee80211_rx_skb_queue_class; + void ieee80211_configure_filter(struct ieee80211_local *local) { u64 mc; @@ -569,7 +571,15 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, spin_lock_init(&local->filter_lock); spin_lock_init(&local->queue_stop_reason_lock); - skb_queue_head_init(&local->rx_skb_queue); + /* + * The rx_skb_queue is only accessed from tasklets, + * but other SKB queues are used from within IRQ + * context. Therefore, this one needs a different + * locking class so our direct, non-irq-safe use of + * the queue's lock doesn't throw lockdep warnings. + */ + skb_queue_head_init_class(&local->rx_skb_queue, + &ieee80211_rx_skb_queue_class); INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); -- cgit v1.2.2 From 82694f764dad783a123394e2220b92b9be721b43 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 12 Jan 2011 15:18:11 +0200 Subject: mac80211: use maximum number of AMPDU frames as default in BA RX When the buffer size is set to zero in the block ack parameter set field, we should use the maximum supported number of subframes. The existing code was bogus and was doing some unnecessary calculations that lead to wrong values. Thanks Johannes for helping me figure this one out. Cc: stable@kernel.org Cc: Johannes Berg Signed-off-by: Luciano Coelho Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f138b195d657..227ca82eef72 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -185,8 +185,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_hw *hw = &local->hw; - struct ieee80211_conf *conf = &hw->conf; struct tid_ampdu_rx *tid_agg_rx; u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; u8 dialog_token; @@ -231,13 +229,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, goto end_no_lock; } /* determine default buffer size */ - if (buf_size == 0) { - struct ieee80211_supported_band *sband; - - sband = local->hw.wiphy->bands[conf->channel->band]; - buf_size = IEEE80211_MIN_AMPDU_BUF; - buf_size = buf_size << sband->ht_cap.ampdu_factor; - } + if (buf_size == 0) + buf_size = IEEE80211_MAX_AMPDU_BUF; /* examine state machine */ -- cgit v1.2.2 From ed7809d9c41b514115ddffaa860694393c2016b3 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 13 Jan 2011 21:53:38 +0100 Subject: batman-adv: Even Batman should not dereference NULL pointers There's a problem in net/batman-adv/unicast.c::frag_send_skb(). dev_alloc_skb() allocates memory and may fail, thus returning NULL. If this happens we'll pass a NULL pointer on to skb_split() which in turn hands it to skb_split_inside_header() from where it gets passed to skb_put() that lets skb_tail_pointer() play with it and that function dereferences it. And thus the bat dies. While I was at it I also moved the call to dev_alloc_skb() above the assignment to 'unicast_packet' since there's no reason to do that assignment if the memory allocation fails. Signed-off-by: Jesper Juhl Signed-off-by: Sven Eckelmann --- net/batman-adv/unicast.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index dc2e28bed844..ee41fef04b21 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -229,10 +229,12 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, if (!bat_priv->primary_if) goto dropped; - unicast_packet = (struct unicast_packet *) skb->data; + frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); + if (!frag_skb) + goto dropped; + unicast_packet = (struct unicast_packet *) skb->data; memcpy(&tmp_uc, unicast_packet, uc_hdr_len); - frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); skb_split(skb, frag_skb, data_len / 2); if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 || -- cgit v1.2.2 From 1ac9ad1394fa542ac7ae0dc943ee3cda678799fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jan 2011 12:13:14 +0000 Subject: net: remove dev_txq_stats_fold() After recent changes, (percpu stats on vlan/tunnels...), we dont need anymore per struct netdev_queue tx_bytes/tx_packets/tx_dropped counters. Only remaining users are ixgbe, sch_teql, gianfar & macvlan : 1) ixgbe can be converted to use existing tx_ring counters. 2) macvlan incremented txq->tx_dropped, it can use the dev->stats.tx_dropped counter. 3) sch_teql : almost revert ab35cd4b8f42 (Use net_device internal stats) Now we have ndo_get_stats64(), use it, even for "unsigned long" fields (No need to bring back a struct net_device_stats) 4) gianfar adds a stats structure per tx queue to hold tx_bytes/tx_packets This removes a lockdep warning (and possible lockup) in rndis gadget, calling dev_get_stats() from hard IRQ context. Ref: http://www.spinics.net/lists/netdev/msg149202.html Reported-by: Neil Jones Signed-off-by: Eric Dumazet CC: Jarek Poplawski CC: Alexander Duyck CC: Jeff Kirsher CC: Sandeep Gopalpet CC: Michal Nazarewicz Signed-off-by: David S. Miller --- net/core/dev.c | 29 ----------------------------- net/sched/sch_teql.c | 26 +++++++++++++++++++++----- 2 files changed, 21 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a3ef808b5e36..83507c265e48 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5523,34 +5523,6 @@ void netdev_run_todo(void) } } -/** - * dev_txq_stats_fold - fold tx_queues stats - * @dev: device to get statistics from - * @stats: struct rtnl_link_stats64 to hold results - */ -void dev_txq_stats_fold(const struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; - unsigned int i; - struct netdev_queue *txq; - - for (i = 0; i < dev->num_tx_queues; i++) { - txq = netdev_get_tx_queue(dev, i); - spin_lock_bh(&txq->_xmit_lock); - tx_bytes += txq->tx_bytes; - tx_packets += txq->tx_packets; - tx_dropped += txq->tx_dropped; - spin_unlock_bh(&txq->_xmit_lock); - } - if (tx_bytes || tx_packets || tx_dropped) { - stats->tx_bytes = tx_bytes; - stats->tx_packets = tx_packets; - stats->tx_dropped = tx_dropped; - } -} -EXPORT_SYMBOL(dev_txq_stats_fold); - /* Convert net_device_stats to rtnl_link_stats64. They have the same * fields in the same order, with only the type differing. */ @@ -5594,7 +5566,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); } else { netdev_stats_to_stats64(storage, &dev->stats); - dev_txq_stats_fold(dev, storage); } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); return storage; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index af9360d1f6eb..84ce48eadff4 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -59,6 +59,10 @@ struct teql_master struct net_device *dev; struct Qdisc *slaves; struct list_head master_list; + unsigned long tx_bytes; + unsigned long tx_packets; + unsigned long tx_errors; + unsigned long tx_dropped; }; struct teql_sched_data @@ -274,7 +278,6 @@ static inline int teql_resolve(struct sk_buff *skb, static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) { struct teql_master *master = netdev_priv(dev); - struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct Qdisc *start, *q; int busy; int nores; @@ -314,8 +317,8 @@ restart: __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); - txq->tx_packets++; - txq->tx_bytes += length; + master->tx_packets++; + master->tx_bytes += length; return NETDEV_TX_OK; } __netif_tx_unlock(slave_txq); @@ -342,10 +345,10 @@ restart: netif_stop_queue(dev); return NETDEV_TX_BUSY; } - dev->stats.tx_errors++; + master->tx_errors++; drop: - txq->tx_dropped++; + master->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -398,6 +401,18 @@ static int teql_master_close(struct net_device *dev) return 0; } +static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct teql_master *m = netdev_priv(dev); + + stats->tx_packets = m->tx_packets; + stats->tx_bytes = m->tx_bytes; + stats->tx_errors = m->tx_errors; + stats->tx_dropped = m->tx_dropped; + return stats; +} + static int teql_master_mtu(struct net_device *dev, int new_mtu) { struct teql_master *m = netdev_priv(dev); @@ -422,6 +437,7 @@ static const struct net_device_ops teql_netdev_ops = { .ndo_open = teql_master_open, .ndo_stop = teql_master_close, .ndo_start_xmit = teql_master_xmit, + .ndo_get_stats64 = teql_master_stats64, .ndo_change_mtu = teql_master_mtu, }; -- cgit v1.2.2 From e1fcc7e2a719d139322fab3f47cfbd4340cf3d82 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Jan 2011 15:56:31 +0000 Subject: rxrpc: rxrpc_workqueue isn't used during memory reclaim rxrpc_workqueue isn't depended upon while reclaiming memory. Convert to alloc_workqueue() without WQ_MEM_RECLAIM. Signed-off-by: Tejun Heo Signed-off-by: David Howells Cc: linux-afs@lists.infradead.org Signed-off-by: Linus Torvalds --- net/rxrpc/af_rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0b9bb2085ce4..74c064c0dfdd 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -808,7 +808,7 @@ static int __init af_rxrpc_init(void) goto error_call_jar; } - rxrpc_workqueue = create_workqueue("krxrpcd"); + rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1); if (!rxrpc_workqueue) { printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n"); goto error_work_queue; -- cgit v1.2.2 From aa0adb1a85e159cf57f0e11282bc6c9e3606a5f3 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 15 Jan 2011 14:39:43 +0000 Subject: batman-adv: Use "__attribute__" shortcut macros Linux 2.6.21 defines different macros for __attribute__ which are also used inside batman-adv. The next version of checkpatch.pl warns about the usage of __attribute__((packed))). Linux 2.6.33 defines an extra macro __always_unused which is used to assist source code analyzers and can be used to removed the last existing __attribute__ inside the source code. Signed-off-by: Sven Eckelmann --- net/batman-adv/main.h | 6 +++--- net/batman-adv/packet.h | 14 +++++++------- net/batman-adv/types.h | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index d4d9926c2201..65106fb61b8f 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -151,9 +151,9 @@ int debug_log(struct bat_priv *bat_priv, char *fmt, ...); } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ -static inline void bat_dbg(char type __attribute__((unused)), - struct bat_priv *bat_priv __attribute__((unused)), - char *fmt __attribute__((unused)), ...) +static inline void bat_dbg(char type __always_unused, + struct bat_priv *bat_priv __always_unused, + char *fmt __always_unused, ...) { } #endif diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index b49fdf70a6d5..2284e8129cb2 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -63,7 +63,7 @@ struct batman_packet { uint8_t num_hna; uint8_t gw_flags; /* flags related to gateway class */ uint8_t align; -} __attribute__((packed)); +} __packed; #define BAT_PACKET_LEN sizeof(struct batman_packet) @@ -76,7 +76,7 @@ struct icmp_packet { uint8_t orig[6]; uint16_t seqno; uint8_t uid; -} __attribute__((packed)); +} __packed; #define BAT_RR_LEN 16 @@ -93,14 +93,14 @@ struct icmp_packet_rr { uint8_t uid; uint8_t rr_cur; uint8_t rr[BAT_RR_LEN][ETH_ALEN]; -} __attribute__((packed)); +} __packed; struct unicast_packet { uint8_t packet_type; uint8_t version; /* batman version field */ uint8_t dest[6]; uint8_t ttl; -} __attribute__((packed)); +} __packed; struct unicast_frag_packet { uint8_t packet_type; @@ -110,7 +110,7 @@ struct unicast_frag_packet { uint8_t flags; uint8_t orig[6]; uint16_t seqno; -} __attribute__((packed)); +} __packed; struct bcast_packet { uint8_t packet_type; @@ -118,7 +118,7 @@ struct bcast_packet { uint8_t orig[6]; uint8_t ttl; uint32_t seqno; -} __attribute__((packed)); +} __packed; struct vis_packet { uint8_t packet_type; @@ -131,6 +131,6 @@ struct vis_packet { * neighbors */ uint8_t target_orig[6]; /* who should receive this packet */ uint8_t sender_orig[6]; /* who sent or rebroadcasted this packet */ -} __attribute__((packed)); +} __packed; #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 97cb23dd3e69..bf3f6f5a12c4 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -246,13 +246,13 @@ struct vis_info { /* this packet might be part of the vis send queue. */ struct sk_buff *skb_packet; /* vis_info may follow here*/ -} __attribute__((packed)); +} __packed; struct vis_info_entry { uint8_t src[ETH_ALEN]; uint8_t dest[ETH_ALEN]; uint8_t quality; /* quality = 0 means HNA */ -} __attribute__((packed)); +} __packed; struct recvlist_node { struct list_head list; -- cgit v1.2.2 From 5e5073280379d38e86ade471daa7443b553fc839 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Sat, 15 Jan 2011 20:56:42 -0800 Subject: can: test size of struct sockaddr in sendmsg This patch makes the CAN socket code conform to the manpage of sendmsg. Signed-off-by: Kurt Van Dijck Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 3 +++ net/can/raw.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 9d5e8accfab1..092dc88a7c64 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1256,6 +1256,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, struct sockaddr_can *addr = (struct sockaddr_can *)msg->msg_name; + if (msg->msg_namelen < sizeof(*addr)) + return -EINVAL; + if (addr->can_family != AF_CAN) return -EINVAL; diff --git a/net/can/raw.c b/net/can/raw.c index e88f610fdb7b..883e9d74fddf 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -649,6 +649,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, struct sockaddr_can *addr = (struct sockaddr_can *)msg->msg_name; + if (msg->msg_namelen < sizeof(*addr)) + return -EINVAL; + if (addr->can_family != AF_CAN) return -EINVAL; -- cgit v1.2.2 From 01a859014b35deb6cc63b1dc2808ca7a0e10a4de Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 15 Jan 2011 03:06:39 +0000 Subject: caif: checking the wrong variable In the original code we check if (servl == NULL) twice. The first time should print the message that cfmuxl_remove_uplayer() failed and set "ret" correctly, but instead it just returns success. The second check should be checking the value of "ret" instead of "servl". Signed-off-by: Dan Carpenter Acked-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfcnfg.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 21ede141018a..c665de778b60 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -191,6 +191,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) struct cflayer *servl = NULL; struct cfcnfg_phyinfo *phyinfo = NULL; u8 phyid = 0; + caif_assert(adap_layer != NULL); channel_id = adap_layer->id; if (adap_layer->dn == NULL || channel_id == 0) { @@ -199,16 +200,16 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) goto end; } servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id); - if (servl == NULL) - goto end; - layer_set_up(servl, NULL); - ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); if (servl == NULL) { pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)", channel_id); ret = -EINVAL; goto end; } + layer_set_up(servl, NULL); + ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); + if (ret) + goto end; caif_assert(channel_id == servl->id); if (adap_layer->dn != NULL) { phyid = cfsrvl_getphyid(adap_layer->dn); -- cgit v1.2.2 From 2fdc1c8093255f9da877d7b9ce3f46c2098377dc Mon Sep 17 00:00:00 2001 From: Romain Francoise Date: Mon, 17 Jan 2011 07:59:18 +0000 Subject: ipv6: Silence privacy extensions initialization When a network namespace is created (via CLONE_NEWNET), the loopback interface is automatically added to the new namespace, triggering a printk in ipv6_add_dev() if CONFIG_IPV6_PRIVACY is set. This is problematic for applications which use CLONE_NEWNET as part of a sandbox, like Chromium's suid sandbox or recent versions of vsftpd. On a busy machine, it can lead to thousands of useless "lo: Disabled Privacy Extensions" messages appearing in dmesg. It's easy enough to check the status of privacy extensions via the use_tempaddr sysctl, so just removing the printk seems like the most sensible solution. Signed-off-by: Romain Francoise Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5b189c97c2fc..24a1cf110d80 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -420,9 +420,6 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { - printk(KERN_INFO - "%s: Disabled Privacy Extensions\n", - dev->name); ndev->cnf.use_tempaddr = -1; } else { in6_dev_hold(ndev); -- cgit v1.2.2 From 6ee400aafb60289b78fcde5ebccd8c4973fc53f4 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 17 Jan 2011 20:46:00 +0000 Subject: net offloading: Do not mask out NETIF_F_HW_VLAN_TX for vlan. In netif_skb_features() we return only the features that are valid for vlans if we have a vlan packet. However, we should not mask out NETIF_F_HW_VLAN_TX since it enables transmission of vlan tags and is obviously valid. Reported-by: Eric Dumazet Signed-off-by: Jesse Gross Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 83507c265e48..4c58d11d3b68 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2023,13 +2023,13 @@ int netif_skb_features(struct sk_buff *skb) return harmonize_features(skb, protocol, features); } - features &= skb->dev->vlan_features; + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); if (protocol != htons(ETH_P_8021Q)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM; + NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; return harmonize_features(skb, protocol, features); } } -- cgit v1.2.2 From 4571928fc73589e9c5217cd069d2c0b4ff1818a8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 14 Jan 2011 14:59:44 +0100 Subject: Bluetooth: l2cap: fix misuse of logical operation in place of bitop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CC: Marcel Holtmann CC: "Gustavo F. Padovan" CC: João Paulo Rechi Vita Signed-off-by: David Sterba Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index c791fcda7b2d..4fd88eb0a464 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1893,8 +1893,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms if (pi->mode == L2CAP_MODE_STREAMING) { l2cap_streaming_send(sk); } else { - if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && - pi->conn_state && L2CAP_CONN_WAIT_F) { + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->conn_state & L2CAP_CONN_WAIT_F)) { err = len; break; } -- cgit v1.2.2 From e2e0cacbd4b0c7c69c7591d37c243f2363aeaa71 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 4 Jan 2011 12:08:50 +0200 Subject: Bluetooth: Fix leaking blacklist when unregistering a hci device The blacklist should be freed before the hci device gets unregistered. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8b602d881fd7..9c4541bc488a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1011,6 +1011,10 @@ int hci_unregister_dev(struct hci_dev *hdev) destroy_workqueue(hdev->workqueue); + hci_dev_lock_bh(hdev); + hci_blacklist_clear(hdev); + hci_dev_unlock_bh(hdev); + __hci_dev_put(hdev); return 0; -- cgit v1.2.2 From 683d949a7fbf33c244670e34d35c460e0d6558cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Turek?= <8an@praha12.net> Date: Wed, 5 Jan 2011 02:43:59 +0100 Subject: Bluetooth: Never deallocate a session when some DLC points to it Fix a bug introduced in commit 9cf5b0ea3a7f1432c61029f7aaf4b8b338628884: function rfcomm_recv_ua calls rfcomm_session_put without checking that the session is not referenced by some DLC. If the session is freed, that DLC would refer to deallocated memory, causing an oops later, as shown in this bug report: https://bugzilla.kernel.org/show_bug.cgi?id=15994 Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ff8aaa736650..6b83776534fb 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1164,7 +1164,8 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) * initiator rfcomm_process_rx already calls * rfcomm_session_put() */ if (s->sock->sk->sk_state != BT_CLOSED) - rfcomm_session_put(s); + if (list_empty(&s->dlcs)) + rfcomm_session_put(s); break; } } -- cgit v1.2.2 From 88644bb9fee591b2743a881923263bc28df4cded Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 19 Jan 2011 12:06:48 +0530 Subject: Revert "Bluetooth: Update sec_level/auth_type for already existing connections" This reverts commit 045309820afe047920a50de25634dab46a1e851d. That commit is wrong for two reasons: - The conn->sec_level shouldn't be updated without performing authentication first (as it's supposed to represent the level of security that the existing connection has) - A higher auth_type value doesn't mean "more secure" like the commit seems to assume. E.g. dedicated bonding with MITM protection is 0x03 whereas general bonding without MITM protection is 0x04. hci_conn_auth already takes care of updating conn->auth_type so hci_connect doesn't need to do it. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_conn.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6b90a4191734..65a3fb5678eb 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -382,11 +382,6 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 acl->sec_level = sec_level; acl->auth_type = auth_type; hci_acl_connect(acl); - } else { - if (acl->sec_level < sec_level) - acl->sec_level = sec_level; - if (acl->auth_type < auth_type) - acl->auth_type = auth_type; } if (type == ACL_LINK) -- cgit v1.2.2 From 65cf686ee102b7eb0477a4bab82ff227071a0258 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 19 Jan 2011 12:06:49 +0530 Subject: Bluetooth: Fix MITM protection requirement preservation If an existing connection has a MITM protection requirement (the first bit of the auth_type) then that requirement should not be cleared by new sockets that reuse the ACL but don't have that requirement. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_conn.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 65a3fb5678eb..fe712a89a856 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -442,6 +442,9 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) else if (conn->link_mode & HCI_LM_AUTH) return 1; + /* Make sure we preserve an existing MITM requirement*/ + auth_type |= (conn->auth_type & 0x01); + conn->auth_type = auth_type; if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { -- cgit v1.2.2 From 8556edd32f01c50a3c99e44dc2c3b1252ea59605 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 19 Jan 2011 12:06:50 +0530 Subject: Bluetooth: Create a unified auth_type evaluation function The logic for determining the needed auth_type for an L2CAP socket is rather complicated and has so far been duplicated in l2cap_check_security as well as l2cap_do_connect. Additionally the l2cap_check_security code was completely missing the handling of SOCK_RAW type sockets. This patch creates a unified function for the evaluation and makes l2cap_do_connect and l2cap_check_security use that function. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap.c | 77 +++++++++++++++++++-------------------------------- 1 file changed, 28 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 4fd88eb0a464..ae227bf25563 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -305,33 +305,44 @@ static void l2cap_chan_del(struct sock *sk, int err) } } -/* Service level security */ -static inline int l2cap_check_security(struct sock *sk) +static inline u8 l2cap_get_auth_type(struct sock *sk) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - __u8 auth_type; + if (sk->sk_type == SOCK_RAW) { + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: + return HCI_AT_DEDICATED_BONDING_MITM; + case BT_SECURITY_MEDIUM: + return HCI_AT_DEDICATED_BONDING; + default: + return HCI_AT_NO_BONDING; + } + } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) - auth_type = HCI_AT_NO_BONDING_MITM; + return HCI_AT_NO_BONDING_MITM; else - auth_type = HCI_AT_NO_BONDING; - - if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; + return HCI_AT_NO_BONDING; } else { switch (l2cap_pi(sk)->sec_level) { case BT_SECURITY_HIGH: - auth_type = HCI_AT_GENERAL_BONDING_MITM; - break; + return HCI_AT_GENERAL_BONDING_MITM; case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_GENERAL_BONDING; - break; + return HCI_AT_GENERAL_BONDING; default: - auth_type = HCI_AT_NO_BONDING; - break; + return HCI_AT_NO_BONDING; } } +} + +/* Service level security */ +static inline int l2cap_check_security(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + __u8 auth_type; + + auth_type = l2cap_get_auth_type(sk); return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level, auth_type); @@ -1068,39 +1079,7 @@ static int l2cap_do_connect(struct sock *sk) err = -ENOMEM; - if (sk->sk_type == SOCK_RAW) { - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - auth_type = HCI_AT_DEDICATED_BONDING_MITM; - break; - case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_DEDICATED_BONDING; - break; - default: - auth_type = HCI_AT_NO_BONDING; - break; - } - } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { - if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) - auth_type = HCI_AT_NO_BONDING_MITM; - else - auth_type = HCI_AT_NO_BONDING; - - if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - } else { - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - auth_type = HCI_AT_GENERAL_BONDING_MITM; - break; - case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_GENERAL_BONDING; - break; - default: - auth_type = HCI_AT_NO_BONDING; - break; - } - } + auth_type = l2cap_get_auth_type(sk); hcon = hci_connect(hdev, ACL_LINK, dst, l2cap_pi(sk)->sec_level, auth_type); -- cgit v1.2.2 From d00ef24fc2923b65fdd440dc6445903e965841ac Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 19 Jan 2011 12:06:51 +0530 Subject: Bluetooth: Fix authentication request for L2CAP raw sockets When there is an existing connection l2cap_check_security needs to be called to ensure that the security level of the new socket is fulfilled. Normally l2cap_do_start takes care of this, but that function doesn't get called for SOCK_RAW type sockets. This patch adds the necessary l2cap_check_security call to the appropriate branch in l2cap_do_connect. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index ae227bf25563..7550abb0c96a 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1106,7 +1106,8 @@ static int l2cap_do_connect(struct sock *sk) if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM) { l2cap_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; + if (l2cap_check_security(sk)) + sk->sk_state = BT_CONNECTED; } else l2cap_do_start(sk); } -- cgit v1.2.2 From 765c2a964b49bd06b61a52991519281c85d82b67 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 19 Jan 2011 12:06:52 +0530 Subject: Bluetooth: Fix race condition with conn->sec_level The conn->sec_level value is supposed to represent the current level of security that the connection has. However, by assigning to it before requesting authentication it will have the wrong value during the authentication procedure. To fix this a pending_sec_level variable is added which is used to track the desired security level while making sure that sec_level always represents the current level of security. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_conn.c | 8 ++++++-- net/bluetooth/hci_event.c | 9 +++++---- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fe712a89a856..99cd8d9d891b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -379,7 +379,8 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 hci_conn_hold(acl); if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { - acl->sec_level = sec_level; + acl->sec_level = BT_SECURITY_LOW; + acl->pending_sec_level = sec_level; acl->auth_type = auth_type; hci_acl_connect(acl); } @@ -437,8 +438,11 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("conn %p", conn); + if (conn->pending_sec_level > sec_level) + sec_level = conn->pending_sec_level; + if (sec_level > conn->sec_level) - conn->sec_level = sec_level; + conn->pending_sec_level = sec_level; else if (conn->link_mode & HCI_LM_AUTH) return 1; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 38100170d380..a290854fdaa6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -692,13 +692,13 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev, if (conn->state != BT_CONFIG || !conn->out) return 0; - if (conn->sec_level == BT_SECURITY_SDP) + if (conn->pending_sec_level == BT_SECURITY_SDP) return 0; /* Only request authentication for SSP connections or non-SSP * devices with sec_level HIGH */ if (!(hdev->ssp_mode > 0 && conn->ssp_mode > 0) && - conn->sec_level != BT_SECURITY_HIGH) + conn->pending_sec_level != BT_SECURITY_HIGH) return 0; return 1; @@ -1095,9 +1095,10 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { - if (!ev->status) + if (!ev->status) { conn->link_mode |= HCI_LM_AUTH; - else + conn->sec_level = conn->pending_sec_level; + } else conn->sec_level = BT_SECURITY_LOW; clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); -- cgit v1.2.2 From b8f3ab4290f1e720166e888ea2a1d1d44c4d15dd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 18 Jan 2011 12:40:38 -0800 Subject: Revert "netlink: test for all flags of the NLM_F_DUMP composite" This reverts commit 0ab03c2b1478f2438d2c80204f7fef65b1bca9cf. It breaks several things including the avahi daemon. Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 4 ++-- net/netlink/genetlink.c | 2 +- net/xfrm/xfrm_user.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a5f7535aab5b..750db57f3bb3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1820,7 +1820,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2746c1fa6417..2ada17129fce 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) nlmsg_len(nlh) < hdrlen) return -EINVAL; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2b7eef37875c..93297aaceb2b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -924,7 +924,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) + if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); @@ -1787,7 +1787,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, ctnetlink_exp_done); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f83cb370292b..1781d99145e2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -519,7 +519,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { if (ops->dumpit == NULL) return -EOPNOTSUPP; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d5e1e0b08890..61291965c5f6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2189,7 +2189,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && - (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + (nlh->nlmsg_flags & NLM_F_DUMP)) { if (link->dump == NULL) return -EINVAL; -- cgit v1.2.2 From d402786ea4f8433774a812d6b8635e737425cddd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2011 00:51:36 +0000 Subject: net: fix can_checksum_protocol() arguments swap commit 0363466866d901fbc (net offloading: Convert checksums to use centrally computed features.) mistakenly swapped can_checksum_protocol() arguments. This broke IPv6 on bnx2 for instance, on NIC without TCPv6 checksum offloads. Reported-by: Hans de Bruin Signed-off-by: Eric Dumazet Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4c58d11d3b68..8393ec408cd4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2001,7 +2001,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) { - if (!can_checksum_protocol(protocol, features)) { + if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { -- cgit v1.2.2 From 4580ccc04ddd8c17a470573a7fdb8def2e036dfa Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 18 Jan 2011 22:39:00 +0000 Subject: sctp: user perfect name for Delayed SACK Timer option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The option name of Delayed SACK Timer should be SCTP_DELAYED_SACK, not SCTP_DELAYED_ACK. Left SCTP_DELAYED_ACK be concomitant with SCTP_DELAYED_SACK, for making compatibility with existing applications. Reference: 8.1.19. Get or Set Delayed SACK Timer (SCTP_DELAYED_SACK) (http://tools.ietf.org/html/draft-ietf-tsvwg-sctpsocket-25) Signed-off-by: Shan Wei Acked-by: Wei Yongjun Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a09b0dd25f50..8e02550ff3e8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3428,7 +3428,7 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); break; - case SCTP_DELAYED_ACK: + case SCTP_DELAYED_SACK: retval = sctp_setsockopt_delayed_ack(sk, optval, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: @@ -5333,7 +5333,7 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); break; - case SCTP_DELAYED_ACK: + case SCTP_DELAYED_SACK: retval = sctp_getsockopt_delayed_ack(sk, len, optval, optlen); break; -- cgit v1.2.2 From 6a108a14fa356ef607be308b68337939e56ea94e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 20 Jan 2011 14:44:16 -0800 Subject: kconfig: rename CONFIG_EMBEDDED to CONFIG_EXPERT The meaning of CONFIG_EMBEDDED has long since been obsoleted; the option is used to configure any non-standard kernel with a much larger scope than only small devices. This patch renames the option to CONFIG_EXPERT in init/Kconfig and fixes references to the option throughout the kernel. A new CONFIG_EMBEDDED option is added that automatically selects CONFIG_EXPERT when enabled and can be used in the future to isolate options that should only be considered for embedded systems (RISC architectures, SLOB, etc). Calling the option "EXPERT" more accurately represents its intention: only expert users who understand the impact of the configuration changes they are making should enable it. Reviewed-by: Ingo Molnar Acked-by: David Woodhouse Signed-off-by: David Rientjes Cc: Greg KH Cc: "David S. Miller" Cc: Jens Axboe Cc: Arnd Bergmann Cc: Robin Holt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/mac80211/Kconfig | 6 +++--- net/rfkill/Kconfig | 4 ++-- net/wireless/Kconfig | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 9109262abd24..c766056d0488 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -20,7 +20,7 @@ config MAC80211_HAS_RC def_bool n config MAC80211_RC_PID - bool "PID controller based rate control algorithm" if EMBEDDED + bool "PID controller based rate control algorithm" if EXPERT select MAC80211_HAS_RC ---help--- This option enables a TX rate control algorithm for @@ -28,14 +28,14 @@ config MAC80211_RC_PID rate. config MAC80211_RC_MINSTREL - bool "Minstrel" if EMBEDDED + bool "Minstrel" if EXPERT select MAC80211_HAS_RC default y ---help--- This option enables the 'minstrel' TX rate control algorithm config MAC80211_RC_MINSTREL_HT - bool "Minstrel 802.11n support" if EMBEDDED + bool "Minstrel 802.11n support" if EXPERT depends on MAC80211_RC_MINSTREL default y ---help--- diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index eaf765876458..7fce6dfd2180 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -18,7 +18,7 @@ config RFKILL_LEDS default y config RFKILL_INPUT - bool "RF switch input support" if EMBEDDED + bool "RF switch input support" if EXPERT depends on RFKILL depends on INPUT = y || RFKILL = INPUT - default y if !EMBEDDED + default y if !EXPERT diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index d0ee29063e5d..1f1ef70f34f2 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -95,7 +95,7 @@ config CFG80211_DEBUGFS If unsure, say N. config CFG80211_INTERNAL_REGDB - bool "use statically compiled regulatory rules database" if EMBEDDED + bool "use statically compiled regulatory rules database" if EXPERT default n depends on CFG80211 ---help--- -- cgit v1.2.2 From 9190b3b3208d052d98cb601fcc192f3f71a5658b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 23:31:33 -0800 Subject: net_sched: accurate bytes/packets stats/rates In commit 44b8288308ac9d (net_sched: pfifo_head_drop problem), we fixed a problem with pfifo_head drops that incorrectly decreased sch->bstats.bytes and sch->bstats.packets Several qdiscs (CHOKe, SFQ, pfifo_head, ...) are able to drop a previously enqueued packet, and bstats cannot be changed, so bstats/rates are not accurate (over estimated) This patch changes the qdisc_bstats updates to be done at dequeue() time instead of enqueue() time. bstats counters no longer account for dropped frames, and rates are more correct, since enqueue() bursts dont have effect on dequeue() rate. Signed-off-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 3 +-- net/sched/sch_drr.c | 2 +- net/sched/sch_dsmark.c | 2 +- net/sched/sch_fifo.c | 5 +---- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 12 +++++------- net/sched/sch_multiq.c | 2 +- net/sched/sch_netem.c | 3 +-- net/sched/sch_prio.c | 2 +- net/sched/sch_red.c | 11 ++++++----- net/sched/sch_sfq.c | 5 ++--- net/sched/sch_tbf.c | 2 +- net/sched/sch_teql.c | 3 ++- 13 files changed, 24 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index c80d1c210c5d..5f63ec58942c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -390,7 +390,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, cl->q); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; - qdisc_bstats_update(sch, skb); cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); @@ -649,7 +648,6 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) ret = qdisc_enqueue(skb, cl->q); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; - qdisc_bstats_update(sch, skb); if (!cl->next_alive) cbq_activate_class(cl); return 0; @@ -971,6 +969,7 @@ cbq_dequeue(struct Qdisc *sch) skb = cbq_dequeue_1(sch); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; sch->flags &= ~TCQ_F_THROTTLED; return skb; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index de55e642eafc..6b7fe4a84f13 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -376,7 +376,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) } bstats_update(&cl->bstats, skb); - qdisc_bstats_update(sch, skb); sch->q.qlen++; return err; @@ -403,6 +402,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) skb = qdisc_dequeue_peeked(cl->qdisc); if (cl->qdisc->q.qlen == 0) list_del(&cl->alist); + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 60f4bdd4408e..0f7bf3fdfea5 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -260,7 +260,6 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -283,6 +282,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) if (skb == NULL) return NULL; + qdisc_bstats_update(sch, skb); sch->q.qlen--; index = skb->tc_index & (p->indices - 1); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index aa4d6337e43c..d468b479aa93 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -46,17 +46,14 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) { - struct sk_buff *skb_head; struct fifo_sched_data *q = qdisc_priv(sch); if (likely(skb_queue_len(&sch->q) < q->limit)) return qdisc_enqueue_tail(skb, sch); /* queue full, remove one skb to fulfill the limit */ - skb_head = qdisc_dequeue_head(sch); + __qdisc_queue_drop_head(sch, &sch->q); sch->qstats.drops++; - kfree_skb(skb_head); - qdisc_enqueue_tail(skb, sch); return NET_XMIT_CN; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 2e45791d4f6c..14a799de1c35 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1600,7 +1600,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) set_active(cl, qdisc_pkt_len(skb)); bstats_update(&cl->bstats, skb); - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1666,6 +1665,7 @@ hfsc_dequeue(struct Qdisc *sch) } sch->flags &= ~TCQ_F_THROTTLED; + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 984c1b0c6836..fc12fe6f5597 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -574,7 +574,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } sch->q.qlen++; - qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -842,7 +841,7 @@ next: static struct sk_buff *htb_dequeue(struct Qdisc *sch) { - struct sk_buff *skb = NULL; + struct sk_buff *skb; struct htb_sched *q = qdisc_priv(sch); int level; psched_time_t next_event; @@ -851,6 +850,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) /* try to dequeue direct packets as high prio (!) to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); if (skb != NULL) { +ok: + qdisc_bstats_update(sch, skb); sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; @@ -884,11 +885,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) int prio = ffz(m); m |= 1 << prio; skb = htb_dequeue_tree(q, prio, level); - if (likely(skb != NULL)) { - sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; - goto fin; - } + if (likely(skb != NULL)) + goto ok; } } sch->qstats.overlimits++; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 21f13da24763..436a2e75b322 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -83,7 +83,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -112,6 +111,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) qdisc = q->queues[q->curband]; skb = qdisc->dequeue(qdisc); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1c4bce863479..6a3006b38dc5 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -240,7 +240,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (likely(ret == NET_XMIT_SUCCESS)) { sch->q.qlen++; - qdisc_bstats_update(sch, skb); } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; } @@ -289,6 +288,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) skb->tstamp.tv64 = 0; #endif pr_debug("netem_dequeue: return skb=%p\n", skb); + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } @@ -476,7 +476,6 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) __skb_queue_after(list, skb, nskb); sch->qstats.backlog += qdisc_pkt_len(nskb); - qdisc_bstats_update(sch, nskb); return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 966158d49dd1..fbd710d619bf 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -84,7 +84,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -116,6 +115,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch) struct Qdisc *qdisc = q->queues[prio]; struct sk_buff *skb = qdisc->dequeue(qdisc); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index a6009c5a2c97..9f98dbd32d4c 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -94,7 +94,6 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -114,11 +113,13 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch) struct Qdisc *child = q->qdisc; skb = child->dequeue(child); - if (skb) + if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; - else if (!red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); - + } else { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + } return skb; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 239ec53a634d..edea8cefec6c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -402,10 +402,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->tail = slot; slot->allot = q->scaled_quantum; } - if (++sch->q.qlen <= q->limit) { - qdisc_bstats_update(sch, skb); + if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; - } sfq_drop(sch); return NET_XMIT_CN; @@ -445,6 +443,7 @@ next_slot: } skb = slot_dequeue_head(slot); sfq_dec(q, a); + qdisc_bstats_update(sch, skb); sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 77565e721811..e93165820c3f 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -134,7 +134,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) } sch->q.qlen++; - qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -187,6 +186,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) q->ptokens = ptoks; sch->q.qlen--; sch->flags &= ~TCQ_F_THROTTLED; + qdisc_bstats_update(sch, skb); return skb; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 84ce48eadff4..d84e7329660f 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -87,7 +87,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) if (q->q.qlen < dev->tx_queue_len) { __skb_queue_tail(&q->q, skb); - qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -111,6 +110,8 @@ teql_dequeue(struct Qdisc* sch) dat->m->slaves = sch; netif_wake_queue(m); } + } else { + qdisc_bstats_update(sch, skb); } sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; return skb; -- cgit v1.2.2 From 577d6a7c3a0e273e115c65a148b71be6c1950f69 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 24 Jan 2011 14:32:52 -0600 Subject: module: fix missing semicolons in MODULE macro usage You always needed them when you were a module, but the builtin versions of the macros used to be more lenient. Reported-by: Stephen Rothwell Signed-off-by: Rusty Russell --- net/dsa/dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0c877a74e1f4..3fb14b7c13cf 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -428,7 +428,7 @@ static void __exit dsa_cleanup_module(void) } module_exit(dsa_cleanup_module); -MODULE_AUTHOR("Lennert Buytenhek ") +MODULE_AUTHOR("Lennert Buytenhek "); MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:dsa"); -- cgit v1.2.2 From c71caf4114a0e1da3451cc92fba6a152929cd4c2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 24 Jan 2011 19:01:07 +0100 Subject: netfilter: ctnetlink: fix missing refcount increment during dumps In 13ee6ac netfilter: fix race in conntrack between dump_table and destroy, we recovered spinlocks to protect the dump of the conntrack table according to reports from Stephen and acknowledgments on the issue from Eric. In that patch, the refcount bump that allows to keep a reference to the current ct object was removed. However, we still decrement the refcount for that object in the output path of ctnetlink_dump_table(): if (last) nf_ct_put(last) Cc: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso Acked-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 93297aaceb2b..eead9db6f899 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -667,6 +667,7 @@ restart: if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, ct) < 0) { + nf_conntrack_get(&ct->ct_general); cb->args[1] = (unsigned long)ct; goto out; } -- cgit v1.2.2 From 08b5194b5d6485d12ebf24cf6ee389fc55691122 Mon Sep 17 00:00:00 2001 From: Thomas Jacob Date: Mon, 24 Jan 2011 21:35:36 +0100 Subject: netfilter: xt_iprange: Incorrect xt_iprange boundary check for IPv6 iprange_ipv6_sub was substracting 2 unsigned ints and then casting the result to int to find out whether they are lt, eq or gt each other, this doesn't work if the full 32 bits of each part can be used in IPv6 addresses. Patch should remedy that without significant performance penalties. Also number of ntohl calls can be reduced this way (Jozsef Kadlecsik). Signed-off-by: Thomas Jacob Signed-off-by: Patrick McHardy --- net/netfilter/xt_iprange.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 88f7c3511c72..73c33a42f87f 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -53,15 +53,13 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par) } static inline int -iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b) +iprange_ipv6_lt(const struct in6_addr *a, const struct in6_addr *b) { unsigned int i; - int r; for (i = 0; i < 4; ++i) { - r = ntohl(a->s6_addr32[i]) - ntohl(b->s6_addr32[i]); - if (r != 0) - return r; + if (a->s6_addr32[i] != b->s6_addr32[i]) + return ntohl(a->s6_addr32[i]) < ntohl(b->s6_addr32[i]); } return 0; @@ -75,15 +73,15 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par) bool m; if (info->flags & IPRANGE_SRC) { - m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0; - m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0; + m = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6); + m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr); m ^= !!(info->flags & IPRANGE_SRC_INV); if (m) return false; } if (info->flags & IPRANGE_DST) { - m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0; - m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0; + m = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6); + m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr); m ^= !!(info->flags & IPRANGE_DST_INV); if (m) return false; -- cgit v1.2.2 From c506653d35249bb4738bb139c24362e1ae724bc1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2011 13:16:16 -0800 Subject: net: arp_ioctl() must hold RTNL Commit 941666c2e3e0 "net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()" introduced a regression, reported by Jamie Heilman. "arp -Ds 192.168.2.41 eth0 pub" triggered the ASSERT_RTNL() assert in pneigh_lookup() Removing RTNL requirement from arp_ioctl() was a mistake, just revert that part. Reported-by: Jamie Heilman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- net/ipv4/arp.c | 11 +++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8393ec408cd4..1e5077d2cca6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -749,7 +749,8 @@ EXPORT_SYMBOL(dev_get_by_index); * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold RCU + * is not found or a pointer to the device. + * The caller must hold RCU or RTNL. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 04c8b69fd426..7927589813b5 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1017,14 +1017,13 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rcu(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on); + if (__in_dev_get_rtnl(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); return 0; } return -ENXIO; } -/* must be called with rcu_read_lock() */ static int arp_req_set_public(struct net *net, struct arpreq *r, struct net_device *dev) { @@ -1233,10 +1232,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!(r.arp_flags & ATF_NETMASK)) ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = htonl(0xFFFFFFFFUL); - rcu_read_lock(); + rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; - dev = dev_get_by_name_rcu(net, r.arp_dev); + dev = __dev_get_by_name(net, r.arp_dev); if (dev == NULL) goto out; @@ -1263,7 +1262,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; } out: - rcu_read_unlock(); + rtnl_unlock(); if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) err = -EFAULT; return err; -- cgit v1.2.2 From d1dc7abf2fafa34b0ffcd070fd59405aa9c0a4d8 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 24 Jan 2011 12:08:48 +0000 Subject: GRO: fix merging a paged skb after non-paged skbs Suppose that several linear skbs of the same flow were received by GRO. They were thus merged into one skb with a frag_list. Then a new skb of the same flow arrives, but it is a paged skb with data starting in its frags[]. Before adding the skb to the frag_list skb_gro_receive() will of course adjust the skb to throw away the headers. It correctly modifies the page_offset and size of the frag, but it leaves incorrect information in the skb: ->data_len is not decreased at all. ->len is decreased only by headlen, as if no change were done to the frag. Later in a receiving process this causes skb_copy_datagram_iovec() to return -EFAULT and this is seen in userspace as the result of the recv() syscall. In practice the bug can be reproduced with the sfc driver. By default the driver uses an adaptive scheme when it switches between using napi_gro_receive() (with skbs) and napi_gro_frags() (with pages). The bug is reproduced when under rx load with enough successful GRO merging the driver decides to switch from the former to the latter. Manual control is also possible, so reproducing this is easy with netcat: - on machine1 (with sfc): nc -l 12345 > /dev/null - on machine2: nc machine1 12345 < /dev/zero - on machine1: echo 1 > /sys/module/sfc/parameters/rx_alloc_method # use skbs echo 2 > /sys/module/sfc/parameters/rx_alloc_method # use pages - See that nc has quit suddenly. [v2: Modified by Eric Dumazet to avoid advancing skb->data past the end and to use a temporary variable.] Signed-off-by: Michal Schmidt Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d31bb36ae0dc..7cd1bc86d591 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2744,8 +2744,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) merge: if (offset > headlen) { - skbinfo->frags[0].page_offset += offset - headlen; - skbinfo->frags[0].size -= offset - headlen; + unsigned int eat = offset - headlen; + + skbinfo->frags[0].page_offset += eat; + skbinfo->frags[0].size -= eat; + skb->data_len -= eat; + skb->len -= eat; offset = headlen; } -- cgit v1.2.2 From 3408404a4c2a4eead9d73b0bbbfe3f225b65f492 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Jan 2011 14:37:46 -0800 Subject: inetpeer: Use correct AVL tree base pointer in inet_getpeer(). Family was hard-coded to AF_INET but should be daddr->family. This fixes crashes when unlinking ipv6 peer entries, since the unlink code was looking up the base pointer properly. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d9bc85751c74..a96e65674ac3 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -475,7 +475,7 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(AF_INET); + struct inet_peer_base *base = family_to_base(daddr->family); struct inet_peer *p; /* Look up for the address quickly, lockless. -- cgit v1.2.2 From fd0273c5033630b8673554cd39660435d1ab2ac4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2011 14:41:20 -0800 Subject: tcp: fix bug in listening_get_next() commit a8b690f98baf9fb19 (tcp: Fix slowness in read /proc/net/tcp) introduced a bug in handling of SYN_RECV sockets. st->offset represents number of sockets found since beginning of listening_hash[st->bucket]. We should not reset st->offset when iterating through syn_table[st->sbucket], or else if more than ~25 sockets (if PAGE_SIZE=4096) are in SYN_RECV state, we exit from listening_get_next() with a too small st->offset Next time we enter tcp_seek_last_pos(), we are not able to seek past already found sockets. Reported-by: PK CC: Tom Herbert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 856f68466d49..02f583b3744a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1994,7 +1994,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } - st->offset = 0; if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: -- cgit v1.2.2 From 3dce38a02d6370dca690cd923619d4b00024b723 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 21 Jan 2011 16:35:18 +0000 Subject: dcbnl: make get_app handling symmetric for IEEE and CEE DCBx The IEEE get/set app handlers use generic routines and do not require the net_device to implement the dcbnl_ops routines. This patch makes it symmetric so user space and drivers do not have to handle the CEE version and IEEE DCBx versions differently. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d900ab99814a..6b03f561caec 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -583,7 +583,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, u8 up, idtype; int ret = -EINVAL; - if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->getapp) + if (!tb[DCB_ATTR_APP]) goto out; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], @@ -604,7 +604,16 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, goto out; id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]); - up = netdev->dcbnl_ops->getapp(netdev, idtype, id); + + if (netdev->dcbnl_ops->getapp) { + up = netdev->dcbnl_ops->getapp(netdev, idtype, id); + } else { + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + up = dcb_getapp(netdev, &app); + } /* send this back */ dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); -- cgit v1.2.2 From d80bc0fd262ef840ed4e82593ad6416fa1ba3fc4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Jan 2011 16:01:58 -0800 Subject: ipv6: Always clone offlink routes. Do not handle PMTU vs. route lookup creation any differently wrt. offlink routes, always clone them. Reported-by: PK Signed-off-by: David S. Miller --- net/ipv6/route.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 373bd0416f69..1534508f6c68 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -72,8 +72,6 @@ #define RT6_TRACE(x...) do { ; } while (0) #endif -#define CLONE_OFFLINK_ROUTE 0 - static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); @@ -738,13 +736,8 @@ restart: if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); - else { -#if CLONE_OFFLINK_ROUTE + else nrt = rt6_alloc_clone(rt, &fl->fl6_dst); -#else - goto out2; -#endif - } dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; -- cgit v1.2.2 From b7c7d01aaed1f71d9afe815a569f0a81465a1744 Mon Sep 17 00:00:00 2001 From: Eugene Teo Date: Mon, 24 Jan 2011 21:05:17 -0800 Subject: net: clear heap allocation for ethtool_get_regs() There is a conflict between commit b00916b1 and a77f5db3. This patch resolves the conflict by clearing the heap allocation in ethtool_get_regs(). Cc: stable@kernel.org Signed-off-by: Eugene Teo Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 17741782a345..ff2302910b5e 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -817,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (regs.len > reglen) regs.len = reglen; - regbuf = vmalloc(reglen); + regbuf = vzalloc(reglen); if (!regbuf) return -ENOMEM; -- cgit v1.2.2 From 778be232a207e79088ba70d832ac25dfea6fbf1a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 25 Jan 2011 15:38:01 +0000 Subject: NFS do not find client in NFSv4 pg_authenticate The information required to find the nfs_client cooresponding to the incoming back channel request is contained in the NFS layer. Perform minimal checking in the RPC layer pg_authenticate method, and push more detailed checking into the NFS layer where the nfs_client can be found. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7bd3bbba4710..d802e941d365 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1609,9 +1609,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, */ static void svc_bc_sock_free(struct svc_xprt *xprt) { - if (xprt) { - kfree(xprt->xpt_bc_sid); + if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); - } } #endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.2 From 73a8bd74e2618990dbb218c3d82f53e60acd9af0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 23 Jan 2011 23:27:15 -0800 Subject: ipv6: Revert 'administrative down' address handling changes. This reverts the following set of commits: d1ed113f1669390da9898da3beddcc058d938587 ("ipv6: remove duplicate neigh_ifdown") 29ba5fed1bbd09c2cba890798c8f9eaab251401d ("ipv6: don't flush routes when setting loopback down") 9d82ca98f71fd686ef2f3017c5e3e6a4871b6e46 ("ipv6: fix missing in6_ifa_put in addrconf") 2de795707294972f6c34bae9de713e502c431296 ("ipv6: addrconf: don't remove address state on ifdown if the address is being kept") 8595805aafc8b077e01804c9a3668e9aa3510e89 ("IPv6: only notify protocols if address is compeletely gone") 27bdb2abcc5edb3526e25407b74bf17d1872c329 ("IPv6: keep tentative addresses in hash table") 93fa159abe50d3c55c7f83622d3f5c09b6e06f4b ("IPv6: keep route for tentative address") 8f37ada5b5f6bfb4d251a7f510f249cb855b77b3 ("IPv6: fix race between cleanup and add/delete address") 84e8b803f1e16f3a2b8b80f80a63fa2f2f8a9be6 ("IPv6: addrconf notify when address is unavailable") dc2b99f71ef477a31020511876ab4403fb7c4420 ("IPv6: keep permanent addresses on admin down") because the core semantic change to ipv6 address handling on ifdown has broken some things, in particular "disable_ipv6" sysctl handling. Stephen has made several attempts to get things back in working order, but nothing has restored disable_ipv6 fully yet. Reported-by: Eric W. Biederman Tested-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 81 ++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 24a1cf110d80..fd6782e3a038 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2661,14 +2661,12 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa; - LIST_HEAD(keep_list); - int state; + int state, i; ASSERT_RTNL(); - /* Flush routes if device is being removed or it is not loopback */ - if (how || !(dev->flags & IFF_LOOPBACK)) - rt6_ifdown(net, dev); + rt6_ifdown(net, dev); + neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); if (idev == NULL) @@ -2689,6 +2687,23 @@ static int addrconf_ifdown(struct net_device *dev, int how) } + /* Step 2: clear hash table */ + for (i = 0; i < IN6_ADDR_HSIZE; i++) { + struct hlist_head *h = &inet6_addr_lst[i]; + struct hlist_node *n; + + spin_lock_bh(&addrconf_hash_lock); + restart: + hlist_for_each_entry_rcu(ifa, n, h, addr_lst) { + if (ifa->idev == idev) { + hlist_del_init_rcu(&ifa->addr_lst); + addrconf_del_timer(ifa); + goto restart; + } + } + spin_unlock_bh(&addrconf_hash_lock); + } + write_lock_bh(&idev->lock); /* Step 2: clear flags for stateless addrconf */ @@ -2722,52 +2737,23 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct inet6_ifaddr, if_list); addrconf_del_timer(ifa); - /* If just doing link down, and address is permanent - and not link-local, then retain it. */ - if (!how && - (ifa->flags&IFA_F_PERMANENT) && - !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { - list_move_tail(&ifa->if_list, &keep_list); - - /* If not doing DAD on this address, just keep it. */ - if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || - idev->cnf.accept_dad <= 0 || - (ifa->flags & IFA_F_NODAD)) - continue; + list_del(&ifa->if_list); - /* If it was tentative already, no need to notify */ - if (ifa->flags & IFA_F_TENTATIVE) - continue; + write_unlock_bh(&idev->lock); - /* Flag it for later restoration when link comes up */ - ifa->flags |= IFA_F_TENTATIVE; - ifa->state = INET6_IFADDR_STATE_DAD; - } else { - list_del(&ifa->if_list); - - /* clear hash table */ - spin_lock_bh(&addrconf_hash_lock); - hlist_del_init_rcu(&ifa->addr_lst); - spin_unlock_bh(&addrconf_hash_lock); - - write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->state_lock); - state = ifa->state; - ifa->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifa->state_lock); - - if (state != INET6_IFADDR_STATE_DEAD) { - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, - NETDEV_DOWN, ifa); - } + spin_lock_bh(&ifa->state_lock); + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifa->state_lock); - in6_ifa_put(ifa); - write_lock_bh(&idev->lock); + if (state != INET6_IFADDR_STATE_DEAD) { + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); } - } + in6_ifa_put(ifa); - list_splice(&keep_list, &idev->addr_list); + write_lock_bh(&idev->lock); + } write_unlock_bh(&idev->lock); @@ -4156,8 +4142,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->dst); - if (ifp->state == INET6_IFADDR_STATE_DEAD && - ip6_del_rt(ifp->rt)) + if (ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->dst); break; } -- cgit v1.2.2 From eb3e554b4b3a56386ef5214dbe0e3935a350178b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 24 Jan 2011 19:28:49 +0100 Subject: mac80211: fix a crash in ieee80211_beacon_get_tim on change_interface Some drivers (e.g. ath9k) do not always disable beacons when they're supposed to. When an interface is changed using the change_interface op, the mode specific sdata part is in an undefined state and trying to get a beacon at this point can produce weird crashes. To fix this, add a check for ieee80211_sdata_running before using anything from the sdata. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/tx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5950e3abead9..b64b42bc774b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2230,6 +2230,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); + if (!ieee80211_sdata_running(sdata)) + goto out; + if (tim_offset) *tim_offset = 0; if (tim_length) -- cgit v1.2.2 From 44f5324b5d13ef2187729d949eca442689627f39 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Tue, 25 Jan 2011 13:46:30 -0800 Subject: TCP: fix a bug that triggers large number of TCP RST by mistake This patch fixes a bug that causes TCP RST packets to be generated on otherwise correctly behaved applications, e.g., no unread data on close,..., etc. To trigger the bug, at least two conditions must be met: 1. The FIN flag is set on the last data packet, i.e., it's not on a separate, FIN only packet. 2. The size of the last data chunk on the receive side matches exactly with the size of buffer posted by the receiver, and the receiver closes the socket without any further read attempt. This bug was first noticed on our netperf based testbed for our IW10 proposal to IETF where a large number of RST packets were observed. netperf's read side code meets the condition 2 above 100%. Before the fix, tcp_data_queue() will queue the last skb that meets condition 1 to sk_receive_queue even though it has fully copied out (skb_copy_datagram_iovec()) the data. Then if condition 2 is also met, tcp_recvmsg() often returns all the copied out data successfully without actually consuming the skb, due to a check "if ((chunk = len - tp->ucopy.len) != 0) {" and "len -= chunk;" after tcp_prequeue_process() that causes "len" to become 0 and an early exit from the big while loop. I don't see any reason not to free the skb whose data have been fully consumed in tcp_data_queue(), regardless of the FIN flag. We won't get there if MSG_PEEK is on. Am I missing some arcane cases related to urgent data? Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2549b29b062d..eb7f82ebf4a3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4399,7 +4399,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) { tp->ucopy.len -= chunk; tp->copied_seq += chunk; - eaten = (chunk == skb->len && !th->fin); + eaten = (chunk == skb->len); tcp_rcv_space_adjust(sk); } local_bh_disable(); -- cgit v1.2.2 From dd58ddc6928f711d8fb7101182215a0f23cf41f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 25 Jan 2011 21:56:16 +0000 Subject: batman-adv: Fix kernel panic when fetching vis data on a vis server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hash_iterate removal introduced a bug leading to a kernel panic when fetching the vis data on a vis server. That commit forgot to rename one variable name, which this commit fixes now. Reported-by: Russell Senior Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann --- net/batman-adv/vis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index cd4c4231fa48..f69a3748f8ae 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -268,10 +268,10 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) buff_pos += sprintf(buff + buff_pos, "%pM,", entry->addr); - for (i = 0; i < packet->entries; i++) + for (j = 0; j < packet->entries; j++) buff_pos += vis_data_read_entry( buff + buff_pos, - &entries[i], + &entries[j], entry->addr, entry->primary); -- cgit v1.2.2 From 7cc2edb83447775a34ed3bf9d29d8295a434b523 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Jan 2011 13:41:03 -0800 Subject: xfrm6: Don't forget to propagate peer into ipsec route. Like ipv4, we have to propagate the ipv6 route peer into the ipsec top-level route during instantiation. Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7e74023ea6e4..da87428681cc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -98,6 +98,10 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, if (!xdst->u.rt6.rt6i_idev) return -ENODEV; + xdst->u.rt6.rt6i_peer = rt->rt6i_peer; + if (rt->rt6i_peer) + atomic_inc(&rt->rt6i_peer->refcnt); + /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | @@ -216,6 +220,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); + if (likely(xdst->u.rt6.rt6i_peer)) + inet_putpeer(xdst->u.rt6.rt6i_peer); xfrm_dst_destroy(xdst); } -- cgit v1.2.2 From 389f2a18c6a2a5531ac5a155c3ba25784065b1cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Jan 2011 00:04:18 +0000 Subject: econet: remove compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/econet/af_econet.c: In function ‘econet_sendmsg’: net/econet/af_econet.c:494: warning: label ‘error’ defined but not used net/econet/af_econet.c:268: warning: unused variable ‘sk’ Signed-off-by: Eric Dumazet Acked-by: Phil Blundell Signed-off-by: David S. Miller --- net/econet/af_econet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 15dcc1a586b4..0c2826337919 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -265,13 +265,13 @@ static void ec_tx_done(struct sk_buff *skb, int result) static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock *sk = sock->sk; struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name; struct net_device *dev; struct ec_addr addr; int err; unsigned char port, cb; #if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE) + struct sock *sk = sock->sk; struct sk_buff *skb; struct ec_cb *eb; #endif @@ -488,10 +488,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, error_free_buf: vfree(userbuf); +error: #else err = -EPROTOTYPE; #endif - error: mutex_unlock(&econet_mutex); return err; -- cgit v1.2.2 From 6d3a9a685465986d7653c5abbc0b24681e7c44d7 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Wed, 26 Jan 2011 04:55:24 +0000 Subject: net: fix validate_link_af in rtnetlink core I'm testing an API that uses IFLA_AF_SPEC attribute. In the rtnetlink core , the set_link_af() member of the rtnl_af_ops struct receives the nested attribute (as I expected), but the validate_link_af() member receives the parent attribute. IMO, this patch fixes this. Signed-off-by: Kurt Van Dijck Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 750db57f3bb3..31459ef13ca2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1121,8 +1121,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EOPNOTSUPP; if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, - tb[IFLA_AF_SPEC]); + err = af_ops->validate_link_af(dev, af); if (err < 0) return err; } -- cgit v1.2.2 From c2aa3665cf8510b1665ee2f5a9525cf7be6dec4f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Jan 2011 23:18:38 +0000 Subject: net: add kmemcheck annotation in __alloc_skb() pskb_expand_head() triggers a kmemcheck warning when copy of skb_shared_info is done in pskb_expand_head() This is because destructor_arg field is not necessarily initialized at this point. Add kmemcheck_annotate_variable() call in __alloc_skb() to instruct kmemcheck this is a normal situation. Resolves bugzilla.kernel.org 27212 Reference: https://bugzilla.kernel.org/show_bug.cgi?id=27212 Reported-by: Christian Casteyde Signed-off-by: Eric Dumazet CC: Andrew Morton Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7cd1bc86d591..d883dcc78b6b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -210,6 +210,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); + kmemcheck_annotate_variable(shinfo->destructor_arg); if (fclone) { struct sk_buff *child = skb + 1; -- cgit v1.2.2 From 8f2771f2b85aea4d0f9a0137ad3b63d1173c0962 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Jan 2011 14:55:22 -0800 Subject: ipv6: Remove route peer binding assertions. They are bogus. The basic idea is that I wanted to make sure that prefixed routes never bind to peers. The test I used was whether RTF_CACHE was set. But first of all, the RTF_CACHE flag is set at different spots depending upon which ip6_rt_copy() caller you're talking about. I've validated all of the code paths, and even in the future where we bind peers more aggressively (for route metric COW'ing) we never bind to prefix'd routes, only fully specified ones. This even applies when addrconf or icmp6 routes are allocated. Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1534508f6c68..28a85fc63cb8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -194,7 +194,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } if (peer) { - BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); rt->rt6i_peer = NULL; inet_putpeer(peer); } @@ -204,9 +203,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer *peer; - if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE))) - return; - peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); -- cgit v1.2.2 From 66c46d741e2e60f0e8b625b80edb0ab820c46d7a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 29 Jan 2011 20:44:54 -0800 Subject: gro: Reset dev pointer on reuse On older kernels the VLAN code may zero skb->dev before dropping it and causing it to be reused by GRO. Unfortunately we didn't reset skb->dev in that case which causes the next GRO user to get a bogus skb->dev pointer. This particular problem no longer happens with the current upstream kernel due to changes in VLAN processing. However, for correctness we should still reset the skb->dev pointer in the GRO reuse function in case a future user does the same thing. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 24ea2d71e7ea..93e44dbf6793 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3424,6 +3424,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; + skb->dev = napi->dev; napi->skb = skb; } -- cgit v1.2.2 From 13ad17745c2cbd437d9e24b2d97393e0be11c439 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 29 Jan 2011 14:57:22 +0000 Subject: net: Fix ip link add netns oops Ed Swierk writes: > On 2.6.35.7 > ip link add link eth0 netns 9999 type macvlan > where 9999 is a nonexistent PID triggers an oops and causes all network functions to hang: > [10663.821898] BUG: unable to handle kernel NULL pointer dereference at 000000000000006d > [10663.821917] IP: [] __dev_alloc_name+0x9a/0x170 > [10663.821933] PGD 1d3927067 PUD 22f5c5067 PMD 0 > [10663.821944] Oops: 0000 [#1] SMP > [10663.821953] last sysfs file: /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq > [10663.821959] CPU 3 > [10663.821963] Modules linked in: macvlan ip6table_filter ip6_tables rfcomm ipt_MASQUERADE binfmt_misc iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack sco ipt_REJECT bnep l2cap xt_tcpudp iptable_filter ip_tables x_tables bridge stp vboxnetadp vboxnetflt vboxdrv kvm_intel kvm parport_pc ppdev snd_hda_codec_intelhdmi snd_hda_codec_conexant arc4 iwlagn iwlcore mac80211 snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_seq_midi snd_rawmidi i915 snd_seq_midi_event snd_seq thinkpad_acpi drm_kms_helper btusb tpm_tis nvram uvcvideo snd_timer snd_seq_device bluetooth videodev v4l1_compat v4l2_compat_ioctl32 tpm drm tpm_bios snd cfg80211 psmouse serio_raw intel_ips soundcore snd_page_alloc intel_agp i2c_algo_bit video output netconsole configfs lp parport usbhid hid e1000e sdhci_pci ahci libahci sdhci led_class > [10663.822155] > [10663.822161] Pid: 6000, comm: ip Not tainted 2.6.35-23-generic #41-Ubuntu 2901CTO/2901CTO > [10663.822167] RIP: 0010:[] [] __dev_alloc_name+0x9a/0x170 > [10663.822177] RSP: 0018:ffff88014aebf7b8 EFLAGS: 00010286 > [10663.822182] RAX: 00000000fffffff4 RBX: ffff8801ad900800 RCX: 0000000000000000 > [10663.822187] RDX: ffff880000000000 RSI: 0000000000000000 RDI: ffff88014ad63000 > [10663.822191] RBP: ffff88014aebf808 R08: 0000000000000041 R09: 0000000000000041 > [10663.822196] R10: 0000000000000000 R11: dead000000200200 R12: ffff88014aebf818 > [10663.822201] R13: fffffffffffffffd R14: ffff88014aebf918 R15: ffff88014ad62000 > [10663.822207] FS: 00007f00c487f700(0000) GS:ffff880001f80000(0000) knlGS:0000000000000000 > [10663.822212] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [10663.822216] CR2: 000000000000006d CR3: 0000000231f19000 CR4: 00000000000026e0 > [10663.822221] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [10663.822226] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > [10663.822231] Process ip (pid: 6000, threadinfo ffff88014aebe000, task ffff88014afb16e0) > [10663.822236] Stack: > [10663.822240] ffff88014aebf808 ffffffff814a2bb5 ffff88014aebf7e8 00000000a00ee8d6 > [10663.822251] <0> 0000000000000000 ffffffffa00ef940 ffff8801ad900800 ffff88014aebf818 > [10663.822265] <0> ffff88014aebf918 ffff8801ad900800 ffff88014aebf858 ffffffff8149c413 > [10663.822281] Call Trace: > [10663.822290] [] ? dev_addr_init+0x75/0xb0 > [10663.822298] [] dev_alloc_name+0x43/0x90 > [10663.822307] [] rtnl_create_link+0xbe/0x1b0 > [10663.822314] [] rtnl_newlink+0x48a/0x570 > [10663.822321] [] ? rtnl_newlink+0x1ac/0x570 > [10663.822332] [] ? native_x2apic_icr_read+0x4/0x20 > [10663.822339] [] rtnetlink_rcv_msg+0x177/0x290 > [10663.822346] [] ? rtnetlink_rcv_msg+0x0/0x290 > [10663.822354] [] netlink_rcv_skb+0xa9/0xd0 > [10663.822360] [] rtnetlink_rcv+0x25/0x40 > [10663.822367] [] netlink_unicast+0x2de/0x2f0 > [10663.822374] [] netlink_sendmsg+0x1fe/0x2e0 > [10663.822383] [] sock_sendmsg+0xf3/0x120 > [10663.822391] [] ? _raw_spin_lock+0xe/0x20 > [10663.822400] [] ? __d_lookup+0x136/0x150 > [10663.822406] [] ? _raw_spin_lock+0xe/0x20 > [10663.822414] [] ? _atomic_dec_and_lock+0x4d/0x80 > [10663.822422] [] ? mntput_no_expire+0x30/0x110 > [10663.822429] [] ? move_addr_to_kernel+0x65/0x70 > [10663.822435] [] ? verify_iovec+0x88/0xe0 > [10663.822442] [] sys_sendmsg+0x240/0x3a0 > [10663.822450] [] ? __do_fault+0x479/0x560 > [10663.822457] [] ? _raw_spin_lock+0xe/0x20 > [10663.822465] [] ? alloc_fd+0x10a/0x150 > [10663.822473] [] ? do_page_fault+0x15e/0x350 > [10663.822482] [] system_call_fastpath+0x16/0x1b > [10663.822487] Code: 90 48 8d 78 02 be 25 00 00 00 e8 92 1d e2 ff 48 85 c0 75 cf bf 20 00 00 00 e8 c3 b1 c6 ff 49 89 c7 b8 f4 ff ff ff 4d 85 ff 74 bd <4d> 8b 75 70 49 8d 45 70 48 89 45 b8 49 83 ee 58 eb 28 48 8d 55 > [10663.822618] RIP [] __dev_alloc_name+0x9a/0x170 > [10663.822627] RSP > [10663.822631] CR2: 000000000000006d > [10663.822636] ---[ end trace 3dfd6c3ad5327ca7 ]--- This bug was introduced in: commit 81adee47dfb608df3ad0b91d230fb3cef75f0060 Author: Eric W. Biederman Date: Sun Nov 8 00:53:51 2009 -0800 net: Support specifying the network namespace upon device creation. There is no good reason to not support userspace specifying the network namespace during device creation, and it makes it easier to create a network device and pass it to a child network namespace with a well known name. We have to be careful to ensure that the target network namespace for the new device exists through the life of the call. To keep that logic clear I have factored out the network namespace grabbing logic into rtnl_link_get_net. In addtion we need to continue to pass the source network namespace to the rtnl_link_ops.newlink method so that we can find the base device source network namespace. Signed-off-by: Eric W. Biederman Acked-by: Eric Dumazet Where apparently I forgot to add error handling to the path where we create a new network device in a new network namespace, and pass in an invalid pid. Cc: stable@kernel.org Reported-by: Ed Swierk Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 31459ef13ca2..2d65c6bb24c1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1671,6 +1671,9 @@ replay: snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); dest_net = rtnl_link_get_net(net, tb); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); if (IS_ERR(dev)) -- cgit v1.2.2 From 709b46e8d90badda1898caea50483c12af178e96 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 29 Jan 2011 16:15:56 +0000 Subject: net: Add compat ioctl support for the ipv4 multicast ioctl SIOCGETSGCNT SIOCGETSGCNT is not a unique ioctl value as it it maps tio SIOCPROTOPRIVATE +1, which unfortunately means the existing infrastructure for compat networking ioctls is insufficient. A trivial compact ioctl implementation would conflict with: SIOCAX25ADDUID SIOCAIPXPRISLT SIOCGETSGCNT_IN6 SIOCGETSGCNT SIOCRSSCAUSE SIOCX25SSUBSCRIP SIOCX25SDTEFACILITIES To make this work I have updated the compat_ioctl decode path to mirror the the normal ioctl decode path. I have added an ipv4 inet_compat_ioctl function so that I can have ipv4 specific compat ioctls. I have added a compat_ioctl function into struct proto so I can break out ioctls by which kind of ip socket I am using. I have added a compat_raw_ioctl function because SIOCGETSGCNT only works on raw sockets. I have added a ipmr_compat_ioctl that mirrors the normal ipmr_ioctl. This was necessary because unfortunately the struct layout for the SIOCGETSGCNT has unsigned longs in it so changes between 32bit and 64bit kernels. This change was sufficient to run a 32bit ip multicast routing daemon on a 64bit kernel. Reported-by: Bill Fenner Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 16 ++++++++++++++++ net/ipv4/ipmr.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/raw.c | 19 +++++++++++++++++++ 3 files changed, 81 insertions(+) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2b61107df6c..45b89d7bda5a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -880,6 +880,19 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(inet_ioctl); +#ifdef CONFIG_COMPAT +int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + int err = -ENOIOCTLCMD; + + if (sk->sk_prot->compat_ioctl) + err = sk->sk_prot->compat_ioctl(sk, cmd, arg); + + return err; +} +#endif + const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, @@ -903,6 +916,7 @@ const struct proto_ops inet_stream_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_stream_ops); @@ -929,6 +943,7 @@ const struct proto_ops inet_dgram_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_dgram_ops); @@ -959,6 +974,7 @@ static const struct proto_ops inet_sockraw_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3f3a9afd73e0..7e41ac0b9260 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -1434,6 +1435,51 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) } } +#ifdef CONFIG_COMPAT +struct compat_sioc_sg_req { + struct in_addr src; + struct in_addr grp; + compat_ulong_t pktcnt; + compat_ulong_t bytecnt; + compat_ulong_t wrong_if; +}; + +int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + struct sioc_sg_req sr; + struct mfc_cache *c; + struct net *net = sock_net(sk); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; + + switch (cmd) { + case SIOCGETSGCNT: + if (copy_from_user(&sr, arg, sizeof(sr))) + return -EFAULT; + + rcu_read_lock(); + c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + rcu_read_unlock(); + + if (copy_to_user(arg, &sr, sizeof(sr))) + return -EFAULT; + return 0; + } + rcu_read_unlock(); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} +#endif + static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a3d5ab786e81..6390ba299b3d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -76,6 +76,7 @@ #include #include #include +#include static struct raw_hashinfo raw_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), @@ -838,6 +839,23 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +#ifdef CONFIG_COMPAT +static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + case SIOCINQ: + return -ENOIOCTLCMD; + default: +#ifdef CONFIG_IP_MROUTE + return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg)); +#else + return -ENOIOCTLCMD; +#endif + } +} +#endif + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -860,6 +878,7 @@ struct proto raw_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_raw_setsockopt, .compat_getsockopt = compat_raw_getsockopt, + .compat_ioctl = compat_raw_ioctl, #endif }; -- cgit v1.2.2 From 2674c15870f888cb732a564fc504ce17654afc64 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 28 Jan 2011 18:34:05 +0100 Subject: batman-adv: Remove vis info on hashing errors A newly created vis info object must be removed when it couldn't be added to the hash. The old_info which has to be replaced was already removed and isn't related to the hash anymore. Signed-off-by: Sven Eckelmann --- net/batman-adv/vis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index f69a3748f8ae..0be55beba054 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -444,7 +444,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, info); if (hash_added < 0) { /* did not work (for some reason) */ - kref_put(&old_info->refcount, free_info); + kref_put(&info->refcount, free_info); info = NULL; } -- cgit v1.2.2 From dda9fc6b2c59f056e7a2b313b8423b14a4df25a9 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 28 Jan 2011 18:34:06 +0100 Subject: batman-adv: Remove vis info element in free_info The free_info function will be called when no reference to the info object exists anymore. It must be ensured that the allocated memory gets freed and not only the elements which are managed by the info object. Signed-off-by: Sven Eckelmann --- net/batman-adv/vis.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 0be55beba054..988296cdf7c5 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -64,6 +64,7 @@ static void free_info(struct kref *ref) spin_unlock_bh(&bat_priv->vis_list_lock); kfree_skb(info->skb_packet); + kfree(info); } /* Compare two vis packets, used by the hashing algorithm */ -- cgit v1.2.2 From 1181e1daace88018b2ff66592aa10a4791d705ff Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 28 Jan 2011 18:34:07 +0100 Subject: batman-adv: Make vis info stack traversal threadsafe The batman-adv vis server has to a stack which stores all information about packets which should be send later. This stack is protected with a spinlock that is used to prevent concurrent write access to it. The send_vis_packets function has to take all elements from the stack and send them to other hosts over the primary interface. The send will be initiated without the lock which protects the stack. The implementation using list_for_each_entry_safe has the problem that it stores the next element as "safe ptr" to allow the deletion of the current element in the list. The list may be modified during the unlock/lock pair in the loop body which may make the safe pointer not pointing to correct next element. It is safer to remove and use the first element from the stack until no elements are available. This does not need reduntant information which would have to be validated each time the lock was removed. Reported-by: Russell Senior Signed-off-by: Sven Eckelmann --- net/batman-adv/vis.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 988296cdf7c5..de1022cacaf7 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -816,7 +816,7 @@ static void send_vis_packets(struct work_struct *work) container_of(work, struct delayed_work, work); struct bat_priv *bat_priv = container_of(delayed_work, struct bat_priv, vis_work); - struct vis_info *info, *temp; + struct vis_info *info; spin_lock_bh(&bat_priv->vis_hash_lock); purge_vis_packets(bat_priv); @@ -826,8 +826,9 @@ static void send_vis_packets(struct work_struct *work) send_list_add(bat_priv, bat_priv->my_vis_info); } - list_for_each_entry_safe(info, temp, &bat_priv->vis_send_list, - send_list) { + while (!list_empty(&bat_priv->vis_send_list)) { + info = list_first_entry(&bat_priv->vis_send_list, + typeof(*info), send_list); kref_get(&info->refcount); spin_unlock_bh(&bat_priv->vis_hash_lock); -- cgit v1.2.2 From ec831ea72ee5d7d473899e27a86bd659482c4d0d Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 31 Jan 2011 13:16:00 -0800 Subject: net: Add default_mtu() methods to blackhole dst_ops When an IPSEC SA is still being set up, __xfrm_lookup() will return -EREMOTE and so ip_route_output_flow() will return a blackhole route. This can happen in a sndmsg call, and after d33e455337ea ("net: Abstract default MTU metric calculation behind an accessor.") this leads to a crash in ip_append_data() because the blackhole dst_ops have no default_mtu() method and so dst_mtu() calls a NULL pointer. Fix this by adding default_mtu() methods (that simply return 0, matching the old behavior) to the blackhole dst_ops. The IPv4 part of this patch fixes a crash that I saw when using an IPSEC VPN; the IPv6 part is untested because I don't have an IPv6 VPN, but it looks to be needed as well. Signed-off-by: Roland Dreier Signed-off-by: David S. Miller --- net/ipv4/route.c | 6 ++++++ net/ipv6/route.c | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 351dc4e85242..788a3e74834e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2707,6 +2707,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo return NULL; } +static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) +{ + return 0; +} + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -2716,6 +2721,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, + .default_mtu = ipv4_blackhole_default_mtu, .update_pmtu = ipv4_rt_blackhole_update_pmtu, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 28a85fc63cb8..1c29f95695de 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -113,6 +113,11 @@ static struct dst_ops ip6_dst_ops_template = { .local_out = __ip6_local_out, }; +static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) +{ + return 0; +} + static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -122,6 +127,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, + .default_mtu = ip6_blackhole_default_mtu, .update_pmtu = ip6_rt_blackhole_update_pmtu, }; -- cgit v1.2.2 From 8587523640441a9ff2564ebc6efeb39497ad6709 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 31 Jan 2011 16:23:42 -0800 Subject: net: Check rps_flow_table when RPS map length is 1 In get_rps_cpu, add check that the rps_flow_table for the device is NULL when trying to take fast path when RPS map length is one. Without this, RFS is effectively disabled if map length is one which is not correct. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 93e44dbf6793..4c907895876b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2563,7 +2563,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { - if (map->len == 1) { + if (map->len == 1 && + !rcu_dereference_raw(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; -- cgit v1.2.2 From bf36076a67db6d7423d09d861a072337866f0dd9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 31 Jan 2011 20:54:17 -0800 Subject: net: Fix ipv6 neighbour unregister_sysctl_table warning In my testing of 2.6.37 I was occassionally getting a warning about sysctl table entries being unregistered in the wrong order. Digging in it turns out this dates back to the last great sysctl reorg done where Al Viro introduced the requirement that sysctl directories needed to be created before and destroyed after the files in them. It turns out that in that great reorg /proc/sys/net/ipv6/neigh was overlooked. So this patch fixes that oversight and makes an annoying warning message go away. >------------[ cut here ]------------ >WARNING: at kernel/sysctl.c:1992 unregister_sysctl_table+0x134/0x164() >Pid: 23951, comm: kworker/u:3 Not tainted 2.6.37-350888.2010AroraKernelBeta.fc14.x86_64 #1 >Call Trace: > [] warn_slowpath_common+0x80/0x98 > [] warn_slowpath_null+0x15/0x17 > [] unregister_sysctl_table+0x134/0x164 > [] ? kfree+0xc4/0xd1 > [] neigh_sysctl_unregister+0x22/0x3a > [] addrconf_ifdown+0x33f/0x37b [ipv6] > [] ? skb_dequeue+0x5f/0x6b > [] addrconf_notify+0x69b/0x75c [ipv6] > [] ? ip6mr_device_event+0x98/0xa9 [ipv6] > [] notifier_call_chain+0x32/0x5e > [] raw_notifier_call_chain+0xf/0x11 > [] call_netdevice_notifiers+0x45/0x4a > [] rollback_registered_many+0x118/0x201 > [] unregister_netdevice_many+0x16/0x6d > [] default_device_exit_batch+0xa4/0xb8 > [] ? cleanup_net+0x0/0x194 > [] ops_exit_list+0x4e/0x56 > [] cleanup_net+0xf4/0x194 > [] process_one_work+0x187/0x280 > [] worker_thread+0xff/0x19f > [] ? worker_thread+0x0/0x19f > [] kthread+0x7d/0x85 > [] kernel_thread_helper+0x4/0x10 > [] ? kthread+0x0/0x85 > [] ? kernel_thread_helper+0x0/0x10 >---[ end trace 8a7e9310b35e9486 ]--- Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv6/sysctl_net_ipv6.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index fa1d8f4e0051..7cb65ef79f9c 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -15,6 +15,8 @@ #include #include +static struct ctl_table empty[1]; + static ctl_table ipv6_table_template[] = { { .procname = "route", @@ -35,6 +37,12 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "neigh", + .maxlen = 0, + .mode = 0555, + .child = empty, + }, { } }; @@ -152,7 +160,6 @@ static struct ctl_table_header *ip6_base; int ipv6_static_sysctl_register(void) { - static struct ctl_table empty[1]; ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); if (ip6_base == NULL) return -ENOMEM; -- cgit v1.2.2 From 9d0db8b6b1da9e3d4c696ef29449700c58d589db Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 1 Feb 2011 16:03:46 +0100 Subject: netfilter: arpt_mangle: fix return values of checkentry In 135367b "netfilter: xtables: change xt_target.checkentry return type", the type returned by checkentry was changed from boolean to int, but the return values where not adjusted. arptables: Input/output error This broke arptables with the mangle target since it returns true under success, which is interpreted by xtables as >0, thus returning EIO. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arpt_mangle.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index b8ddcc480ed9..a5e52a9f0a12 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -60,12 +60,12 @@ static int checkentry(const struct xt_tgchk_param *par) if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) - return false; + return -EINVAL; if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && mangle->target != XT_CONTINUE) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target arpt_mangle_reg __read_mostly = { -- cgit v1.2.2 From 3db7e93d3308fb882884b9f024235d6fbf542034 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 1 Feb 2011 16:06:30 +0100 Subject: netfilter: ecache: always set events bits, filter them later For the following rule: iptables -I PREROUTING -t raw -j CT --ctevents assured The event delivered looks like the following: [UPDATE] tcp 6 src=192.168.0.2 dst=192.168.1.2 sport=37041 dport=80 src=192.168.1.2 dst=192.168.1.100 sport=80 dport=37041 [ASSURED] Note that the TCP protocol state is not included. For that reason the CT event filtering is not very useful for conntrackd. To resolve this issue, instead of conditionally setting the CT events bits based on the ctmask, we always set them and perform the filtering in the late stage, just before the delivery. Thus, the event delivered looks like the following: [UPDATE] tcp 6 432000 ESTABLISHED src=192.168.0.2 dst=192.168.1.2 sport=37041 dport=80 src=192.168.1.2 dst=192.168.1.100 sport=80 dport=37041 [ASSURED] Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_ecache.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 5702de35e2bb..63a1b915a7e4 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -63,6 +63,9 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) * this does not harm and it happens very rarely. */ unsigned long missed = e->missed; + if (!((events | missed) & e->ctmask)) + goto out_unlock; + ret = notify->fcn(events | missed, &item); if (unlikely(ret < 0 || missed)) { spin_lock_bh(&ct->lock); -- cgit v1.2.2 From 4334ec8518cec3f7a4feeb3dacb46acfb24904d4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 2 Feb 2011 16:58:06 +0100 Subject: mac80211: fix TX status cookie in HW offload case When the off-channel TX is done with remain-on-channel offloaded to hardware, the reported cookie is wrong as in that case we shouldn't use the SKB as the cookie but need to instead use the corresponding r-o-c cookie (XOR'ed with 2 to prevent API mismatches). Fix this by keeping track of the hw_roc_skb pointer just for the status processing and use the correct cookie to report in this case. We can't use the hw_roc_skb pointer itself because it is NULL'ed when the frame is transmitted to prevent it being used twice. This fixes a bug where the P2P state machine in the supplicant gets stuck because it never gets a correct result for its transmitted frame. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 2 ++ net/mac80211/ieee80211_i.h | 2 +- net/mac80211/status.c | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4bc8a9250cfd..9cd73b11506e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1822,6 +1822,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, *cookie ^= 2; IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; local->hw_roc_skb = skb; + local->hw_roc_skb_for_status = skb; mutex_unlock(&local->mtx); return 0; @@ -1875,6 +1876,7 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, if (ret == 0) { kfree_skb(local->hw_roc_skb); local->hw_roc_skb = NULL; + local->hw_roc_skb_for_status = NULL; } mutex_unlock(&local->mtx); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c47d7c0e48a4..533fd32f49ff 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -953,7 +953,7 @@ struct ieee80211_local { struct ieee80211_channel *hw_roc_channel; struct net_device *hw_roc_dev; - struct sk_buff *hw_roc_skb; + struct sk_buff *hw_roc_skb, *hw_roc_skb_for_status; struct work_struct hw_roc_start, hw_roc_done; enum nl80211_channel_type hw_roc_channel_type; unsigned int hw_roc_duration; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 38a797217a91..071ac95c4aa0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -323,6 +323,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { struct ieee80211_work *wk; + u64 cookie = (unsigned long)skb; rcu_read_lock(); list_for_each_entry_rcu(wk, &local->work_list, list) { @@ -334,8 +335,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) break; } rcu_read_unlock(); + if (local->hw_roc_skb_for_status == skb) { + cookie = local->hw_roc_cookie ^ 2; + local->hw_roc_skb_for_status = NULL; + } cfg80211_mgmt_tx_status( - skb->dev, (unsigned long) skb, skb->data, skb->len, + skb->dev, cookie, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); } -- cgit v1.2.2 From 6d152e23ad1a7a5b40fef1f42e017d66e6115159 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Wed, 2 Feb 2011 14:53:25 -0800 Subject: gro: reset skb_iif on reuse Like Herbert's change from a few days ago: 66c46d741e2e60f0e8b625b80edb0ab820c46d7a gro: Reset dev pointer on reuse this may not be necessary at this point, but we should still clean up the skb->skb_iif. If not we may end up with an invalid valid for skb->skb_iif when the skb is reused and the check is done in __netif_receive_skb. Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4c907895876b..b6d0bf875a8e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3426,6 +3426,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; skb->dev = napi->dev; + skb->skb_iif = 0; napi->skb = skb; } -- cgit v1.2.2 From 0033d5ad27a6db33a55ff39951d3ec61a8c13b89 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 3 Feb 2011 17:21:31 -0800 Subject: net: Fix bug in compat SIOCGETSGCNT handling. Commit 709b46e8d90badda1898caea50483c12af178e96 ("net: Add compat ioctl support for the ipv4 multicast ioctl SIOCGETSGCNT") added the correct plumbing to handle SIOCGETSGCNT properly. However, whilst definiting a proper "struct compat_sioc_sg_req" it isn't actually used in ipmr_compat_ioctl(). Correct this oversight. Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7e41ac0b9260..fce10a080bd6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1446,7 +1446,7 @@ struct compat_sioc_sg_req { int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { - struct sioc_sg_req sr; + struct compat_sioc_sg_req sr; struct mfc_cache *c; struct net *net = sock_net(sk); struct mr_table *mrt; -- cgit v1.2.2 From ca6b8bb097c8e0ab6bce4fa04584074dee17c0d9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 3 Feb 2011 17:24:28 -0800 Subject: net: Support compat SIOCGETVIFCNT ioctl in ipv4. Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fce10a080bd6..8b65a12654e7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1444,9 +1444,19 @@ struct compat_sioc_sg_req { compat_ulong_t wrong_if; }; +struct compat_sioc_vif_req { + vifi_t vifi; /* Which iface */ + compat_ulong_t icount; + compat_ulong_t ocount; + compat_ulong_t ibytes; + compat_ulong_t obytes; +}; + int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { struct compat_sioc_sg_req sr; + struct compat_sioc_vif_req vr; + struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); struct mr_table *mrt; @@ -1456,6 +1466,26 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return -ENOENT; switch (cmd) { + case SIOCGETVIFCNT: + if (copy_from_user(&vr, arg, sizeof(vr))) + return -EFAULT; + if (vr.vifi >= mrt->maxvif) + return -EINVAL; + read_lock(&mrt_lock); + vif = &mrt->vif_table[vr.vifi]; + if (VIF_EXISTS(mrt, vr.vifi)) { + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &vr, sizeof(vr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; case SIOCGETSGCNT: if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; -- cgit v1.2.2 From e2d57766e6744f2956975dd2086d82957187b0f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 3 Feb 2011 17:59:32 -0800 Subject: net: Provide compat support for SIOCGETMIFCNT_IN6 and SIOCGETSGCNT_IN6. Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/raw.c | 19 ++++++++++++++ 2 files changed, 94 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9fab274019c0..0e1d53bcf1e0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1804,6 +1805,80 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) } } +#ifdef CONFIG_COMPAT +struct compat_sioc_sg_req6 { + struct sockaddr_in6 src; + struct sockaddr_in6 grp; + compat_ulong_t pktcnt; + compat_ulong_t bytecnt; + compat_ulong_t wrong_if; +}; + +struct compat_sioc_mif_req6 { + mifi_t mifi; + compat_ulong_t icount; + compat_ulong_t ocount; + compat_ulong_t ibytes; + compat_ulong_t obytes; +}; + +int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + struct compat_sioc_sg_req6 sr; + struct compat_sioc_mif_req6 vr; + struct mif_device *vif; + struct mfc6_cache *c; + struct net *net = sock_net(sk); + struct mr6_table *mrt; + + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); + if (mrt == NULL) + return -ENOENT; + + switch (cmd) { + case SIOCGETMIFCNT_IN6: + if (copy_from_user(&vr, arg, sizeof(vr))) + return -EFAULT; + if (vr.mifi >= mrt->maxvif) + return -EINVAL; + read_lock(&mrt_lock); + vif = &mrt->vif6_table[vr.mifi]; + if (MIF_EXISTS(mrt, vr.mifi)) { + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &vr, sizeof(vr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + case SIOCGETSGCNT_IN6: + if (copy_from_user(&sr, arg, sizeof(sr))) + return -EFAULT; + + read_lock(&mrt_lock); + c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &sr, sizeof(sr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} +#endif static inline int ip6mr_forward2_finish(struct sk_buff *skb) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 86c39526ba5e..c5b0915d106b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1157,6 +1158,23 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +#ifdef CONFIG_COMPAT +static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + case SIOCINQ: + return -ENOIOCTLCMD; + default: +#ifdef CONFIG_IPV6_MROUTE + return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg)); +#else + return -ENOIOCTLCMD; +#endif + } +} +#endif + static void rawv6_close(struct sock *sk, long timeout) { if (inet_sk(sk)->inet_num == IPPROTO_RAW) @@ -1215,6 +1233,7 @@ struct proto rawv6_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_rawv6_setsockopt, .compat_getsockopt = compat_rawv6_getsockopt, + .compat_ioctl = compat_rawv6_ioctl, #endif }; -- cgit v1.2.2 From 1158f762e57c1cdcda631c1c5f339e4853caa82b Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 4 Feb 2011 13:02:36 -0800 Subject: bridge: Don't put partly initialized fdb into hash The fdb_create() puts a new fdb into hash with only addr set. This is not good, since there are callers, that search the hash w/o the lock and access all the other its fields. Applies to current netdev tree. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 2872393b2939..88485cc74dc3 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -328,12 +328,12 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); - hlist_add_head_rcu(&fdb->hlist, head); - fdb->dst = source; fdb->is_local = is_local; fdb->is_static = is_local; fdb->ageing_timer = jiffies; + + hlist_add_head_rcu(&fdb->hlist, head); } return fdb; } -- cgit v1.2.2 From fc7c976dc7a565213393ce700d4a6105f037bf20 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 7 Feb 2011 12:05:00 +0100 Subject: mac80211: fix the skb cloned check in the tx path Using skb_header_cloned to check if it's safe to write to the skb is not enough - mac80211 also touches the tailroom of the skb. Initially this check was only used to increase a counter, however this commit changed the code to also skip skb data reallocation if no extra head/tailroom was needed: commit 4cd06a344db752f513437138953af191cbe9a691 mac80211: skip unnecessary pskb_expand_head calls It added a regression at least with iwl3945, which is fixed by this patch. Reported-by: Dmitry Torokhov Signed-off-by: Felix Fietkau Tested-by: Dmitry Torokhov Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b64b42bc774b..b0beaa58246b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1547,7 +1547,7 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, skb_orphan(skb); } - if (skb_header_cloned(skb)) + if (skb_cloned(skb)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); -- cgit v1.2.2 From 95c3043008ca8449feb96aba5481fe31c2ea750b Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Mon, 7 Feb 2011 00:08:15 +0000 Subject: x25: possible skb leak on bad facilities Originally x25_parse_facilities returned -1 for an error 0 meaning 0 length facilities >0 the length of the facilities parsed. 5ef41308f94dc ("x25: Prevent crashing when parsing bad X.25 facilities") introduced more error checking in x25_parse_facilities however used 0 to indicate bad parsing a6331d6f9a429 ("memory corruption in X.25 facilities parsing") followed this further for DTE facilities, again using 0 for bad parsing. The meaning of 0 got confused in the callers. If the facilities are messed up we can't determine where the data starts. So patch makes all parsing errors return -1 and ensures callers close and don't use the skb further. Reported-by: Andy Whitcroft Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- net/x25/x25_facilities.c | 28 +++++++++++++++++++--------- net/x25/x25_in.c | 14 +++++++++++--- 2 files changed, 30 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 55187c8f6420..406207515b5e 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -27,9 +27,19 @@ #include #include -/* - * Parse a set of facilities into the facilities structures. Unrecognised - * facilities are written to the debug log file. +/** + * x25_parse_facilities - Parse facilities from skb into the facilities structs + * + * @skb: sk_buff to parse + * @facilities: Regular facilites, updated as facilities are found + * @dte_facs: ITU DTE facilities, updated as DTE facilities are found + * @vc_fac_mask: mask is updated with all facilities found + * + * Return codes: + * -1 - Parsing error, caller should drop call and clean up + * 0 - Parse OK, this skb has no facilities + * >0 - Parse OK, returns the length of the facilities header + * */ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask) @@ -62,7 +72,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: if (len < 2) - return 0; + return -1; switch (*p) { case X25_FAC_REVERSE: if((p[1] & 0x81) == 0x81) { @@ -107,7 +117,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_B: if (len < 3) - return 0; + return -1; switch (*p) { case X25_FAC_PACKET_SIZE: facilities->pacsize_in = p[1]; @@ -130,7 +140,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_C: if (len < 4) - return 0; + return -1; printk(KERN_DEBUG "X.25: unknown facility %02X, " "values %02X, %02X, %02X\n", p[0], p[1], p[2], p[3]); @@ -139,18 +149,18 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_D: if (len < p[1] + 2) - return 0; + return -1; switch (*p) { case X25_FAC_CALLING_AE: if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) - return 0; + return -1; dte_facs->calling_len = p[2]; memcpy(dte_facs->calling_ae, &p[3], p[1] - 1); *vc_fac_mask |= X25_MASK_CALLING_AE; break; case X25_FAC_CALLED_AE: if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) - return 0; + return -1; dte_facs->called_len = p[2]; memcpy(dte_facs->called_ae, &p[3], p[1] - 1); *vc_fac_mask |= X25_MASK_CALLED_AE; diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index f729f022be69..15de65f04719 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -91,10 +91,10 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp { struct x25_address source_addr, dest_addr; int len; + struct x25_sock *x25 = x25_sk(sk); switch (frametype) { case X25_CALL_ACCEPTED: { - struct x25_sock *x25 = x25_sk(sk); x25_stop_timer(sk); x25->condition = 0x00; @@ -113,14 +113,16 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp &dest_addr); if (len > 0) skb_pull(skb, len); + else if (len < 0) + goto out_clear; len = x25_parse_facilities(skb, &x25->facilities, &x25->dte_facilities, &x25->vc_facil_mask); if (len > 0) skb_pull(skb, len); - else - return -1; + else if (len < 0) + goto out_clear; /* * Copy any Call User Data. */ @@ -144,6 +146,12 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return 0; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25->state = X25_STATE_2; + x25_start_t23timer(sk); + return 0; } /* -- cgit v1.2.2 From 531c9da8c854c5b075383253a57fdd4e0be82e99 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Feb 2011 23:26:43 +0000 Subject: batman-adv: Linearize fragment packets before merge We access the data inside the skbs of two fragments directly using memmove during the merge. The data of the skb could span over multiple skb pages. An direct access without knowledge about the pages would lead to an invalid memory access. Signed-off-by: Sven Eckelmann [lindner_marek@yahoo.de: Move return from function to the end] Signed-off-by: Marek Lindner --- net/batman-adv/unicast.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index ee41fef04b21..d1a611322549 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -50,12 +50,12 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, skb = tfp->skb; } + if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0) + goto err; + skb_pull(tmp_skb, sizeof(struct unicast_frag_packet)); - if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) { - /* free buffered skb, skb will be freed later */ - kfree_skb(tfp->skb); - return NULL; - } + if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) + goto err; /* move free entry to end */ tfp->skb = NULL; @@ -70,6 +70,11 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, unicast_packet->packet_type = BAT_UNICAST; return skb; + +err: + /* free buffered skb, skb will be freed later */ + kfree_skb(tfp->skb); + return NULL; } static void frag_create_entry(struct list_head *head, struct sk_buff *skb) -- cgit v1.2.2 From b2df5a8446e135f7648736b8bec8179c88ce360d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Feb 2011 14:31:31 -0800 Subject: net/caif: Fix dangling list pointer in freed object on error. rtnl_link_ops->setup(), and the "setup" callback passed to alloc_netdev*(), cannot make state changes which need to be undone on failure. There is no cleanup mechanism available at this point. So we have to add the caif private instance to the global list once we are sure that register_netdev() has succedded in ->newlink(). Otherwise, if register_netdev() fails, the caller will invoke free_netdev() and we will have a reference to freed up memory on the chnl_net_list. Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index fa9dab372b68..6008d6dc18a0 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -394,9 +394,7 @@ static void ipcaif_net_setup(struct net_device *dev) priv->conn_req.sockaddr.u.dgm.connection_id = -1; priv->flowenabled = false; - ASSERT_RTNL(); init_waitqueue_head(&priv->netmgmt_wq); - list_add(&priv->list_field, &chnl_net_list); } @@ -453,6 +451,8 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, ret = register_netdevice(dev); if (ret) pr_warn("device rtml registration failed\n"); + else + list_add(&caifdev->list_field, &chnl_net_list); return ret; } -- cgit v1.2.2 From 8d3bdbd55a7e2a3f2c148a4830aa26dd682b21c4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Feb 2011 15:02:50 -0800 Subject: net: Fix lockdep regression caused by initializing netdev queues too early. In commit aa9421041128abb4d269ee1dc502ff65fb3b7d69 ("net: init ingress queue") we moved the allocation and lock initialization of the queues into alloc_netdev_mq() since register_netdevice() is way too late. The problem is that dev->type is not setup until the setup() callback is invoked by alloc_netdev_mq(), and the dev->type is what determines the lockdep class to use for the locks in the queues. Fix this by doing the queue allocation after the setup() callback runs. This is safe because the setup() callback is not allowed to make any state changes that need to be undone on error (memory allocations, etc.). It may, however, make state changes that are undone by free_netdev() (such as netif_napi_add(), which is done by the ipoib driver's setup routine). The previous code also leaked a reference to the &init_net namespace object on RX/TX queue allocation failures. Signed-off-by: David S. Miller --- net/core/dev.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b6d0bf875a8e..8e726cb47ed7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5660,30 +5660,35 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); + dev->gso_max_size = GSO_MAX_SIZE; + + INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); + dev->ethtool_ntuple_list.count = 0; + INIT_LIST_HEAD(&dev->napi_list); + INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->link_watch_list); + dev->priv_flags = IFF_XMIT_DST_RELEASE; + setup(dev); + dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) - goto free_pcpu; + goto free_all; #ifdef CONFIG_RPS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) - goto free_pcpu; + goto free_all; #endif - dev->gso_max_size = GSO_MAX_SIZE; - - INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); - dev->ethtool_ntuple_list.count = 0; - INIT_LIST_HEAD(&dev->napi_list); - INIT_LIST_HEAD(&dev->unreg_list); - INIT_LIST_HEAD(&dev->link_watch_list); - dev->priv_flags = IFF_XMIT_DST_RELEASE; - setup(dev); strcpy(dev->name, name); return dev; +free_all: + free_netdev(dev); + return NULL; + free_pcpu: free_percpu(dev->pcpu_refcnt); kfree(dev->_tx); -- cgit v1.2.2 From c317428644c0af137d80069ab178cd797da3be45 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 9 Feb 2011 08:08:20 +0100 Subject: netfilter: nf_conntrack: set conntrack templates again if we return NF_REPEAT The TCP tracking code has a special case that allows to return NF_REPEAT if we receive a new SYN packet while in TIME_WAIT state. In this situation, the TCP tracking code destroys the existing conntrack to start a new clean session. [DESTROY] tcp 6 src=192.168.0.2 dst=192.168.1.2 sport=38925 dport=8000 src=192.168.1.2 dst=192.168.1.100 sport=8000 dport=38925 [ASSURED] [NEW] tcp 6 120 SYN_SENT src=192.168.0.2 dst=192.168.1.2 sport=38925 dport=8000 [UNREPLIED] src=192.168.1.2 dst=192.168.1.100 sport=8000 dport=38925 However, this is a problem for the iptables' CT target event filtering which will not work in this case since the conntrack template will not be there for the new session. To fix this, we reassign the conntrack template to the packet if we return NF_REPEAT. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e61511929c66..84f4fcc5884b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -942,8 +942,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); out: - if (tmpl) - nf_ct_put(tmpl); + if (tmpl) { + /* Special case: we have to repeat this hook, assign the + * template again to this packet. We assume that this packet + * has no conntrack assigned. This is used by nf_ct_tcp. */ + if (ret == NF_REPEAT) + skb->nfct = (struct nf_conntrack *)tmpl; + else + nf_ct_put(tmpl); + } return ret; } -- cgit v1.2.2 From 96642d42f076101ba98866363d908cab706d156c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 9 Feb 2011 21:48:36 -0800 Subject: x25: Do not reference freed memory. In x25_link_free(), we destroy 'nb' before dereferencing 'nb->dev'. Don't do this, because 'nb' might be freed by then. Reported-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: David S. Miller --- net/x25/x25_link.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 4cbc942f762a..21306928d47f 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -396,9 +396,12 @@ void __exit x25_link_free(void) write_lock_bh(&x25_neigh_list_lock); list_for_each_safe(entry, tmp, &x25_neigh_list) { + struct net_device *dev; + nb = list_entry(entry, struct x25_neigh, node); + dev = nb->dev; __x25_remove_neigh(nb); - dev_put(nb->dev); + dev_put(dev); } write_unlock_bh(&x25_neigh_list_lock); } -- cgit v1.2.2