From dc7a08166f3a5f23e79e839a8a88849bd3397c32 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 Oct 2009 14:41:35 -0400 Subject: nfs: new subdir Documentation/filesystems/nfs We're adding enough nfs documentation that it may as well have its own subdirectory. Acked-by: Randy Dunlap Signed-off-by: J. Bruce Fields --- net/ipv4/Kconfig | 6 +++--- net/ipv4/ipconfig.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 70491d9035eb..0c94a1ac2946 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,7 +166,7 @@ config IP_PNP_DHCP If unsure, say Y. Note that if you want to use DHCP, a DHCP server must be operating on your network. Read - for details. + for details. config IP_PNP_BOOTP bool "IP: BOOTP support" @@ -181,7 +181,7 @@ config IP_PNP_BOOTP does BOOTP itself, providing all necessary information on the kernel command line, you can say N here. If unsure, say Y. Note that if you want to use BOOTP, a BOOTP server must be operating on your network. - Read for details. + Read for details. config IP_PNP_RARP bool "IP: RARP support" @@ -194,7 +194,7 @@ config IP_PNP_RARP older protocol which is being obsoleted by BOOTP and DHCP), say Y here. Note that if you want to use RARP, a RARP server must be operating on your network. Read - for details. + for details. # not yet ready.. # bool ' IP: ARP support' CONFIG_IP_PNP_ARP diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f8d04c256454..7dcbf4706099 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1447,7 +1447,7 @@ late_initcall(ip_auto_config); /* * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel - * command line parameter. See Documentation/filesystems/nfsroot.txt. + * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt. */ static int __init ic_proto_name(char *name) { -- cgit v1.2.2 From dc83d6e27fa80babe31c80aa8568f125f72edf57 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 Oct 2009 18:51:34 -0400 Subject: nfsd4: don't try to map gid's in generic rpc code For nfsd we provide users the option of mapping uid's to server-side supplementary group lists. That makes sense for nfsd, but not necessarily for other rpc users (such as the callback client). So move that lookup to svcauth_unix_set_client, which is a program-specific method. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 53 +++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 117f68a8aa40..97cc3de7432e 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -655,23 +655,25 @@ static struct unix_gid *unix_gid_lookup(uid_t uid) return NULL; } -static int unix_gid_find(uid_t uid, struct group_info **gip, - struct svc_rqst *rqstp) +static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) { - struct unix_gid *ug = unix_gid_lookup(uid); + struct unix_gid *ug; + struct group_info *gi; + int ret; + + ug = unix_gid_lookup(uid); if (!ug) - return -EAGAIN; - switch (cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle)) { + return ERR_PTR(-EAGAIN); + ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle); + switch (ret) { case -ENOENT: - *gip = NULL; - return 0; + return ERR_PTR(-ENOENT); case 0: - *gip = ug->gi; - get_group_info(*gip); + gi = get_group_info(ug->gi); cache_put(&ug->h, &unix_gid_cache); - return 0; + return gi; default: - return -EAGAIN; + return ERR_PTR(-EAGAIN); } } @@ -681,6 +683,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) struct sockaddr_in *sin; struct sockaddr_in6 *sin6, sin6_storage; struct ip_map *ipm; + struct group_info *gi; + struct svc_cred *cred = &rqstp->rq_cred; switch (rqstp->rq_addr.ss_family) { case AF_INET: @@ -722,6 +726,17 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) ip_map_cached_put(rqstp, ipm); break; } + + gi = unix_gid_find(cred->cr_uid, rqstp); + switch (PTR_ERR(gi)) { + case -EAGAIN: + return SVC_DROP; + case -ENOENT: + break; + default: + put_group_info(cred->cr_group_info); + cred->cr_group_info = gi; + } return SVC_OK; } @@ -818,19 +833,11 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) goto badcred; - if (unix_gid_find(cred->cr_uid, &cred->cr_group_info, rqstp) - == -EAGAIN) + cred->cr_group_info = groups_alloc(slen); + if (cred->cr_group_info == NULL) return SVC_DROP; - if (cred->cr_group_info == NULL) { - cred->cr_group_info = groups_alloc(slen); - if (cred->cr_group_info == NULL) - return SVC_DROP; - for (i = 0; i < slen; i++) - GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); - } else { - for (i = 0; i < slen ; i++) - svc_getnl(argv); - } + for (i = 0; i < slen; i++) + GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; -- cgit v1.2.2 From 6f8372b69c3198e06cecb1df2cb9682d0c55e657 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 13:26:06 -0800 Subject: RDMA/cm: fix loopback address support The RDMA CM is intended to support the use of a loopback address when establishing a connection; however, the behavior of the CM when loopback addresses are used is confusing and does not always work, depending on whether loopback was specified by the server, the client, or both. The defined behavior of rdma_bind_addr is to associate an RDMA device with an rdma_cm_id, as long as the user specified a non- zero address. (ie they weren't just trying to reserve a port) Currently, if the loopback address is passed to rdam_bind_addr, no device is associated with the rdma_cm_id. Fix this. If a loopback address is specified by the client as the destination address for a connection, it will fail to establish a connection. This is true even if the server is listing across all addresses or on the loopback address itself. The issue is that the server tries to translate the IP address carried in the REQ message to a local net_device address, which fails. The translation is not needed in this case, since the REQ carries the actual HW address that should be used. Finally, cleanup loopback support to be more transport neutral. Replace separate calls to get/set the sgid and dgid from the device address to a single call that behaves correctly depending on the format of the device address. And support both IPv4 and IPv6 address formats. Signed-off-by: Sean Hefty [ Fixed RDS build by s/ib_addr_get/rdma_addr_get/ - Roland ] Signed-off-by: Roland Dreier --- net/rds/ib.c | 4 ++-- net/rds/iw.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rds/ib.c b/net/rds/ib.c index 536ebe5d3f6b..3b8992361042 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; diff --git a/net/rds/iw.c b/net/rds/iw.c index db224f7c2937..b28fa8525b24 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; -- cgit v1.2.2 From 78c210efdefe07131f91ed512a3308b15bb14e2f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Aug 2009 15:41:34 -0400 Subject: Revert "knfsd: avoid overloading the CPU scheduler with enormous load averages" This reverts commit 59a252ff8c0f2fa32c896f69d56ae33e641ce7ad. This helps in an entirely cached workload but not necessarily in workloads that require waiting on disk. Conflicts: include/linux/sunrpc/svc.h net/sunrpc/svc_xprt.c Reported-by: Simon Kirby Tested-by: Jesper Krogh Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index df124f78ee48..2c58b75a236f 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -16,8 +16,6 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -#define SVC_MAX_WAKING 5 - static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); @@ -306,7 +304,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; - int thread_avail; if (!(xprt->xpt_flags & ((1<sp_lock); + if (!list_empty(&pool->sp_threads) && + !list_empty(&pool->sp_sockets)) + printk(KERN_ERR + "svc_xprt_enqueue: " + "threads and transports both waiting??\n"); + if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { /* Don't enqueue dead transports */ dprintk("svc: transport %p is dead, not enqueued\n", xprt); @@ -358,15 +361,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) } process: - /* Work out whether threads are available */ - thread_avail = !list_empty(&pool->sp_threads); /* threads are asleep */ - if (pool->sp_nwaking >= SVC_MAX_WAKING) { - /* too many threads are runnable and trying to wake up */ - thread_avail = 0; - pool->sp_stats.overloads_avoided++; - } - - if (thread_avail) { + if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, rq_list); @@ -381,8 +376,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); - rqstp->rq_waking = 1; - pool->sp_nwaking++; pool->sp_stats.threads_woken++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); @@ -651,11 +644,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) return -EINTR; spin_lock_bh(&pool->sp_lock); - if (rqstp->rq_waking) { - rqstp->rq_waking = 0; - pool->sp_nwaking--; - BUG_ON(pool->sp_nwaking < 0); - } xprt = svc_xprt_dequeue(pool); if (xprt) { rqstp->rq_xprt = xprt; @@ -1204,16 +1192,15 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) struct svc_pool *pool = p; if (p == SEQ_START_TOKEN) { - seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n"); + seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n"); return 0; } - seq_printf(m, "%u %lu %lu %lu %lu %lu\n", + seq_printf(m, "%u %lu %lu %lu %lu\n", pool->sp_id, pool->sp_stats.packets, pool->sp_stats.sockets_queued, pool->sp_stats.threads_woken, - pool->sp_stats.overloads_avoided, pool->sp_stats.threads_timedout); return 0; -- cgit v1.2.2 From feb8ca37cc3d83c07fd042509ef1e176cfeb2cfa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: SUNRPC: Ensure that we honour autoclose before attempting to reconnect If the XPRT_CLOSE_WAIT flag is set, we need to ensure that we call xprt->ops->close() while holding xprt_lock_write() before we can start reconnecting. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index fd46d42afa89..469de292c23c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -700,6 +700,10 @@ void xprt_connect(struct rpc_task *task) } if (!xprt_lock_write(xprt, task)) return; + + if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) + xprt->ops->close(xprt); + if (xprt_connected(xprt)) xprt_release_write(xprt, task); else { -- cgit v1.2.2 From f0380f3d16df8f9e2fcd1d8c16fb0d94370bea99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: RPC: Fix two potential races in put_rpccred It is possible for rpcauth_destroy_credcache() to cause the rpc credentials to be unhashed while put_rpccred is waiting for the rpc_credcache_lock on another cpu. Should this happen, then we can end up calling hlist_del_rcu(&cred->cr_hash) a second time in put_rpccred, thus causing list corruption. Should the credential actually be hashed, it is also possible for rpcauth_lookup_credcache to find and reference it before we get round to unhashing it. In this case, the call to rpcauth_unhash_cred will fail, and so we should just exit without destroying the cred. Reported-by: Neil Brown Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 54a4e042f104..7ee6f7eaddfb 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -123,16 +123,19 @@ rpcauth_unhash_cred_locked(struct rpc_cred *cred) clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); } -static void +static int rpcauth_unhash_cred(struct rpc_cred *cred) { spinlock_t *cache_lock; + int ret; cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); - if (atomic_read(&cred->cr_count) == 0) + ret = atomic_read(&cred->cr_count) == 0; + if (ret) rpcauth_unhash_cred_locked(cred); spin_unlock(cache_lock); + return ret; } /* @@ -446,31 +449,35 @@ void put_rpccred(struct rpc_cred *cred) { /* Fast path for unhashed credentials */ - if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - goto need_lock; - - if (!atomic_dec_and_test(&cred->cr_count)) + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) { + if (atomic_dec_and_test(&cred->cr_count)) + cred->cr_ops->crdestroy(cred); return; - goto out_destroy; -need_lock: + } + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) return; if (!list_empty(&cred->cr_lru)) { number_cred_unused--; list_del_init(&cred->cr_lru); } - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - rpcauth_unhash_cred(cred); if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { - cred->cr_expire = jiffies; - list_add_tail(&cred->cr_lru, &cred_unused); - number_cred_unused++; - spin_unlock(&rpc_credcache_lock); - return; + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; + goto out_nodestroy; + } + if (!rpcauth_unhash_cred(cred)) { + /* We were hashed and someone looked us up... */ + goto out_nodestroy; + } } spin_unlock(&rpc_credcache_lock); -out_destroy: cred->cr_ops->crdestroy(cred); + return; +out_nodestroy: + spin_unlock(&rpc_credcache_lock); } EXPORT_SYMBOL_GPL(put_rpccred); -- cgit v1.2.2 From dd1fd90fe65e2e642f0e58e2ff4849f317a6c43d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Display compressed (shorthand) IPv6 presentation addresses Recent changes to snprintf() introduced the %pI6c formatter, which can display an IPv6 address with standard shorthanding. Using a shorthanded address can save us a few bytes of memory for each stored presentation address, or a few bytes on the wire when sending these in a universal address. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index c7450c8f0a7c..6dcdd2517819 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -55,16 +55,8 @@ static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, /* * RFC 4291, Section 2.2.1 - * - * To keep the result as short as possible, especially - * since we don't shorthand, we don't want leading zeros - * in each halfword, so avoid %pI6. */ - return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", - ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), - ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), - ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), - ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); + return snprintf(buf, buflen, "%pI6c", addr); } static size_t rpc_ntop6(const struct sockaddr *sap, -- cgit v1.2.2 From 206a134b4d8abf57cd34dffacf993869355b9aac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Check explicitly for tk_status == 0 in call_transmit_status() The success case, where task->tk_status == 0, is by far the most frequent case in call_transmit_status(). The default: arm of the switch statement in call_transmit_status() handles the 0 case. default: was moved close to the top of the switch statement in call_transmit_status() under the theory that the compiler places object code for the earliest arms of a switch statement first, making the CPU do less work. The default: arm of a switch statement, however, is executed only after all the other cases have been checked. Even if the compiler rearranges the object code, the default: arm is the "last resort", meaning all of the other cases have been explicitly exhausted. That makes the current arrangement about as inefficient as it gets for the common case. To fix this, add an explicit check for zero before the switch statement. That forces the compiler to do the zero check first, no matter what optimizations it might try to do to the switch statement. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 38829e20500b..7bcd931e06ee 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1180,10 +1180,22 @@ static void call_transmit_status(struct rpc_task *task) { task->tk_action = call_status; + + /* + * Common case: success. Force the compiler to put this + * test first. + */ + if (task->tk_status == 0) { + xprt_end_transmit(task); + rpc_task_force_reencode(task); + return; + } + switch (task->tk_status) { case -EAGAIN: break; default: + dprint_status(task); xprt_end_transmit(task); /* * Special cases: if we've been waiting on the -- cgit v1.2.2 From 09a21c4102c8f7893368553273d39c0cadedf9af Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Allow RPCs to fail quickly if the server is unreachable The kernel sometimes makes RPC calls to services that aren't running. Because the kernel's RPC client always assumes the hard retry semantic when reconnecting a connection-oriented RPC transport, the underlying reconnect logic takes a long while to time out, even though the remote may have responded immediately with ECONNREFUSED. In certain cases, like upcalls to our local rpcbind daemon, or for NFS mount requests, we'd like the kernel to fail immediately if the remote service isn't reachable. This allows another transport to be tried immediately, or the pending request can be abandoned quickly. Introduce a per-request flag which controls how call_transmit_status() behaves when request transmission fails because the server cannot be reached. We don't want soft connection semantics to apply to other errors. The default case of the switch statement in call_transmit_status() no longer falls through; the fall through code is copied to the default case, and a "break;" is added. The transport's connection re-establishment timeout is also ignored for such requests. We want the request to fail immediately, so the reconnect delay is skipped. Additionally, we don't want a connect failure here to further increase the reconnect timeout value, since this request will not be retried. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 11 +++++++++-- net/sunrpc/xprtsock.c | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7bcd931e06ee..68a23583f44c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1197,6 +1197,8 @@ call_transmit_status(struct rpc_task *task) default: dprint_status(task); xprt_end_transmit(task); + rpc_task_force_reencode(task); + break; /* * Special cases: if we've been waiting on the * socket's write_space() callback, or if the @@ -1204,11 +1206,16 @@ call_transmit_status(struct rpc_task *task) * then hold onto the transport lock. */ case -ECONNREFUSED: - case -ECONNRESET: - case -ENOTCONN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) { + xprt_end_transmit(task); + rpc_exit(task, task->tk_status); + break; + } + case -ECONNRESET: + case -ENOTCONN: case -EPIPE: rpc_task_force_reencode(task); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37c5475ba258..ff312f8b018d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2033,7 +2033,7 @@ static void xs_connect(struct rpc_task *task) if (xprt_test_and_set_connecting(xprt)) return; - if (transport->sock != NULL) { + if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); -- cgit v1.2.2 From 5a46211540a83871196489247f57da2bdde58d87 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Simplify synopsis of rpcb_local_clnt() Clean up: At one point, rpcb_local_clnt() handled IPv6 loopback addresses too, but it doesn't any more; only IPv4 loopback is used now. Get rid of the @addr and @addrlen arguments to rpcb_local_clnt(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 830faf4d9997..28f50da60ceb 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -163,13 +163,12 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, - size_t addrlen, u32 version) +static struct rpc_clnt *rpcb_create_local(u32 version) { struct rpc_create_args args = { .protocol = XPRT_TRANSPORT_UDP, - .address = addr, - .addrsize = addrlen, + .address = (struct sockaddr *)&rpcb_inaddr_loopback, + .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, .version = version, @@ -211,14 +210,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, static int rpcb_register_call(const u32 version, struct rpc_message *msg) { - struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; - size_t addrlen = sizeof(rpcb_inaddr_loopback); struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(addr, addrlen, version); + rpcb_clnt = rpcb_create_local(version); if (!IS_ERR(rpcb_clnt)) { error = rpc_call_sync(rpcb_clnt, msg, 0); rpc_shutdown_client(rpcb_clnt); -- cgit v1.2.2 From c526611dd631b2802b6b0221ffb306c5fa25c86c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use a cached RPC client and transport for rpcbind upcalls The kernel's rpcbind client creates and deletes an rpc_clnt and its underlying transport socket for every upcall to the local rpcbind daemon. When starting a typical NFS server on IPv4 and IPv6, the NFS service itself does three upcalls (one per version) times two upcalls (one per transport) times two upcalls (one per address family), making 12, plus another one for the initial call to unregister previous NFS services. Starting the NLM service adds an additional 13 upcalls, for similar reasons. (Currently the NFS service doesn't start IPv6 listeners, but it will soon enough). Instead, let's create an rpc_clnt for rpcbind upcalls during the first local rpcbind query, and cache it. This saves the overhead of creating and destroying an rpc_clnt and a socket for every upcall. The new logic also prevents the kernel from attempting an RPCB_SET or RPCB_UNSET if it knows from the start that the local portmapper does not support rpcbind protocol version 4. This will cut down on the number of rpcbind upcalls in legacy environments. Signed-off-by: Chuck Lever --- net/sunrpc/rpcb_clnt.c | 93 +++++++++++++++++++++++++++++++++++++++--------- net/sunrpc/sunrpc_syms.c | 3 ++ 2 files changed, 80 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 28f50da60ceb..116db74dd942 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -110,6 +111,9 @@ static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; +static struct rpc_clnt * rpcb_local_clnt; +static struct rpc_clnt * rpcb_local_clnt4; + struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -163,7 +167,13 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(u32 version) +static DEFINE_MUTEX(rpcb_create_local_mutex); + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) { struct rpc_create_args args = { .protocol = XPRT_TRANSPORT_UDP, @@ -171,12 +181,46 @@ static struct rpc_clnt *rpcb_create_local(u32 version) .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, - .version = version, + .version = RPCBVERS_2, .authflavor = RPC_AUTH_UNIX, .flags = RPC_CLNT_CREATE_NOPING, }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create local rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } - return rpc_create(&args); + /* + * This results in an RPC ping. On systems running portmapper, + * the v4 ping will fail. Proceed anyway, but disallow rpcb + * v4 upcalls. + */ + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to create local rpcbind v4 " + "cleint (errno %ld).\n", PTR_ERR(clnt4)); + clnt4 = NULL; + } + + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; + +out: + mutex_unlock(&rpcb_create_local_mutex); + return result; } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, @@ -208,20 +252,13 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(const u32 version, struct rpc_message *msg) +static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) { - struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(version); - if (!IS_ERR(rpcb_clnt)) { - error = rpc_call_sync(rpcb_clnt, msg, 0); - rpc_shutdown_client(rpcb_clnt); - } else - error = PTR_ERR(rpcb_clnt); - + error = rpc_call_sync(clnt, msg, 0); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -276,6 +313,11 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -285,7 +327,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_2, &msg); + return rpcb_register_call(rpcb_local_clnt, &msg); } /* @@ -310,7 +352,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -337,7 +379,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -353,7 +395,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(RPCBVERS_4, msg); + return rpcb_register_call(rpcb_local_clnt4, msg); } /** @@ -411,6 +453,13 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; + if (rpcb_local_clnt4 == NULL) + return -EPROTONOSUPPORT; if (address == NULL) return rpcb_unregister_all_protofamilies(&msg); @@ -1024,3 +1073,15 @@ static struct rpc_program rpcb_program = { .version = rpcb_version, .stats = &rpcb_stats, }; + +/** + * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister + * + */ +void cleanup_rpcb_clnt(void) +{ + if (rpcb_local_clnt4) + rpc_shutdown_client(rpcb_local_clnt4); + if (rpcb_local_clnt) + rpc_shutdown_client(rpcb_local_clnt); +} diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8cce92189019..f438347d817b 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -24,6 +24,8 @@ extern struct cache_detail ip_map_cache, unix_gid_cache; +extern void cleanup_rpcb_clnt(void); + static int __init init_sunrpc(void) { @@ -53,6 +55,7 @@ out: static void __exit cleanup_sunrpc(void) { + cleanup_rpcb_clnt(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); -- cgit v1.2.2 From 2a76b3bfa22993fc09166bd6a8db0dbe902b6813 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use TCP for local rpcbind upcalls Use TCP with the soft connect semantic for local rpcbind upcalls so the kernel can detect immediately if the local rpcbind daemon is not running. Signed-off-by: Chuck Lever --- net/sunrpc/rpcb_clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 116db74dd942..401154094ee1 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -176,7 +176,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex); static int rpcb_create_local(void) { struct rpc_create_args args = { - .protocol = XPRT_TRANSPORT_UDP, + .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", @@ -258,7 +258,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, 0); + error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); -- cgit v1.2.2 From 012da158f636347c4eb28fd1e1cca020fef5e54d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use soft connects for autobinding over TCP Autobinding is handled by the rpciod process, not in user processes that are generating regular RPC requests. Thus autobinding is usually not affected by signals targetting user processes, such as KILL or timer expiration events. In addition, an RPC request generated by a user process that has RPC_TASK_SOFTCONN set and needs to perform an autobind will hang if the remote rpcbind service is not available. For rpcbind queries on connection-oriented transports, let's use the new soft connect semantic to return control to the user's process quickly, if the kernel's rpcbind client can't connect to the remote rpcbind service. Logic is introduced in call_bind_status() to handle connection errors that occurred during an asynchronous rpcbind query. The logic abandons the rpcbind query if the RPC request has SOFTCONN set, and retries after a few seconds in the normal case. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 17 ++++++++++++++++- net/sunrpc/rpcb_clnt.c | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 68a23583f44c..4b76ef9336c7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1060,7 +1060,7 @@ call_bind_status(struct rpc_task *task) goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u remote rpcbind service unavailable\n", + dprintk("RPC: %5u unrecognized remote rpcbind service\n", task->tk_pid); break; case -EPROTONOSUPPORT: @@ -1069,6 +1069,21 @@ call_bind_status(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_bind; return; + case -ECONNREFUSED: /* connection problems */ + case -ECONNRESET: + case -ENOTCONN: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EPIPE: + dprintk("RPC: %5u remote rpcbind unreachable: %d\n", + task->tk_pid, task->tk_status); + if (!RPC_IS_SOFTCONN(task)) { + rpc_delay(task, 5*HZ); + goto retry_timeout; + } + status = task->tk_status; + break; default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 401154094ee1..3e3772d8eb92 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -537,7 +537,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi .rpc_message = &msg, .callback_ops = &rpcb_getport_ops, .callback_data = map, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN, }; return rpc_run_task(&task_setup_data); -- cgit v1.2.2 From caabea8a565fb4e16f8e58e16bb9d535e591b709 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use soft connect semantics when performing RPC ping Currently, if a remote RPC service is unreachable, an RPC ping will hang until the underlying transport connect attempt times out. A more desirable behavior might be to have the ping fail immediately so upper layers can recover appropriately. In the case of an NFS mount, for instance, this would mean the mount(2) system call could fail immediately if the server isn't listening, rather than hanging uninterruptibly for more than 3 minutes. Change rpc_ping() so that it fails immediately for connection-oriented transports. rpc_create() will then fail immediately for such transports if an RPC ping was requested. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4b76ef9336c7..97931d9f8579 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -79,7 +79,7 @@ static void call_connect_status(struct rpc_task *task); static __be32 *rpc_encode_header(struct rpc_task *task); static __be32 *rpc_verify_header(struct rpc_task *task); -static int rpc_ping(struct rpc_clnt *clnt, int flags); +static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { @@ -340,7 +340,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) return clnt; if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { - int err = rpc_ping(clnt, RPC_TASK_SOFT); + int err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); @@ -528,7 +528,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; - err = rpc_ping(clnt, RPC_TASK_SOFT); + err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); clnt = ERR_PTR(err); @@ -1709,14 +1709,14 @@ static struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -static int rpc_ping(struct rpc_clnt *clnt, int flags) +static int rpc_ping(struct rpc_clnt *clnt) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, }; int err; msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); - err = rpc_call_sync(clnt, &msg, flags); + err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN); put_rpccred(msg.rpc_cred); return err; } -- cgit v1.2.2 From 3a28becc35e5c8f1fabb707bcd8a473712653de6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: soft connect semantics for UDP Introduce soft connect behavior for UDP transports. In this case, a major timeout returns ETIMEDOUT instead of EIO. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 97931d9f8579..154034b675bd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1380,6 +1380,10 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; + if (RPC_IS_SOFTCONN(task)) { + rpc_exit(task, -ETIMEDOUT); + return; + } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", -- cgit v1.2.2 From 480e3243df156e39eea6c91057e2ae612a6bbe19 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 8 Dec 2009 13:13:03 -0500 Subject: SUNRPC: IS_ERR/PTR_ERR confusion IS_ERR returns 1 or 0, PTR_ERR returns the error value. Signed-off-by: Roel Kluin Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index fc6a43ccd950..129d75ea25d2 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -485,7 +485,7 @@ gss_refresh_upcall(struct rpc_task *task) dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid); gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); - if (IS_ERR(gss_msg) == -EAGAIN) { + if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. */ warn_gssd(); -- cgit v1.2.2 From 053e324f67b9921fe7de0c4cbc720d29cb4bf207 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Wed, 9 Dec 2009 23:15:22 +0530 Subject: rpc: remove unneeded function parameter in gss_add_msg() The pointer to struct gss_auth parameter in gss_add_msg is not really needed after commit 5b7ddd4a. Zap it. Signed-off-by: Suresh Jayaraman Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 129d75ea25d2..3c3c50f38a1c 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -304,7 +304,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) * to that upcall instead of adding the new upcall. */ static inline struct gss_upcall_msg * -gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) +gss_add_msg(struct gss_upcall_msg *gss_msg) { struct rpc_inode *rpci = gss_msg->inode; struct inode *inode = &rpci->vfs_inode; @@ -445,7 +445,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred); if (IS_ERR(gss_new)) return gss_new; - gss_msg = gss_add_msg(gss_auth, gss_new); + gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { struct inode *inode = &gss_new->inode->vfs_inode; int res = rpc_queue_upcall(inode, &gss_new->msg); -- cgit v1.2.2 From 5781b2356cbecb0b73b06ec8c3897cabdfdd0928 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 13 Dec 2009 19:32:39 -0800 Subject: udp: udp_lib_get_port() fix Now we can have a large udp hash table, udp_lib_get_port() loop should be converted to a do {} while (cond) form, or we dont enter it at all if hash table size is exactly 65536. Reported-by: Yinghai Lu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1f9534846ca9..f0126fdd7e04 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -216,9 +216,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, * force rand to be an odd multiple of UDP_HTABLE_SIZE */ rand = (rand | 1) * (udptable->mask + 1); - for (last = first + udptable->mask + 1; - first != last; - first++) { + last = first + udptable->mask + 1; + do { hslot = udp_hashslot(udptable, net, first); bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); @@ -238,7 +237,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, snum += rand; } while (snum != first); spin_unlock_bh(&hslot->lock); - } + } while (++first != last); goto fail; } else { hslot = udp_hashslot(udptable, net, snum); -- cgit v1.2.2 From d90a909e1f3e006a1d57fe11fd417173b6494701 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 12 Dec 2009 22:11:15 +0000 Subject: net: Fix userspace RTM_NEWLINK notifications. I received some bug reports about userspace programs having problems because after RTM_NEWLINK was received they could not immediate access files under /proc/sys/net/ because they had not been registered yet. The original problem was trivially fixed by moving the userspace notification from rtnetlink_event() to the end of register_netdevice(). When testing that change I discovered I was still getting RTM_NEWLINK events before I could access proc and I was also getting RTM_NEWLINK events after I was seeing RTM_DELLINK. Things practically guaranteed to confuse userspace. After a little more investigation these extra notifications proved to be from the new notifiers NETDEV_POST_INIT and NETDEV_UNREGISTER_BATCH hitting the default case in rtnetlink_event, and triggering unnecessary RTM_NEWLINK messages. rtnetlink_event now explicitly handles NETDEV_UNREGISTER_BATCH and NETDEV_POST_INIT to avoid sending the incorrect userspace notifications. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 11 +++++++++++ net/core/rtnetlink.c | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6fe7d739e59b..be9924f60ec3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5035,6 +5035,11 @@ int register_netdevice(struct net_device *dev) rollback_registered(dev); dev->reg_state = NETREG_UNREGISTERED; } + /* + * Prevent userspace races by waiting until the network + * device is fully setup before sending notifications. + */ + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); out: return ret; @@ -5597,6 +5602,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Notify protocols, that a new device appeared. */ call_netdevice_notifiers(NETDEV_REGISTER, dev); + /* + * Prevent userspace races by waiting until the network + * device is fully setup before sending notifications. + */ + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + synchronize_net(); err = 0; out: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 33148a568199..794bcb897ff0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1364,15 +1364,15 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); break; - case NETDEV_REGISTER: - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); - break; case NETDEV_UP: case NETDEV_DOWN: rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); break; + case NETDEV_POST_INIT: + case NETDEV_REGISTER: case NETDEV_CHANGE: case NETDEV_GOING_DOWN: + case NETDEV_UNREGISTER_BATCH: break; default: rtmsg_ifinfo(RTM_NEWLINK, dev, 0); -- cgit v1.2.2 From 9abfe315de96aa5c9878b2f627542bc54901c6e9 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Mon, 14 Dec 2009 16:38:21 +0100 Subject: ipvs: fix synchronization on connection close commit 9d3a0de makes slaves expire as they would do on the master with much shorter timeouts. But it introduces another problem: When we close a connection, on master server the connection became CLOSE_WAIT/TIME_WAIT, it was synced to slaves, but if master is finished within it's timeouts (CLOSE), it will not be synced to slaves. Then slaves will be kept on CLOSE_WAIT/TIME_WAIT until timeout reaches. Thus we should also sync with CLOSE. Cc: Wensong Zhang Cc: Simon Horman Cc: Julian Anastasov Cc: David S. Miller Signed-off-by: Xiaotian Feng Acked-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b95699f00545..847ffca40184 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1366,6 +1366,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, == sysctl_ip_vs_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || + (cp->state == IP_VS_TCP_S_CLOSE) || (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_TIME_WAIT))))) ip_vs_sync_conn(cp); -- cgit v1.2.2 From d24deb2580823ab0b8425790c6f5d18e2ff749d8 Mon Sep 17 00:00:00 2001 From: Gertjan van Wingerde Date: Fri, 4 Dec 2009 23:46:54 +0100 Subject: mac80211: Add define for TX headroom reserved by mac80211 itself. Add a definition of the amount of TX headroom reserved by mac80211 itself for its own purposes. Also add BUILD_BUG_ON to validate the value. This define can then be used by drivers to request additional TX headroom in the most efficient manner. Signed-off-by: Gertjan van Wingerde Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8116d1a96a4a..0d2d94881f1f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -515,6 +515,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) * and we need some headroom for passing the frame to monitor * interfaces, but never both at the same time. */ + BUILD_BUG_ON(IEEE80211_TX_STATUS_HEADROOM != + sizeof(struct ieee80211_tx_status_rtap_hdr)); local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, sizeof(struct ieee80211_tx_status_rtap_hdr)); -- cgit v1.2.2 From 0b5ccb2ee250136dd7385b1c7da28417d0d4d32d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 15 Dec 2009 16:59:18 +0100 Subject: ipv6: reassembly: use seperate reassembly queues for conntrack and local delivery Currently the same reassembly queue might be used for packets reassembled by conntrack in different positions in the stack (PREROUTING/LOCAL_OUT), as well as local delivery. This can cause "packet jumps" when the fragment completing a reassembled packet is queued from a different position in the stack than the previous ones. Add a "user" identifier to the reassembly queue key to seperate the queues of each caller, similar to what we do for IPv4. Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 13 +++++++++++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 7 ++++--- net/ipv6/reassembly.c | 5 ++++- 3 files changed, 19 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 5f2ec208a8c3..c0a82fe78321 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -187,6 +187,16 @@ out: return nf_conntrack_confirm(skb); } +static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ + if (hooknum == NF_INET_PRE_ROUTING) + return IP6_DEFRAG_CONNTRACK_IN; + else + return IP6_DEFRAG_CONNTRACK_OUT; + +} + static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -199,8 +209,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (skb->nfct) return NF_ACCEPT; - reasm = nf_ct_frag6_gather(skb); - + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e0b9424fa1b2..312c20adc83f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -168,13 +168,14 @@ out: /* Creation primitives. */ static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) +fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; unsigned int hash; arg.id = id; + arg.user = user; arg.src = src; arg.dst = dst; @@ -559,7 +560,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) +struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; @@ -605,7 +606,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) nf_ct_frag6_evictor(); - fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); + fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4d98549a6868..3b3a95607125 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -72,6 +72,7 @@ struct frag_queue struct inet_frag_queue q; __be32 id; /* fragment id */ + u32 user; struct in6_addr saddr; struct in6_addr daddr; @@ -141,7 +142,7 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) struct ip6_create_arg *arg = a; fq = container_of(q, struct frag_queue, q); - return (fq->id == arg->id && + return (fq->id == arg->id && fq->user == arg->user && ipv6_addr_equal(&fq->saddr, arg->src) && ipv6_addr_equal(&fq->daddr, arg->dst)); } @@ -163,6 +164,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) struct ip6_create_arg *arg = a; fq->id = arg->id; + fq->user = arg->user; ipv6_addr_copy(&fq->saddr, arg->src); ipv6_addr_copy(&fq->daddr, arg->dst); } @@ -243,6 +245,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, unsigned int hash; arg.id = id; + arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; -- cgit v1.2.2 From 8fa9ff6849bb86c59cc2ea9faadf3cb2d5223497 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 15 Dec 2009 16:59:59 +0100 Subject: netfilter: fix crashes in bridge netfilter caused by fragment jumps When fragments from bridge netfilter are passed to IPv4 or IPv6 conntrack and a reassembly queue with the same fragment key already exists from reassembling a similar packet received on a different device (f.i. with multicasted fragments), the reassembled packet might continue on a different codepath than where the head fragment originated. This can cause crashes in bridge netfilter when a fragment received on a non-bridge device (and thus with skb->nf_bridge == NULL) continues through the bridge netfilter code. Add a new reassembly identifier for packets originating from bridge netfilter and use it to put those packets in insolated queues. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14805 Reported-and-Tested-by: Chong Qiao Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_defrag_ipv4.c | 21 +++++++++++++++++---- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 ++++++ 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index fa2d6b6fc3e5..331ead3ebd1b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -34,6 +35,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } +static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP_DEFRAG_CONNTRACK_BRIDGE_IN; +#endif + if (hooknum == NF_INET_PRE_ROUTING) + return IP_DEFRAG_CONNTRACK_IN; + else + return IP_DEFRAG_CONNTRACK_OUT; +} + static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -50,10 +65,8 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif /* Gather fragments. */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) + enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); + if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; } return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c0a82fe78321..0956ebabbff2 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -190,6 +191,11 @@ out: static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN; +#endif if (hooknum == NF_INET_PRE_ROUTING) return IP6_DEFRAG_CONNTRACK_IN; else -- cgit v1.2.2 From 258c889362aa95d0ab534b38ce8c15d3009705b1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 15 Dec 2009 17:01:25 +0100 Subject: ipvs: zero usvc and udest Make sure that any otherwise uninitialised fields of usvc are zero. This has been obvserved to cause a problem whereby the port of fwmark services may end up as a non-zero value which causes scheduling of a destination server to fail for persisitent services. As observed by Deon van der Merwe . This fix suggested by Julian Anastasov . For good measure also zero udest. Cc: Deon van der Merwe Acked-by: Julian Anastasov Signed-off-by: Simon Horman Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e55a6861d26f..6bde12da2fe0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2714,6 +2714,8 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) return -EINVAL; + memset(usvc, 0, sizeof(*usvc)); + usvc->af = nla_get_u16(nla_af); #ifdef CONFIG_IP_VS_IPV6 if (usvc->af != AF_INET && usvc->af != AF_INET6) @@ -2901,6 +2903,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, if (!(nla_addr && nla_port)) return -EINVAL; + memset(udest, 0, sizeof(*udest)); + nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); udest->port = nla_get_u16(nla_port); -- cgit v1.2.2 From 471452104b8520337ae2fb48c4e61cd4896e025d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 14 Dec 2009 18:00:08 -0800 Subject: const: constify remaining dev_pm_ops Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/iucv/af_iucv.c | 2 +- net/iucv/iucv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 1e428863574f..c18286a2167b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -221,7 +221,7 @@ static int afiucv_pm_restore_thaw(struct device *dev) return 0; } -static struct dev_pm_ops afiucv_pm_ops = { +static const struct dev_pm_ops afiucv_pm_ops = { .prepare = afiucv_pm_prepare, .complete = afiucv_pm_complete, .freeze = afiucv_pm_freeze, diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 3b1f5f5f8de7..fd8b28361a64 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -93,7 +93,7 @@ static int iucv_pm_freeze(struct device *); static int iucv_pm_thaw(struct device *); static int iucv_pm_restore(struct device *); -static struct dev_pm_ops iucv_pm_ops = { +static const struct dev_pm_ops iucv_pm_ops = { .prepare = iucv_pm_prepare, .complete = iucv_pm_complete, .freeze = iucv_pm_freeze, -- cgit v1.2.2 From e7d2860b690d4f3bed6824757c540579638e3d1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Mon, 14 Dec 2009 18:01:06 -0800 Subject: tree-wide: convert open calls to remove spaces to skip_spaces() lib function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes use of skip_spaces() defined in lib/string.c for removing leading spaces from strings all over the tree. It decreases lib.a code size by 47 bytes and reuses the function tree-wide: text data bss dec hex filename 64688 584 592 65864 10148 (TOTALS-BEFORE) 64641 584 592 65817 10119 (TOTALS-AFTER) Also, while at it, if we see (*str && isspace(*str)), we can be sure to remove the first condition (*str) as the second one (isspace(*str)) also evaluates to 0 whenever *str == 0, making it redundant. In other words, "a char equals zero is never a space". Julia Lawall tried the semantic patch (http://coccinelle.lip6.fr) below, and found occurrences of this pattern on 3 more files: drivers/leds/led-class.c drivers/leds/ledtrig-timer.c drivers/video/output.c @@ expression str; @@ ( // ignore skip_spaces cases while (*str && isspace(*str)) { \(str++;\|++str;\) } | - *str && isspace(*str) ) Signed-off-by: André Goddard Rosa Cc: Julia Lawall Cc: Martin Schwidefsky Cc: Jeff Dike Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Richard Purdie Cc: Neil Brown Cc: Kyle McMartin Cc: Henrique de Moraes Holschuh Cc: David Howells Cc: Cc: Samuel Ortiz Cc: Patrick McHardy Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/irda/irnet/irnet.h | 1 + net/irda/irnet/irnet_ppp.c | 8 +++----- net/netfilter/xt_recent.c | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index b001c361ad30..4300df35d37d 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -249,6 +249,7 @@ #include #include #include /* isspace() */ +#include /* skip_spaces() */ #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7dea882dbb75..156020d138b5 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -76,9 +76,8 @@ irnet_ctrl_write(irnet_socket * ap, /* Look at the next command */ start = next; - /* Scrap whitespaces before the command */ - while(isspace(*start)) - start++; + /* Scrap whitespaces before the command */ + start = skip_spaces(start); /* ',' is our command separator */ next = strchr(start, ','); @@ -133,8 +132,7 @@ irnet_ctrl_write(irnet_socket * ap, char * endp; /* Scrap whitespaces before the command */ - while(isspace(*begp)) - begp++; + begp = skip_spaces(begp); /* Convert argument to a number (last arg is the base) */ addr = simple_strtoul(begp, &endp, 16); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index eb0ceb846527..fc70a49c0afd 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -482,8 +482,7 @@ static ssize_t recent_old_proc_write(struct file *file, if (copy_from_user(buf, input, size)) return -EFAULT; - while (isspace(*c)) - c++; + c = skip_spaces(c); if (size - (c - buf) < 5) return c - buf; -- cgit v1.2.2 From 48f186124220794fce85ed1439fc32f16f69d3e2 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Mon, 14 Dec 2009 21:27:53 -0800 Subject: rpc: add rpc_queue_empty function Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cef74ba0666c..89ea8e69ec78 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -384,6 +384,20 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r __rpc_do_wake_up_task(queue, task); } +/* + * Tests whether rpc queue is empty + */ +int rpc_queue_empty(struct rpc_wait_queue *queue) +{ + int res; + + spin_lock_bh(&queue->lock); + res = queue->qlen; + spin_unlock_bh(&queue->lock); + return (res == 0); +} +EXPORT_SYMBOL_GPL(rpc_queue_empty); + /* * Wake up a task on a specific queue */ -- cgit v1.2.2 From 689cf5c15baf603a8041565ff0bd0d65d1634fd7 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Mon, 14 Dec 2009 21:27:56 -0800 Subject: nfs: enforce FIFO ordering of operations trying to acquire slot Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 89ea8e69ec78..aae6907fd546 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -210,6 +210,7 @@ void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qnam { __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY); } +EXPORT_SYMBOL_GPL(rpc_init_priority_wait_queue); void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) { -- cgit v1.2.2 From bb5b7c11263dbbe78253cd05945a6bf8f55add8e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Dec 2009 20:56:42 -0800 Subject: tcp: Revert per-route SACK/DSACK/TIMESTAMP changes. It creates a regression, triggering badness for SYN_RECV sockets, for example: [19148.022102] Badness at net/ipv4/inet_connection_sock.c:293 [19148.022570] NIP: c02a0914 LR: c02a0904 CTR: 00000000 [19148.023035] REGS: eeecbd30 TRAP: 0700 Not tainted (2.6.32) [19148.023496] MSR: 00029032 CR: 24002442 XER: 00000000 [19148.024012] TASK = eee9a820[1756] 'privoxy' THREAD: eeeca000 This is likely caused by the change in the 'estab' parameter passed to tcp_parse_options() when invoked by the functions in net/ipv4/tcp_minisocks.c But even if that is fixed, the ->conn_request() changes made in this patch series is fundamentally wrong. They try to use the listening socket's 'dst' to probe the route settings. The listening socket doesn't even have a route, and you can't get the right route (the child request one) until much later after we setup all of the state, and it must be done by hand. This stuff really isn't ready, so the best thing to do is a full revert. This reverts the following commits: f55017a93f1a74d50244b1254b9a2bd7ac9bbf7d 022c3f7d82f0f1c68018696f2f027b87b9bb45c2 1aba721eba1d84a2defce45b950272cee1e6c72a cda42ebd67ee5fdf09d7057b5a4584d36fe8a335 345cda2fd695534be5a4494f1b59da9daed33663 dc343475ed062e13fc260acccaab91d7d80fd5b2 05eaade2782fb0c90d3034fd7a7d5a16266182bb 6a2a2d6bf8581216e08be15fcb563cfd6c430e1e Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 27 +++++++++++++-------------- net/ipv4/tcp_input.c | 24 ++++++++---------------- net/ipv4/tcp_ipv4.c | 21 +++++++++------------ net/ipv4/tcp_minisocks.c | 10 +++++----- net/ipv4/tcp_output.c | 18 +++++------------- net/ipv6/syncookies.c | 28 +++++++++++++--------------- net/ipv6/tcp_ipv6.c | 3 +-- 7 files changed, 54 insertions(+), 77 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 26399ad2a289..66fd80ef2473 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -277,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) @@ -292,6 +299,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = 0; + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -340,20 +353,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - /* Try to redo what tcp_v4_send_synack did. */ req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12cab7d74dba..28e029632493 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3727,7 +3727,7 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - u8 **hvpp, int estab, struct dst_entry *dst) + u8 **hvpp, int estab) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); @@ -3766,8 +3766,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) { + !estab && sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { @@ -3783,8 +3782,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps && - !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) { + (!estab && sysctl_tcp_timestamps))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -3792,8 +3790,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack && - !dst_feature(dst, RTAX_FEATURE_NO_SACK)) { + !estab && sysctl_tcp_sack) { opt_rx->sack_ok = 1; tcp_sack_reset(opt_rx); } @@ -3878,7 +3875,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); return 1; } @@ -4133,10 +4130,8 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack && - !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4165,15 +4160,13 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack && - !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -5428,11 +5421,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); struct tcp_cookie_values *cvp = tp->cookie_values; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 15e96030ce47..65b8ebfd078a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1262,20 +1262,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; #endif - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(sk, skb); - - dst = inet_csk_route_req(sk, req); - if(!dst) - goto drop_and_free; - tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && @@ -1319,8 +1309,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(sk, skb); + if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; + goto drop_and_free; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1345,6 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && + (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 87accec8d097..f206ee5dda80 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -95,9 +95,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; + tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -526,9 +526,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; - if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) { - tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); + tmp_opt.saw_tstamp = 0; + if (th->doff > (sizeof(struct tcphdr)>>2)) { + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 93316a96d820..383ce237640f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -553,7 +553,6 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; - struct dst_entry *dst = __sk_dst_get(sk); unsigned remaining = MAX_TCP_OPTION_SPACE; u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? tcp_cookie_size_check(cvp->cookie_desired) : @@ -581,22 +580,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && - !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) && - *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) { + if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack && - !dst_feature(dst, RTAX_FEATURE_NO_SACK))) { + if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -2527,9 +2522,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps && - (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ? - TCPOLEN_TSTAMP_ALIGNED : 0)); + (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) @@ -2555,8 +2548,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - (sysctl_tcp_window_scaling && - !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)), + sysctl_tcp_window_scaling, &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 5b9af508b8f2..7208a06576c6 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -185,6 +185,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + ret = NULL; req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (!req) @@ -218,6 +225,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = 0; + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; @@ -253,21 +266,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; } - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ee9cf62458d4..febfd595a40d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1169,7 +1169,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); __u32 isn = TCP_SKB_CB(skb)->when; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; @@ -1208,7 +1207,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && -- cgit v1.2.2 From 1a35ca80c1db7279c3c0655063f6d3490e399b17 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Dec 2009 05:47:03 +0000 Subject: packet: dont call sleeping functions while holding rcu_read_lock() commit 654d1f8a019dfa06d (packet: less dev_put() calls) introduced a problem, calling potentially sleeping functions from a rcu_read_lock() protected section. Fix this by releasing lock before the sock_wmalloc()/memcpy_fromiovec() calls. After skb allocation and copy from user space, we redo device lookup and appropriate tests. Reported-and-tested-by: Frederic Weisbecker Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 71 ++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 020562164b56..e0516a22be2e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -415,7 +415,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name; - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct net_device *dev; __be16 proto = 0; int err; @@ -437,6 +437,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ saddr->spkt_device[13] = 0; +retry: rcu_read_lock(); dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); err = -ENODEV; @@ -456,58 +457,48 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, if (len > dev->mtu + dev->hard_header_len) goto out_unlock; - err = -ENOBUFS; - skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL); - - /* - * If the write buffer is full, then tough. At this level the user - * gets to deal with the problem - do your own algorithmic backoffs. - * That's far more flexible. - */ - - if (skb == NULL) - goto out_unlock; - - /* - * Fill it in - */ - - /* FIXME: Save some space for broken drivers that write a - * hard header at transmission time by themselves. PPP is the - * notable one here. This should really be fixed at the driver level. - */ - skb_reserve(skb, LL_RESERVED_SPACE(dev)); - skb_reset_network_header(skb); - - /* Try to align data part correctly */ - if (dev->header_ops) { - skb->data -= dev->hard_header_len; - skb->tail -= dev->hard_header_len; - if (len < dev->hard_header_len) - skb_reset_network_header(skb); + if (!skb) { + size_t reserved = LL_RESERVED_SPACE(dev); + unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; + + rcu_read_unlock(); + skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL); + if (skb == NULL) + return -ENOBUFS; + /* FIXME: Save some space for broken drivers that write a hard + * header at transmission time by themselves. PPP is the notable + * one here. This should really be fixed at the driver level. + */ + skb_reserve(skb, reserved); + skb_reset_network_header(skb); + + /* Try to align data part correctly */ + if (hhlen) { + skb->data -= hhlen; + skb->tail -= hhlen; + if (len < hhlen) + skb_reset_network_header(skb); + } + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (err) + goto out_free; + goto retry; } - /* Returns -EFAULT on error */ - err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - if (err) - goto out_free; - - /* - * Now send it - */ dev_queue_xmit(skb); rcu_read_unlock(); return len; -out_free: - kfree_skb(skb); out_unlock: rcu_read_unlock(); +out_free: + kfree_skb(skb); return err; } -- cgit v1.2.2 From 28dfef8febe48f59cf1e7596e1992a6a1893ca24 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 15 Dec 2009 16:46:48 -0800 Subject: const: constify remaining pipe_buf_operations Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bfa3e7865a8c..93c4e060c91e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -93,7 +93,7 @@ static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, /* Pipe buffer operations for a socket. */ -static struct pipe_buf_operations sock_pipe_buf_ops = { +static const struct pipe_buf_operations sock_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, -- cgit v1.2.2 From 198de4d7ac3a0f1351c6377ff657950457ed0038 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Aug 2009 19:29:23 +0400 Subject: reorder alloc_fd/attach_fd in socketpair() Signed-off-by: Al Viro --- net/socket.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index b94c3dd71015..bf538bea8fbf 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1396,23 +1396,30 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, goto out_release_both; } - fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); - if (unlikely(fd2 < 0)) { - err = fd2; + err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); + if (unlikely(err < 0)) { put_filp(newfile1); put_unused_fd(fd1); goto out_release_both; } - err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); - if (unlikely(err < 0)) { - goto out_fd2; + fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); + if (unlikely(fd2 < 0)) { + err = fd2; + fput(newfile1); + put_unused_fd(fd1); + sock_release(sock2); + goto out; } err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); if (unlikely(err < 0)) { + put_filp(newfile2); + put_unused_fd(fd2); fput(newfile1); - goto out_fd1; + put_unused_fd(fd1); + sock_release(sock2); + goto out; } audit_fd_pair(fd1, fd2); @@ -1438,16 +1445,6 @@ out_release_1: sock_release(sock1); out: return err; - -out_fd2: - put_filp(newfile1); - sock_release(sock1); -out_fd1: - put_filp(newfile2); - sock_release(sock2); - put_unused_fd(fd1); - put_unused_fd(fd2); - goto out; } /* -- cgit v1.2.2 From 7cbe66b6b53b6615f1033bd5b3dbad8162886373 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Aug 2009 19:59:08 +0400 Subject: merge sock_alloc_fd/sock_attach_fd into a new helper Signed-off-by: Al Viro --- net/socket.c | 80 +++++++++++++++++------------------------------------------- 1 file changed, 23 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index bf538bea8fbf..dbb3802a7645 100644 --- a/net/socket.c +++ b/net/socket.c @@ -355,32 +355,30 @@ static const struct dentry_operations sockfs_dentry_operations = { * but we take care of internal coherence yet. */ -static int sock_alloc_fd(struct file **filep, int flags) +static int sock_alloc_file(struct socket *sock, struct file **f, int flags) { + struct qstr name = { .name = "" }; + struct dentry *dentry; + struct file *file; int fd; fd = get_unused_fd_flags(flags); - if (likely(fd >= 0)) { - struct file *file = get_empty_filp(); + if (unlikely(fd < 0)) + return fd; - *filep = file; - if (unlikely(!file)) { - put_unused_fd(fd); - return -ENFILE; - } - } else - *filep = NULL; - return fd; -} + file = get_empty_filp(); -static int sock_attach_fd(struct socket *sock, struct file *file, int flags) -{ - struct dentry *dentry; - struct qstr name = { .name = "" }; + if (unlikely(!file)) { + put_unused_fd(fd); + return -ENFILE; + } dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!dentry)) + if (unlikely(!dentry)) { + put_filp(file); + put_unused_fd(fd); return -ENOMEM; + } dentry->d_op = &sockfs_dentry_operations; /* @@ -399,24 +397,18 @@ static int sock_attach_fd(struct socket *sock, struct file *file, int flags) file->f_pos = 0; file->private_data = sock; - return 0; + *f = file; + return fd; } int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_fd(&newfile, flags); - - if (likely(fd >= 0)) { - int err = sock_attach_fd(sock, newfile, flags); + int fd = sock_alloc_file(sock, &newfile, flags); - if (unlikely(err < 0)) { - put_filp(newfile); - put_unused_fd(fd); - return err; - } + if (likely(fd >= 0)) fd_install(fd, newfile); - } + return fd; } @@ -1390,20 +1382,13 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); + fd1 = sock_alloc_file(sock1, &newfile1, flags); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); - if (unlikely(err < 0)) { - put_filp(newfile1); - put_unused_fd(fd1); - goto out_release_both; - } - - fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); + fd2 = sock_alloc_file(sock2, &newfile2, flags); if (unlikely(fd2 < 0)) { err = fd2; fput(newfile1); @@ -1412,16 +1397,6 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, goto out; } - err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); - if (unlikely(err < 0)) { - put_filp(newfile2); - put_unused_fd(fd2); - fput(newfile1); - put_unused_fd(fd1); - sock_release(sock2); - goto out; - } - audit_fd_pair(fd1, fd2); fd_install(fd1, newfile1); fd_install(fd2, newfile2); @@ -1548,17 +1523,13 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); + newfd = sock_alloc_file(newsock, &newfile, flags); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); goto out_put; } - err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK); - if (err < 0) - goto out_fd_simple; - err = security_socket_accept(sock, newsock); if (err) goto out_fd; @@ -1588,11 +1559,6 @@ out_put: fput_light(sock->file, fput_needed); out: return err; -out_fd_simple: - sock_release(newsock); - put_filp(newfile); - put_unused_fd(newfd); - goto out_put; out_fd: fput(newfile); put_unused_fd(newfd); -- cgit v1.2.2 From 6b18662e239a032f908b7f6e164bdf7e2e0a32c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Aug 2009 02:02:43 +0400 Subject: 9p connect fixes * if we fail in p9_conn_create(), we shouldn't leak references to struct file. Logics in ->close() doesn't help - ->trans is already gone by the time it's called. * sock_create_kern() can fail. * use of sock_map_fd() is all fscked up; I'd fixed most of that, but the rest will have to wait for a bit more work in net/socket.c (we still are violating the basic rule of working with descriptor table: "once the reference is installed there, don't rely on finding it there again"). Signed-off-by: Al Viro --- net/9p/trans_fd.c | 112 ++++++++++++++++++++++-------------------------------- 1 file changed, 46 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 4dd873e3a1bb..be1cb909d8c0 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -42,6 +42,8 @@ #include #include +#include /* killme */ + #define P9_PORT 564 #define MAX_SOCK_BUF (64*1024) #define MAXPOLLWADDR 2 @@ -788,24 +790,41 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) static int p9_socket_open(struct p9_client *client, struct socket *csocket) { - int fd, ret; + struct p9_trans_fd *p; + int ret, fd; + + p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); + if (!p) + return -ENOMEM; csocket->sk->sk_allocation = GFP_NOIO; fd = sock_map_fd(csocket, 0); if (fd < 0) { P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); + sock_release(csocket); + kfree(p); return fd; } - ret = p9_fd_open(client, fd, fd); - if (ret < 0) { - P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n"); + get_file(csocket->file); + get_file(csocket->file); + p->wr = p->rd = csocket->file; + client->trans = p; + client->status = Connected; + + sys_close(fd); /* still racy */ + + p->rd->f_flags |= O_NONBLOCK; + + p->conn = p9_conn_create(client); + if (IS_ERR(p->conn)) { + ret = PTR_ERR(p->conn); + p->conn = NULL; + kfree(p); + sockfd_put(csocket); sockfd_put(csocket); return ret; } - - ((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK; - return 0; } @@ -883,7 +902,6 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; - struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ err = parse_opts(args, &opts); if (err < 0) @@ -897,12 +915,11 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - if (!csocket) { + if (err) { P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); - err = -EIO; - goto error; + return err; } err = csocket->ops->connect(csocket, @@ -912,30 +929,11 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem connecting socket to %s\n", addr); - goto error; - } - - err = p9_socket_open(client, csocket); - if (err < 0) - goto error; - - p = (struct p9_trans_fd *) client->trans; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - goto error; - } - - return 0; - -error: - if (csocket) sock_release(csocket); + return err; + } - kfree(p); - - return err; + return p9_socket_open(client, csocket); } static int @@ -944,49 +942,33 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) int err; struct socket *csocket; struct sockaddr_un sun_server; - struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ csocket = NULL; if (strlen(addr) > UNIX_PATH_MAX) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", addr); - err = -ENAMETOOLONG; - goto error; + return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + if (err < 0) { + P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); + return err; + } err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem connecting socket: %s: %d\n", addr, err); - goto error; - } - - err = p9_socket_open(client, csocket); - if (err < 0) - goto error; - - p = (struct p9_trans_fd *) client->trans; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - goto error; - } - - return 0; - -error: - if (csocket) sock_release(csocket); + return err; + } - kfree(p); - return err; + return p9_socket_open(client, csocket); } static int @@ -994,7 +976,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; struct p9_fd_opts opts; - struct p9_trans_fd *p = NULL; /* this get allocated in p9_fd_open */ + struct p9_trans_fd *p; parse_opts(args, &opts); @@ -1005,21 +987,19 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) - goto error; + return err; p = (struct p9_trans_fd *) client->trans; p->conn = p9_conn_create(client); if (IS_ERR(p->conn)) { err = PTR_ERR(p->conn); p->conn = NULL; - goto error; + fput(p->rd); + fput(p->wr); + return err; } return 0; - -error: - kfree(p); - return err; } static struct p9_trans_module p9_tcp_trans = { -- cgit v1.2.2 From cc3808f8c354889982e7e323050f1e50ad99a009 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Aug 2009 09:43:59 +0400 Subject: switch sock_alloc_file() to alloc_file() Signed-off-by: Al Viro --- net/socket.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index dbb3802a7645..eaaba3510e81 100644 --- a/net/socket.c +++ b/net/socket.c @@ -366,16 +366,8 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - file = get_empty_filp(); - - if (unlikely(!file)) { - put_unused_fd(fd); - return -ENFILE; - } - dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); if (unlikely(!dentry)) { - put_filp(file); put_unused_fd(fd); return -ENOMEM; } @@ -388,11 +380,19 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) */ dentry->d_flags &= ~DCACHE_UNHASHED; d_instantiate(dentry, SOCK_INODE(sock)); + SOCK_INODE(sock)->i_fop = &socket_file_ops; - sock->file = file; - init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, + file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE, &socket_file_ops); - SOCK_INODE(sock)->i_fop = &socket_file_ops; + if (unlikely(!file)) { + /* drop dentry, keep inode */ + atomic_inc(&path.dentry->d_inode->i_count); + dput(dentry); + put_unused_fd(fd); + return -ENFILE; + } + + sock->file = file; file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->f_pos = 0; file->private_data = sock; -- cgit v1.2.2 From 2c48b9c45579a9b5e3e74694eebf3d2451f3dbd3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Aug 2009 00:52:35 +0400 Subject: switch alloc_file() to passing struct path ... and have the caller grab both mnt and dentry; kill leak in infiniband, while we are at it. Signed-off-by: Al Viro --- net/socket.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index eaaba3510e81..dbfdfa96d29b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -358,7 +358,7 @@ static const struct dentry_operations sockfs_dentry_operations = { static int sock_alloc_file(struct socket *sock, struct file **f, int flags) { struct qstr name = { .name = "" }; - struct dentry *dentry; + struct path path; struct file *file; int fd; @@ -366,28 +366,29 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!dentry)) { + path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; } + path.mnt = mntget(sock_mnt); - dentry->d_op = &sockfs_dentry_operations; + path.dentry->d_op = &sockfs_dentry_operations; /* * We dont want to push this dentry into global dentry hash table. * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED * This permits a working /proc/$pid/fd/XXX on sockets */ - dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(dentry, SOCK_INODE(sock)); + path.dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; - file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE, + file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); if (unlikely(!file)) { /* drop dentry, keep inode */ atomic_inc(&path.dentry->d_inode->i_count); - dput(dentry); + path_put(&path); put_unused_fd(fd); return -ENFILE; } -- cgit v1.2.2 From a3a065e3f13da8a3470ed09c7f38aad256083726 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Nov 2009 05:30:19 +0100 Subject: fs: no games with DCACHE_UNHASHED Filesystems outside the regular namespace do not have to clear DCACHE_UNHASHED in order to have a working /proc/$pid/fd/XXX. Nothing in proc prevents the fd link from being used if its dentry is not in the hash. Also, it does not get put into the dcache hash if DCACHE_UNHASHED is clear; that depends on the filesystem calling d_add or d_rehash. So delete the misleading comments and needless code. Acked-by: Miklos Szeredi Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- net/socket.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index dbfdfa96d29b..769c386bd428 100644 --- a/net/socket.c +++ b/net/socket.c @@ -312,18 +312,6 @@ static struct file_system_type sock_fs_type = { .kill_sb = kill_anon_super, }; -static int sockfs_delete_dentry(struct dentry *dentry) -{ - /* - * At creation time, we pretended this dentry was hashed - * (by clearing DCACHE_UNHASHED bit in d_flags) - * At delete time, we restore the truth : not hashed. - * (so that dput() can proceed correctly) - */ - dentry->d_flags |= DCACHE_UNHASHED; - return 0; -} - /* * sockfs_dname() is called from d_path(). */ @@ -334,7 +322,6 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) } static const struct dentry_operations sockfs_dentry_operations = { - .d_delete = sockfs_delete_dentry, .d_dname = sockfs_dname, }; @@ -374,12 +361,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) path.mnt = mntget(sock_mnt); path.dentry->d_op = &sockfs_dentry_operations; - /* - * We dont want to push this dentry into global dentry hash table. - * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED - * This permits a working /proc/$pid/fd/XXX on sockets - */ - path.dentry->d_flags &= ~DCACHE_UNHASHED; d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; -- cgit v1.2.2 From 971beb83aeb2a309175682cf5683d64fd4591841 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 7 Dec 2009 14:23:21 +0100 Subject: Bluetooth: Fix PTR_ERR return of wrong pointer in hidp_setup_hid() Return the PTR_ERR of the correct pointer. Signed-off-by: Roel Kluin Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 569750010fd3..18e7f5a43dc4 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -770,7 +770,7 @@ static int hidp_setup_hid(struct hidp_session *session, hid = hid_allocate_device(); if (IS_ERR(hid)) - return PTR_ERR(session->hid); + return PTR_ERR(hid); session->hid = hid; session->req = req; -- cgit v1.2.2 From 186de9a33803c7ee20d9af75c9049b50e68a3a08 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 15 Dec 2009 15:56:34 -0200 Subject: Bluetooth: Fix unset of RemoteBusy flag for L2CAP RemoteBusy flag need to be unset before l2cap_ertm_send(), otherwise l2cap_ertm_send() will return without sending packets because it checks that flag before start sending. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 5129b88c8e5b..7db9a1f8f882 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3435,8 +3435,8 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str (pi->unacked_frames > 0)) __mod_retrans_timer(); - l2cap_ertm_send(sk); pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + l2cap_ertm_send(sk); } break; -- cgit v1.2.2 From 186ee8cf0130993dea8ab8867ff1af8a148f9ae6 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 15 Dec 2009 20:13:27 -0200 Subject: Bluetooth: Ack L2CAP I-frames before retransmit missing packet Moving the Ack to before l2cap_retransmit_frame() we can avoid the case where txWindow is full and the packet can't be retransmited. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 7db9a1f8f882..fb0f81d99f96 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3471,9 +3471,9 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; if (rx_control & L2CAP_CTRL_POLL) { - l2cap_retransmit_frame(sk, tx_seq); pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); + l2cap_retransmit_frame(sk, tx_seq); l2cap_ertm_send(sk); if (pi->conn_state & L2CAP_CONN_WAIT_F) { pi->srej_save_reqseq = tx_seq; -- cgit v1.2.2 From b13f5860447a98daf0358a51fbff66154ac0663a Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 15 Dec 2009 11:38:04 +0200 Subject: Bluetooth: Fix L2CAP locking scheme regression When locking was introduced the error path branch was not taken into account. Error was found in sparse code checking. Kudos to Jani Nikula. Signed-off-by: Andrei Emeltchenko Acked-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index fb0f81d99f96..1120cf14a548 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1212,6 +1212,7 @@ static void l2cap_monitor_timeout(unsigned long arg) bh_lock_sock(sk); if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) { l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk); + bh_unlock_sock(sk); return; } -- cgit v1.2.2 From 9c69fabe789b0eb468a0c7031ae7bb850760aea8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 18 Dec 2009 20:11:03 -0800 Subject: netns: fix net.ipv6.route.gc_min_interval_ms in netns sysctl table was copied, all right, but ->data for net.ipv6.route.gc_min_interval_ms was not reinitialized for "!= &init_net" case. In init_net everthing works by accident due to correct ->data initialization in source table. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index db3b27303890..c2bd74c5f8d9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2630,6 +2630,7 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net) table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; + table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; } return table; -- cgit v1.2.2 From 3705e11a21bcdffe7422ee7870e1b23fe4ac70f4 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Fri, 18 Dec 2009 20:25:13 -0800 Subject: ipv6: fix an oops when force unload ipv6 module When I do an ipv6 module force unload,I got the following oops: #rmmod -f ipv6 ------------[ cut here ]------------ kernel BUG at mm/slub.c:2969! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/devices/pci0000:00/0000:00:11.0/0000:02:03.0/net/eth2/ifindex Modules linked in: ipv6(-) dm_multipath uinput ppdev tpm_tis tpm tpm_bios pcspkr pcnet32 mii parport_pc i2c_piix4 parport i2c_core floppy mptspi mptscsih mptbase scsi_transport_spi Pid: 2530, comm: rmmod Tainted: G R 2.6.32 #2 440BX Desktop Reference Platform/VMware Virtual Platform EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at kfree+0x6a/0xdd EAX: 00000000 EBX: c09e86bc ECX: c043e4dd EDX: c14293e0 ESI: e141f1d8 EDI: e140fc31 EBP: dec58ef0 ESP: dec58ed0 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process rmmod (pid: 2530, ti=dec58000 task=decb1940 task.ti=dec58000) Stack: c14293e0 00000282 df624240 c0897d08 c09e86bc c09e86bc e141f1d8 dec58f1c <0> dec58f00 e140fc31 c09e84c4 e141f1bc dec58f14 c0689d21 dec58f1c e141f1bc <0> 00000000 dec58f2c c0689eff c09e84d8 c09e84d8 e141f1bc bff33a90 dec58f38 Call Trace: [] ? ipv6_frags_exit_net+0x22/0x32 [ipv6] [] ? ops_exit_list+0x19/0x3d [] ? unregister_pernet_operations+0x2a/0x51 [] ? unregister_pernet_subsys+0x17/0x24 [] ? ipv6_frag_exit+0x21/0x32 [ipv6] [] ? inet6_exit+0x47/0x122 [ipv6] [] ? sys_delete_module+0x198/0x1f6 [] ? remove_vma+0x57/0x5d [] ? do_page_fault+0x2e7/0x315 [] ? sysenter_do_call+0x12/0x28 Code: 86 00 00 00 40 c1 e8 0c c1 e0 05 01 d0 89 45 e0 66 83 38 00 79 06 8b 40 0c 89 45 e0 8b 55 e0 8b 02 84 c0 78 14 66 a9 00 c0 75 04 <0f> 0b eb fe 8b 45 e0 e8 35 15 fe ff eb 5d 8b 45 04 8b 55 e0 89 EIP: [] kfree+0x6a/0xdd SS:ESP 0068:dec58ed0 ---[ end trace 4475d1a5b0afa7e5 ]--- It's because in ip6_frags_ns_sysctl_register, "table" only alloced when "net" is not equals to "init_net".So when we free "table" in ip6_frags_ns_sysctl_unregister,we should check this first. This patch fix the problem. Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3b3a95607125..2cddea3bd6be 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -708,7 +708,8 @@ static void ip6_frags_ns_sysctl_unregister(struct net *net) table = net->ipv6.sysctl.frags_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); - kfree(table); + if (!net_eq(net, &init_net)) + kfree(table); } static struct ctl_table_header *ip6_ctl_header; -- cgit v1.2.2 From 9a418af5df03ad133cd8c8f6742b75e542db6392 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 17 Dec 2009 13:55:48 +0100 Subject: mac80211: fix peer HT capabilities I noticed yesterday, because Jeff had noticed a speed regression, cf. bug http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2138 that the SM PS settings for peers were wrong. Instead of overwriting the SM PS settings with the local bits, we need to keep the remote bits. The bug was part of the original HT code from over two years ago, but unfortunately nobody noticed that it makes no sense -- we shouldn't be overwriting the peer's setting with our own but rather keep it intact when masking the peer capabilities with our own. While fixing that, I noticed that the masking of capabilities is completely useless for most of the bits, so also fix those other bits. Finally, I also noticed that PSMP_SUPPORT no longer exists in the final 802.11n version, so also remove that. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ht.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 3787455fb696..d7dcee680728 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -34,9 +34,28 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, ht_cap->ht_supported = true; - ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap; - ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS; - ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; + /* + * The bits listed in this expression should be + * the same for the peer and us, if the station + * advertises more then we can't use those thus + * we mask them out. + */ + ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & + (sband->ht_cap.cap | + ~(IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40)); + /* + * The STBC bits are asymmetric -- if we don't have + * TX then mask out the peer's RX and vice versa. + */ + if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) + ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC; + if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) + ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; ht_cap->ampdu_factor = -- cgit v1.2.2 From 0183826b58a2712ffe608bc3302447be3e6a3ab8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 17 Dec 2009 16:16:53 +0100 Subject: mac80211: fix WMM AP settings application My commit 77fdaa12cea26c204cc12c312fe40bc0f3dcdfd8 Author: Johannes Berg Date: Tue Jul 7 03:45:17 2009 +0200 mac80211: rework MLME for multiple authentications inadvertedly broke WMM because it removed, along with a bunch of other now useless initialisations, the line initialising sdata->u.mgd.wmm_last_param_set to -1 which would make it adopt any WMM parameter set. If, as is usually the case, the AP uses WMM parameter set sequence number zero, we'd never update it until the AP changes the sequence number. Add the missing initialisation back to get the WMM settings from the AP applied locally. Signed-off-by: Johannes Berg Cc: stable@kernel.org [2.6.31+] Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d8d50fb5e823..c79e59f82fd9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -915,6 +915,14 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | IEEE80211_STA_BEACON_POLL); + /* + * Always handle WMM once after association regardless + * of the first value the AP uses. Setting -1 here has + * that effect because the AP values is an unsigned + * 4-bit value. + */ + sdata->u.mgd.wmm_last_param_set = -1; + ieee80211_led_assoc(local, 1); sdata->vif.bss_conf.assoc = 1; -- cgit v1.2.2 From 45465487897a1c6d508b14b904dc5777f7ec7e04 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:26 -0800 Subject: kfifo: move struct kfifo in place This is a new generic kernel FIFO implementation. The current kernel fifo API is not very widely used, because it has to many constrains. Only 17 files in the current 2.6.31-rc5 used it. FIFO's are like list's a very basic thing and a kfifo API which handles the most use case would save a lot of development time and memory resources. I think this are the reasons why kfifo is not in use: - The API is to simple, important functions are missing - A fifo can be only allocated dynamically - There is a requirement of a spinlock whether you need it or not - There is no support for data records inside a fifo So I decided to extend the kfifo in a more generic way without blowing up the API to much. The new API has the following benefits: - Generic usage: For kernel internal use and/or device driver. - Provide an API for the most use case. - Slim API: The whole API provides 25 functions. - Linux style habit. - DECLARE_KFIFO, DEFINE_KFIFO and INIT_KFIFO Macros - Direct copy_to_user from the fifo and copy_from_user into the fifo. - The kfifo itself is an in place member of the using data structure, this save an indirection access and does not waste the kernel allocator. - Lockless access: if only one reader and one writer is active on the fifo, which is the common use case, no additional locking is necessary. - Remove spinlock - give the user the freedom of choice what kind of locking to use if one is required. - Ability to handle records. Three type of records are supported: - Variable length records between 0-255 bytes, with a record size field of 1 bytes. - Variable length records between 0-65535 bytes, with a record size field of 2 bytes. - Fixed size records, which no record size field. - Preserve memory resource. - Performance! - Easy to use! This patch: Since most users want to have the kfifo as part of another object, reorganize the code to allow including struct kfifo in another data structure. This requires changing the kfifo_alloc and kfifo_init prototypes so that we pass an existing kfifo pointer into them. This patch changes the implementation and all existing users. [akpm@linux-foundation.org: fix warning] Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/probe.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index dc328425fa20..6230ceb0823e 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -43,7 +43,7 @@ static int bufsize = 64 * 1024; static const char procname[] = "dccpprobe"; static struct { - struct kfifo *fifo; + struct kfifo fifo; spinlock_t lock; wait_queue_head_t wait; struct timespec tstart; @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put(dccpw.fifo, tbuf, len); + kfifo_put(&dccpw.fifo, tbuf, len); wake_up(&dccpw.wait); } @@ -109,7 +109,7 @@ static struct jprobe dccp_send_probe = { static int dccpprobe_open(struct inode *inode, struct file *file) { - kfifo_reset(dccpw.fifo); + kfifo_reset(&dccpw.fifo); getnstimeofday(&dccpw.tstart); return 0; } @@ -131,11 +131,11 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, return -ENOMEM; error = wait_event_interruptible(dccpw.wait, - __kfifo_len(dccpw.fifo) != 0); + __kfifo_len(&dccpw.fifo) != 0); if (error) goto out_free; - cnt = kfifo_get(dccpw.fifo, tbuf, len); + cnt = kfifo_get(&dccpw.fifo, tbuf, len); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: @@ -156,10 +156,8 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); - dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); - if (IS_ERR(dccpw.fifo)) - return PTR_ERR(dccpw.fifo); - + if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock)) + return ret; if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; @@ -172,14 +170,14 @@ static __init int dccpprobe_init(void) err1: proc_net_remove(&init_net, procname); err0: - kfifo_free(dccpw.fifo); + kfifo_free(&dccpw.fifo); return ret; } module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { - kfifo_free(dccpw.fifo); + kfifo_free(&dccpw.fifo); proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); -- cgit v1.2.2 From c1e13f25674ed564948ecb7dfe5f83e578892896 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:27 -0800 Subject: kfifo: move out spinlock Move the pointer to the spinlock out of struct kfifo. Most users in tree do not actually use a spinlock, so the few exceptions now have to call kfifo_{get,put}_locked, which takes an extra argument to a spinlock. Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/probe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 6230ceb0823e..c6b50351aa78 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put(&dccpw.fifo, tbuf, len); + kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); wake_up(&dccpw.wait); } @@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, if (error) goto out_free; - cnt = kfifo_get(&dccpw.fifo, tbuf, len); + cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: @@ -156,7 +156,7 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); - if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock)) + if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) return ret; if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; -- cgit v1.2.2 From e64c026dd09b73faf20707711402fc5ed55a8e70 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:28 -0800 Subject: kfifo: cleanup namespace change name of __kfifo_* functions to kfifo_*, because the prefix __kfifo should be reserved for internal functions only. Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index c6b50351aa78..9ef36849edd7 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -131,7 +131,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, return -ENOMEM; error = wait_event_interruptible(dccpw.wait, - __kfifo_len(&dccpw.fifo) != 0); + kfifo_len(&dccpw.fifo) != 0); if (error) goto out_free; -- cgit v1.2.2 From 7acd72eb85f1c7a15e8b5eb554994949241737f1 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:28 -0800 Subject: kfifo: rename kfifo_put... into kfifo_in... and kfifo_get... into kfifo_out... rename kfifo_put... into kfifo_in... to prevent miss use of old non in kernel-tree drivers ditto for kfifo_get... -> kfifo_out... Improve the prototypes of kfifo_in and kfifo_out to make the kerneldoc annotations more readable. Add mini "howto porting to the new API" in kfifo.h Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 9ef36849edd7..a1362dc8abb0 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); + kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); wake_up(&dccpw.wait); } @@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, if (error) goto out_free; - cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); + cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: -- cgit v1.2.2 From f466dba1832f05006cf6caa9be41fb98d11cb848 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 23 Dec 2009 22:02:57 -0800 Subject: pktgen: ndo_start_xmit can return NET_XMIT_xxx values This updates pktgen so that it does not decrement skb->users when it receives valid NET_XMIT_xxx values. These are now valid return values from ndo_start_xmit in net-next-2.6. They also indicate the skb has been consumed. This fixes pktgen to work correctly with vlan devices. Signed-off-by: John Fastabend Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/pktgen.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a23b45f08ec9..de0c2c726420 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -250,8 +250,7 @@ struct pktgen_dev { __u64 count; /* Default No packets to send */ __u64 sofar; /* How many pkts we've sent so far */ __u64 tx_bytes; /* How many bytes we've transmitted */ - __u64 errors; /* Errors when trying to transmit, - pkts will be re-sent */ + __u64 errors; /* Errors when trying to transmit, */ /* runtime counters relating to clone_skb */ @@ -3465,6 +3464,12 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->seq_num++; pkt_dev->tx_bytes += pkt_dev->last_pkt_size; break; + case NET_XMIT_DROP: + case NET_XMIT_CN: + case NET_XMIT_POLICED: + /* skb has been consumed */ + pkt_dev->errors++; + break; default: /* Drivers are not supposed to return other values! */ if (net_ratelimit()) pr_info("pktgen: %s xmit error: %d\n", -- cgit v1.2.2 From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 25 Dec 2009 17:30:22 -0800 Subject: net: restore ip source validation when using policy routing and the skb mark: there are cases where a back path validation requires us to use a different routing table for src ip validation than the one used for mapping ingress dst ip. One such a case is transparent proxying where we pretend to be the destination system and therefore the local table is used for incoming packets but possibly a main table would be used on outbound. Make the default behavior to allow the above and if users need to turn on the symmetry via sysctl src_valid_mark Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a418..040c4f05b653 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), + DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3323168ee52d..82dbf711d6d0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); + if (mark && !IN_DEV_SRC_VMARK(in_dev)) + fl.mark = 0; } rcu_read_unlock(); -- cgit v1.2.2 From 96c5340147584481ef0c0afbb5423f7563c1d24a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 25 Dec 2009 23:30:02 +0000 Subject: NET: XFRM: Fix spelling of neighbour. Signed-off-by: Ralf Baechle net/xfrm/xfrm_policy.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cb81ca35b0d6..4725a549ad4d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1445,7 +1445,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (!dev) goto free_dst; - /* Copy neighbout for reachability confirmation */ + /* Copy neighbour for reachability confirmation */ dst0->neighbour = neigh_clone(dst->neighbour); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); -- cgit v1.2.2 From 2e10d330f8d5f039fa1e00baf59435ab0f11c722 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 20 Dec 2009 19:07:09 +0100 Subject: mac80211: fix ibss join with fixed-bssid When fixed bssid is requested when joining an ibss network, incoming beacons that match the configured bssid cause mac80211 to create new sta entries, even before the ibss interface is in joined state. When that happens, it fails to bring up the interface entirely, because it checks for existing sta entries before joining. This patch fixes this bug by refusing to create sta info entries before the interface is fully operational. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 10d13856f86c..1f2db647bb5c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -382,6 +382,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, u8 *bssid,u8 *addr, u32 supp_rates) { + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; int band = local->hw.conf.channel->band; @@ -397,6 +398,9 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, return NULL; } + if (ifibss->state == IEEE80211_IBSS_MLME_SEARCH) + return NULL; + if (compare_ether_addr(bssid, sdata->u.ibss.bssid)) return NULL; -- cgit v1.2.2 From 3bdb2d48c5f58c781a4099c99044384a23620884 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:12:05 +0100 Subject: cfg80211: fix race between deauth and assoc response Joseph Nahmias reported, in http://bugs.debian.org/562016, that he was getting the following warning (with some log around the issue): ath0: direct probe to AP 00:11:95:77:e0:b0 (try 1) ath0: direct probe responded ath0: authenticate with AP 00:11:95:77:e0:b0 (try 1) ath0: authenticated ath0: associate with AP 00:11:95:77:e0:b0 (try 1) ath0: deauthenticating from 00:11:95:77:e0:b0 by local choice (reason=3) ath0: direct probe to AP 00:11:95:77:e0:b0 (try 1) ath0: RX AssocResp from 00:11:95:77:e0:b0 (capab=0x421 status=0 aid=2) ath0: associated ------------[ cut here ]------------ WARNING: at net/wireless/mlme.c:97 cfg80211_send_rx_assoc+0x14d/0x152 [cfg80211]() Hardware name: 7658CTO ... Pid: 761, comm: phy0 Not tainted 2.6.32-trunk-686 #1 Call Trace: [] ? warn_slowpath_common+0x5e/0x8a [] ? warn_slowpath_null+0xa/0xc [] ? cfg80211_send_rx_assoc+0x14d/0x152 ... ath0: link becomes ready ath0: deauthenticating from 00:11:95:77:e0:b0 by local choice (reason=3) ath0: no IPv6 routers present ath0: link is not ready ath0: direct probe to AP 00:11:95:77:e0:b0 (try 1) ath0: direct probe responded ath0: authenticate with AP 00:11:95:77:e0:b0 (try 1) ath0: authenticated ath0: associate with AP 00:11:95:77:e0:b0 (try 1) ath0: RX ReassocResp from 00:11:95:77:e0:b0 (capab=0x421 status=0 aid=2) ath0: associated It is not clear to me how the first "direct probe" here happens, but this seems to be a race condition, if the user requests to deauth after requesting assoc, but before the assoc response is received. In that case, it may happen that mac80211 tries to report the assoc success to cfg80211, but gets blocked on the wdev lock that is held because the user is requesting the deauth. The result is that we run into a warning. This is mostly harmless, but maybe cause an unexpected event to be sent to userspace; we'd send an assoc success event although userspace was no longer expecting that. To fix this, remove the warning and check whether the race happened and in that case abort processing. Reported-by: Joseph Nahmias Cc: stable@kernel.org Cc: 562016-quiet@bugs.debian.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/mlme.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 1001db4912f7..82e6002c8d67 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -93,7 +93,18 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) } } - WARN_ON(!bss); + /* + * We might be coming here because the driver reported + * a successful association at the same time as the + * user requested a deauth. In that case, we will have + * removed the BSS from the auth_bsses list due to the + * deauth request when the assoc response makes it. If + * the two code paths acquire the lock the other way + * around, that's just the standard situation of a + * deauth being requested while connected. + */ + if (!bss) + goto out; } else if (wdev->conn) { cfg80211_sme_failed_assoc(wdev); /* -- cgit v1.2.2 From 65486c8b30498dd274eea2c542696f22b63fe5b8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 15:33:35 +0100 Subject: cfg80211: fix error path in cfg80211_wext_siwscan If there's an invalid channel or SSID, the code leaks the scan request. Always free the scan request, unless it was successfully given to the driver. Reported-by: Dan Carpenter Signed-off-by: Johannes Berg Acked-by: Dan Carpenter Signed-off-by: John W. Linville --- net/wireless/scan.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 12dfa62aad18..0c2cbbebca95 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -601,7 +601,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, struct cfg80211_registered_device *rdev; struct wiphy *wiphy; struct iw_scan_req *wreq = NULL; - struct cfg80211_scan_request *creq; + struct cfg80211_scan_request *creq = NULL; int i, err, n_channels = 0; enum ieee80211_band band; @@ -694,8 +694,10 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* translate "Scan for SSID" request */ if (wreq) { if (wrqu->data.flags & IW_SCAN_THIS_ESSID) { - if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) - return -EINVAL; + if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out; + } memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); creq->ssids[0].ssid_len = wreq->essid_len; } @@ -707,12 +709,15 @@ int cfg80211_wext_siwscan(struct net_device *dev, err = rdev->ops->scan(wiphy, dev, creq); if (err) { rdev->scan_req = NULL; - kfree(creq); + /* creq will be freed below */ } else { nl80211_send_scan_start(rdev, dev); + /* creq now owned by driver */ + creq = NULL; dev_hold(dev); } out: + kfree(creq); cfg80211_unlock_rdev(rdev); return err; } -- cgit v1.2.2 From b98c06b6debfe84c90200143bb1102f312f50a33 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 24 Dec 2009 15:26:09 -0500 Subject: mac80211: fix race with suspend and dynamic_ps_disable_work When mac80211 suspends it calls a driver's suspend callback as a last step and after that the driver assumes no calls will be made to it until we resume and its start callback is kicked. If such calls are made, however, suspend can end up throwing hardware in an unexpected state and making the device unusable upon resume. Fix this by preventing mac80211 to schedule dynamic_ps_disable_work by checking for when mac80211 starts to suspend and starts quiescing. Frames should be allowed to go through though as that is part of the quiescing steps and we do not flush the mac80211 workqueue since it was already done towards the beginning of suspend cycle. The other mac80211 issue will be hanled in the next patch. For further details see refer to the thread: http://marc.info/?t=126144866100001&r=1&w=2 Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/tx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8834cc93c716..27ceaefd7bc8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1419,6 +1419,10 @@ static bool need_dynamic_ps(struct ieee80211_local *local) if (!local->ps_sdata) return false; + /* No point if we're going to suspend */ + if (local->quiescing) + return false; + return true; } -- cgit v1.2.2 From 24feda0084722189468a65e20019cdd8ef99702b Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 24 Dec 2009 15:38:22 -0500 Subject: mac80211: fix propagation of failed hardware reconfigurations mac80211 does not propagate failed hardware reconfiguration requests. For suspend and resume this is important due to all the possible issues that can come out of the suspend <-> resume cycle. Not propagating the error means cfg80211 will assume the resume for the device went through fine and mac80211 will continue on trying to poke at the hardware, enable timers, queue work, and so on for a device which is completley unfunctional. The least we can do is to propagate device start issues and warn when this occurs upon resume. A side effect of this patch is we also now propagate the start errors upon harware reconfigurations (non-suspend), but this should also be desirable anyway, there is not point in continuing to reconfigure a device if mac80211 was unable to start the device. For further details refer to the thread: http://marc.info/?t=126151038700001&r=1&w=2 Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/util.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 78a6e924c7e1..dc76267e436e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1039,7 +1039,19 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* restart hardware */ if (local->open_count) { + /* + * Upon resume hardware can sometimes be goofy due to + * various platform / driver / bus issues, so restarting + * the device may at times not work immediately. Propagate + * the error. + */ res = drv_start(local); + if (res) { + WARN(local->suspended, "Harware became unavailable " + "upon resume. This is could be a software issue" + "prior to suspend or a harware issue\n"); + return res; + } ieee80211_led_radio(local, true); } -- cgit v1.2.2 From b292cf9ce70d221c3f04ff62db5ab13d9a249ca8 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 31 Dec 2009 10:52:36 +0800 Subject: sunrpc: fix peername failed on closed listener There're some warnings of "nfsd: peername failed (err 107)!" socket error -107 means Transport endpoint is not connected. This warning message was outputed by svc_tcp_accept() [net/sunrpc/svcsock.c], when kernel_getpeername returns -107. This means socket might be CLOSED. And svc_tcp_accept was called by svc_recv() [net/sunrpc/svc_xprt.c] if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { newxpt = xprt->xpt_ops->xpo_accept(xprt); So this might happen when xprt->xpt_flags has both XPT_LISTENER and XPT_CLOSE. Let's take a look at commit b0401d72, this commit has moved the close processing after do recvfrom method, but this commit also introduces this warnings, if the xpt_flags has both XPT_LISTENER and XPT_CLOSED, we should close it, not accpet then close. Signed-off-by: Xiaotian Feng Cc: J. Bruce Fields Cc: Neil Brown Cc: Trond Myklebust Cc: David S. Miller Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2c58b75a236f..810ffe8a636b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -699,7 +699,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); len = 0; - if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + if (test_bit(XPT_LISTENER, &xprt->xpt_flags) && + !test_bit(XPT_CLOSE, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { -- cgit v1.2.2