From 6f3d09291b4982991680b61763b2541e53e2a95f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 19 Mar 2008 01:44:24 +0100 Subject: sched, net: socket wakeups are sync 'sync' wakeups are a hint towards the scheduler that (certain) networking related wakeups likely create coupling between tasks. Signed-off-by: Ingo Molnar --- net/core/sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 09cb3a74de7f..2654c147c004 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1621,7 +1621,7 @@ static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_sync(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); } @@ -1635,7 +1635,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_sync(sk->sk_sleep); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) -- cgit v1.2.2 From d3073779f8362d64b804882f5f41c208c4a5e11e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 24 Mar 2008 12:03:03 -0400 Subject: SVCRDMA: Use only 1 RDMA read scatter entry for iWARP adapters The iWARP protocol limits RDMA read requests to a single scatter entry. NFS/RDMA has code in rdma_read_max_sge() that is supposed to limit the sge_count for RDMA read requests to 1, but the code to do that is inside an #ifdef RDMA_TRANSPORT_IWARP block. In the mainline kernel at least, RDMA_TRANSPORT_IWARP is an enum and not a preprocessor #define, so the #ifdef'ed code is never compiled. In my test of a kernel build with -j8 on an NFS/RDMA mount, this problem eventually leads to trouble starting with: svcrdma: Error posting send = -22 svcrdma : RDMA_READ error = -22 and things go downhill from there. The trivial fix is to delete the #ifdef guard. The check seems to be a remnant of when the NFS/RDMA code was not merged and needed to compile against multiple kernel versions, although I don't think it ever worked as intended. In any case now that the code is upstream there's no need to test whether the RDMA_TRANSPORT_IWARP constant is defined or not. Without this patch, my kernel build on an NFS/RDMA mount using NetEffect adapters quickly and 100% reproducibly failed with an error like: ld: final link failed: Software caused connection abort With the patch applied I was able to complete a kernel build on the same setup. (Tom Tucker says this is "actually an _ancient_ remnant when it had to compile against iWARP vs. non-iWARP enabled OFA trees.") Signed-off-by: Roland Dreier Acked-by: Tom Tucker Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ab54a736486e..971271602dd0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -237,14 +237,12 @@ static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) { -#ifdef RDMA_TRANSPORT_IWARP if ((RDMA_TRANSPORT_IWARP == rdma_node_get_transport(xprt->sc_cm_id-> device->node_type)) && sge_count > 1) return 1; else -#endif return min_t(int, sge_count, xprt->sc_max_sge); } -- cgit v1.2.2 From c8237a5fcea9d49a73275b4c8f541dd42f8da1a4 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Tue, 25 Mar 2008 22:27:19 -0400 Subject: SVCRDMA: Check num_sge when setting LAST_CTXT bit The RDMACTXT_F_LAST_CTXT bit was getting set incorrectly when the last chunk in the read-list spanned multiple pages. This resulted in a kernel panic when the wrong context was used to build the RPC iovec page list. RDMA_READ is used to fetch RPC data from the client for NFS_WRITE requests. A scatter-gather is used to map the advertised client side buffer to the server-side iovec and associated page list. WR contexts are used to convey which scatter-gather entries are handled by each WR. When the write data is large, a single RPC may require multiple RDMA_READ requests so the contexts for a single RPC are chained together in a linked list. The last context in this list is marked with a bit RDMACTXT_F_LAST_CTXT so that when this WR completes, the CQ handler code can enqueue the RPC for processing. The code in rdma_read_xdr was setting this bit on the last two contexts on this list when the last read-list chunk spanned multiple pages. This caused the svc_rdma_recvfrom logic to incorrectly build the RPC and caused the kernel to crash because the second-to-last context doesn't contain the iovec page list. Modified the condition that sets this bit so that it correctly detects the last context for the RPC. Signed-off-by: Tom Tucker Tested-by: Roland Dreier Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 971271602dd0..c22d6b6f2db4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -322,15 +322,6 @@ next_sge: ctxt->direction = DMA_FROM_DEVICE; clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - if ((ch+1)->rc_discrim == 0) { - /* - * Checked in sq_cq_reap to see if we need to - * be enqueued - */ - set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - ctxt->next = hdr_ctxt; - hdr_ctxt->next = head; - } /* Prepare READ WR */ memset(&read_wr, 0, sizeof read_wr); @@ -348,7 +339,17 @@ next_sge: rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start], &sgl_offset, read_wr.num_sge); - + if (((ch+1)->rc_discrim == 0) && + (read_wr.num_sge == ch_sge_ary[ch_no].count)) { + /* + * Mark the last RDMA_READ with a bit to + * indicate all RPC data has been fetched from + * the client and the RPC needs to be enqueued. + */ + set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + ctxt->next = hdr_ctxt; + hdr_ctxt->next = head; + } /* Post the read */ err = svc_rdma_send(xprt, &read_wr); if (err) { -- cgit v1.2.2 From 3387b804d8850494bdf91d16800925a3fd46e37d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 28 Mar 2008 14:15:57 -0700 Subject: net/9p/trans_fd.c:p9_trans_fd_init(): module_init functions should return 0 on success Mar 23 09:06:31 opensuse103 kernel: Installing 9P2000 support Mar 23 09:06:31 opensuse103 kernel: sys_init_module: '9pnet_fd'->init suspiciously returned 1, it should follow 0/-E convention Mar 23 09:06:31 opensuse103 kernel: sys_init_module: loading module anyway... Mar 23 09:06:31 opensuse103 kernel: Pid: 5323, comm: modprobe Not tainted 2.6.25-rc6-git7-default #1 Mar 23 09:06:31 opensuse103 kernel: [] sys_init_module+0x172b/0x17c9 Mar 23 09:06:31 opensuse103 kernel: [] sys_mmap2+0x62/0x77 Mar 23 09:06:31 opensuse103 kernel: [] sysenter_past_esp+0x6d/0xa9 Mar 23 09:06:31 opensuse103 kernel: ======================= Cc: Latchesar Ionkov Cc: Eric Van Hensbergen Cc: David S. Miller Cc: "Rafael J. Wysocki" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/9p/trans_fd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 4e8d4e724b96..f624dff76852 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1520,7 +1520,7 @@ static int __init p9_trans_fd_init(void) v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); - return 1; + return 0; } module_init(p9_trans_fd_init); -- cgit v1.2.2 From 91e916cffec7c0153c5cbaa447151862a7a9a047 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 29 Mar 2008 03:08:38 +0000 Subject: net/rxrpc trivial annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/rxkad.c | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1aaa2e804b0d..126ca777b410 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -23,7 +23,7 @@ struct rxrpc_crypt { union { u8 x[FCRYPT_BSIZE]; - u32 n[2]; + __be32 n[2]; }; } __attribute__((aligned(8))); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f48434adb7c2..d1c296f2d617 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -261,6 +261,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call, __be32 x[2]; } tmpbuf __attribute__((aligned(8))); /* must all be in same page */ __be32 x; + u32 y; int ret; sp = rxrpc_skb(skb); @@ -292,11 +293,11 @@ static int rxkad_secure_packet(const struct rxrpc_call *call, sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); - x = ntohl(tmpbuf.x[1]); - x = (x >> 16) & 0xffff; - if (x == 0) - x = 1; /* zero checksums are not permitted */ - sp->hdr.cksum = htons(x); + y = ntohl(tmpbuf.x[1]); + y = (y >> 16) & 0xffff; + if (y == 0) + y = 1; /* zero checksums are not permitted */ + sp->hdr.cksum = htons(y); switch (call->conn->security_level) { case RXRPC_SECURITY_PLAIN: @@ -314,7 +315,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call, break; } - _leave(" = %d [set %hx]", ret, x); + _leave(" = %d [set %hx]", ret, y); return ret; } @@ -492,6 +493,7 @@ static int rxkad_verify_packet(const struct rxrpc_call *call, __be32 x[2]; } tmpbuf __attribute__((aligned(8))); /* must all be in same page */ __be32 x; + u16 y; __be16 cksum; int ret; @@ -526,12 +528,12 @@ static int rxkad_verify_packet(const struct rxrpc_call *call, sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); - x = ntohl(tmpbuf.x[1]); - x = (x >> 16) & 0xffff; - if (x == 0) - x = 1; /* zero checksums are not permitted */ + y = ntohl(tmpbuf.x[1]); + y = (y >> 16) & 0xffff; + if (y == 0) + y = 1; /* zero checksums are not permitted */ - cksum = htons(x); + cksum = htons(y); if (sp->hdr.cksum != cksum) { *_abort_code = RXKADSEALEDINCON; _leave(" = -EPROTO [csum failed]"); @@ -1001,7 +1003,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxrpc_crypt session_key; time_t expiry; void *ticket; - u32 abort_code, version, kvno, ticket_len, csum, level; + u32 abort_code, version, kvno, ticket_len, level; + __be32 csum; int ret; _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); -- cgit v1.2.2