path: root/net/sunrpc
author	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit	c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree	ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/sunrpc
parent	ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent	6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'net/sunrpc')
-rw-r--r--	net/sunrpc/Kconfig	28
-rw-r--r--	net/sunrpc/addr.c	2
-rw-r--r--	net/sunrpc/auth.c	36
-rw-r--r--	net/sunrpc/auth_generic.c	2
-rw-r--r--	net/sunrpc/auth_gss/Makefile	9
-rw-r--r--	net/sunrpc/auth_gss/auth_gss.c	60
-rw-r--r--	net/sunrpc/auth_gss/gss_generic_token.c	44
-rw-r--r--	net/sunrpc/auth_gss/gss_krb5_crypto.c	2
-rw-r--r--	net/sunrpc/auth_gss/gss_krb5_mech.c	3
-rw-r--r--	net/sunrpc/auth_gss/gss_krb5_seqnum.c	2
-rw-r--r--	net/sunrpc/auth_gss/gss_mech_switch.c	40
-rw-r--r--	net/sunrpc/auth_gss/gss_spkm3_mech.c	247
-rw-r--r--	net/sunrpc/auth_gss/gss_spkm3_seal.c	186
-rw-r--r--	net/sunrpc/auth_gss/gss_spkm3_token.c	267
-rw-r--r--	net/sunrpc/auth_gss/gss_spkm3_unseal.c	127
-rw-r--r--	net/sunrpc/auth_gss/svcauth_gss.c	55
-rw-r--r--	net/sunrpc/bc_svc.c	2
-rw-r--r--	net/sunrpc/cache.c	345
-rw-r--r--	net/sunrpc/clnt.c	105
-rw-r--r--	net/sunrpc/netns.h	19
-rw-r--r--	net/sunrpc/rpc_pipe.c	65
-rw-r--r--	net/sunrpc/rpcb_clnt.c	306
-rw-r--r--	net/sunrpc/sched.c	130
-rw-r--r--	net/sunrpc/stats.c	47
-rw-r--r--	net/sunrpc/sunrpc_syms.c	58
-rw-r--r--	net/sunrpc/svc.c	44
-rw-r--r--	net/sunrpc/svc_xprt.c	166
-rw-r--r--	net/sunrpc/svcauth.c	1
-rw-r--r--	net/sunrpc/svcauth_unix.c	229
-rw-r--r--	net/sunrpc/svcsock.c	505
-rw-r--r--	net/sunrpc/xdr.c	211
-rw-r--r--	net/sunrpc/xprt.c	68
-rw-r--r--	net/sunrpc/xprtrdma/rpc_rdma.c	86
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma.c	11
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	19
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_sendto.c	82
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_transport.c	55
-rw-r--r--	net/sunrpc/xprtrdma/transport.c	25
-rw-r--r--	net/sunrpc/xprtrdma/verbs.c	55
-rw-r--r--	net/sunrpc/xprtrdma/xprt_rdma.h	1
-rw-r--r--	net/sunrpc/xprtsock.c	798
41 files changed, 2390 insertions, 2153 deletions
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 3376d7657185..b2198e65d8bb 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -18,14 +18,13 @@ config SUNRPC_XPRT_RDMA
 	  If unsure, say N.
 
 config RPCSEC_GSS_KRB5
-	tristate
+	tristate "Secure RPC: Kerberos V mechanism"
 	depends on SUNRPC && CRYPTO
-	prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4)
+	depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS
+	depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES
+	depends on CRYPTO_ARC4
 	default y
 	select SUNRPC_GSS
-	select CRYPTO_MD5
-	select CRYPTO_DES
-	select CRYPTO_CBC
 	help
 	  Choose Y here to enable Secure RPC using the Kerberos version 5
 	  GSS-API mechanism (RFC 1964).
@@ -36,22 +35,3 @@ config RPCSEC_GSS_KRB5
 	  Kerberos support should be installed.
 
 	  If unsure, say Y.
-
-config RPCSEC_GSS_SPKM3
-	tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
-	depends on SUNRPC && EXPERIMENTAL
-	select SUNRPC_GSS
-	select CRYPTO
-	select CRYPTO_MD5
-	select CRYPTO_DES
-	select CRYPTO_CAST5
-	select CRYPTO_CBC
-	help
-	  Choose Y here to enable Secure RPC using the SPKM3 public key
-	  GSS-API mechanism (RFC 2025).
-
-	  Secure RPC calls with SPKM3 require an auxiliary userspace
-	  daemon which may be found in the Linux nfs-utils package
-	  available from http://linux-nfs.org/.
-
-	  If unsure, say N.
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 1419d0cdbbac..4195233c4914 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -151,7 +151,7 @@ static size_t rpc_pton4(const char *buf, const size_t buflen,
 		return 0;
 
 	sin->sin_family = AF_INET;
-	return sizeof(struct sockaddr_in);;
+	return sizeof(struct sockaddr_in);
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index e9eaaf7d43c1..cd6e4aa19dbf 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
  * Run memory cache shrinker.
  */
 static int
-rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc)
 {
 	LIST_HEAD(free);
 	int res;
+	int nr_to_scan = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
 
 	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
 		return (nr_to_scan == 0) ? 0 : -1;
@@ -563,8 +565,17 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p)
 	return cred->cr_ops->crvalidate(task, p);
 }
 
+static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+				    __be32 *data, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
+	encode(rqstp, &xdr, obj);
+}
+
 int
-rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
+rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
 		__be32 *data, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -574,11 +585,22 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
 	if (cred->cr_ops->crwrap_req)
 		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
 	/* By default, we encode the arguments normally. */
-	return encode(rqstp, data, obj);
+	rpcauth_wrap_req_encode(encode, rqstp, data, obj);
+	return 0;
+}
+
+static int
+rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+			  __be32 *data, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
+	return decode(rqstp, &xdr, obj);
 }
 
 int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
+rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
 		__be32 *data, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -589,13 +611,13 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
 						   data, obj);
 	/* By default, we decode the arguments normally. */
-	return decode(rqstp, data, obj);
+	return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
 }
 
 int
 rpcauth_refreshcred(struct rpc_task *task)
 {
-	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+	struct rpc_cred *cred;
 	int err;
 
 	cred = task->tk_rqstp->rq_cred;
@@ -658,7 +680,7 @@ out1:
 	return err;
 }
 
-void __exit rpcauth_remove_module(void)
+void rpcauth_remove_module(void)
 {
 	rpc_destroy_authunix();
 	rpc_destroy_generic_auth();
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 43162bb3b78f..e010a015d996 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -158,7 +158,7 @@ int __init rpc_init_generic_auth(void)
 	return rpcauth_init_credcache(&generic_auth);
 }
 
-void __exit rpc_destroy_generic_auth(void)
+void rpc_destroy_generic_auth(void)
 {
 	rpcauth_destroy_credcache(&generic_auth);
 }
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 74a231735f67..9e4cb59ef9f0 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -4,15 +4,10 @@
 
 obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
 
-auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
+auth_rpcgss-y := auth_gss.o gss_generic_token.o \
 	gss_mech_switch.o svcauth_gss.o
 
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
-rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
+rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
 	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
-
-obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
-
-rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \
-	gss_spkm3_token.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 12c485982814..5daf6cc4faea 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -417,7 +417,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 		gss_msg->msg.len += len;
 	}
 	if (mech->gm_upcall_enctypes) {
-		len = sprintf(p, mech->gm_upcall_enctypes);
+		len = sprintf(p, "enctypes=%s ", mech->gm_upcall_enctypes);
 		p += len;
 		gss_msg->msg.len += len;
 	}
@@ -520,7 +520,7 @@ gss_refresh_upcall(struct rpc_task *task)
 		warn_gssd();
 		task->tk_timeout = 15*HZ;
 		rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
-		return 0;
+		return -EAGAIN;
 	}
 	if (IS_ERR(gss_msg)) {
 		err = PTR_ERR(gss_msg);
@@ -563,10 +563,12 @@ retry:
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
 		err = wait_event_interruptible_timeout(pipe_version_waitqueue,
 				pipe_version >= 0, 15*HZ);
+		if (pipe_version < 0) {
+			warn_gssd();
+			err = -EACCES;
+		}
 		if (err)
 			goto out;
-		if (pipe_version < 0)
-			warn_gssd();
 		goto retry;
 	}
 	if (IS_ERR(gss_msg)) {
@@ -575,13 +577,13 @@ retry:
 	}
 	inode = &gss_msg->inode->vfs_inode;
 	for (;;) {
-		prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE);
 		spin_lock(&inode->i_lock);
 		if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
 			break;
 		}
 		spin_unlock(&inode->i_lock);
-		if (signalled()) {
+		if (fatal_signal_pending(current)) {
 			err = -ERESTARTSYS;
 			goto out_intr;
 		}
@@ -1050,7 +1052,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
 out:
 	if (acred->machine_cred != gss_cred->gc_machine_cred)
 		return 0;
-	return (rc->cr_uid == acred->uid);
+	return rc->cr_uid == acred->uid;
 }
 
 /*
@@ -1231,9 +1233,19 @@ out_bad:
 	return NULL;
 }
 
+static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+				__be32 *p, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
+	encode(rqstp, &xdr, obj);
+}
+
 static inline int
 gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+		   kxdreproc_t encode, struct rpc_rqst *rqstp,
+		   __be32 *p, void *obj)
 {
 	struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_buf integ_buf;
@@ -1249,9 +1261,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = encode(rqstp, p, obj);
-	if (status)
-		return status;
+	gss_wrap_req_encode(encode, rqstp, p, obj);
 
 	if (xdr_buf_subsegment(snd_buf, &integ_buf,
 				offset, snd_buf->len - offset))
@@ -1325,7 +1335,8 @@ out:
 
 static inline int
 gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+		  kxdreproc_t encode, struct rpc_rqst *rqstp,
+		  __be32 *p, void *obj)
 {
 	struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
 	u32 offset;
@@ -1342,9 +1353,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = encode(rqstp, p, obj);
-	if (status)
-		return status;
+	gss_wrap_req_encode(encode, rqstp, p, obj);
 
 	status = alloc_enc_pages(rqstp);
 	if (status)
@@ -1394,7 +1403,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 
 static int
 gss_wrap_req(struct rpc_task *task,
-	     kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
+	     kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
@@ -1407,12 +1416,14 @@ gss_wrap_req(struct rpc_task *task,
 		/* The spec seems a little ambiguous here, but I think that not
 		 * wrapping context destruction requests makes the most sense.
 		 */
-		status = encode(rqstp, p, obj);
+		gss_wrap_req_encode(encode, rqstp, p, obj);
+		status = 0;
 		goto out;
 	}
 	switch (gss_cred->gc_service) {
 	case RPC_GSS_SVC_NONE:
-		status = encode(rqstp, p, obj);
+		gss_wrap_req_encode(encode, rqstp, p, obj);
+		status = 0;
 		break;
 	case RPC_GSS_SVC_INTEGRITY:
 		status = gss_wrap_req_integ(cred, ctx, encode,
@@ -1494,10 +1505,19 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	return 0;
 }
 
+static int
+gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+		      __be32 *p, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	return decode(rqstp, &xdr, obj);
+}
 
 static int
 gss_unwrap_resp(struct rpc_task *task,
-		kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
+		kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
@@ -1528,7 +1548,7 @@ gss_unwrap_resp(struct rpc_task *task,
 	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
 						+ (savedlen - head->iov_len);
 out_decode:
-	status = decode(rqstp, p, obj);
+	status = gss_unwrap_req_decode(decode, rqstp, p, obj);
 out:
 	gss_put_ctx(ctx);
 	dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index 310b78e99456..c586e92bcf76 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -76,19 +76,19 @@ static int
 der_length_size( int length)
 {
 	if (length < (1<<7))
-		return(1);
+		return 1;
 	else if (length < (1<<8))
-		return(2);
+		return 2;
 #if (SIZEOF_INT == 2)
 	else
-		return(3);
+		return 3;
 #else
 	else if (length < (1<<16))
-		return(3);
+		return 3;
 	else if (length < (1<<24))
-		return(4);
+		return 4;
 	else
-		return(5);
+		return 5;
 #endif
 }
 
@@ -121,14 +121,14 @@ der_read_length(unsigned char **buf, int *bufsize)
 	int ret;
 
 	if (*bufsize < 1)
-		return(-1);
+		return -1;
 	sf = *(*buf)++;
 	(*bufsize)--;
 	if (sf & 0x80) {
 		if ((sf &= 0x7f) > ((*bufsize)-1))
-			return(-1);
+			return -1;
 		if (sf > SIZEOF_INT)
-			return (-1);
+			return -1;
 		ret = 0;
 		for (; sf; sf--) {
 			ret = (ret<<8) + (*(*buf)++);
@@ -138,7 +138,7 @@ der_read_length(unsigned char **buf, int *bufsize)
 		ret = sf;
 	}
 
-	return(ret);
+	return ret;
 }
 
 /* returns the length of a token, given the mech oid and the body size */
@@ -148,7 +148,7 @@ g_token_size(struct xdr_netobj *mech, unsigned int body_size)
 {
 	/* set body_size to sequence contents size */
 	body_size += 2 + (int) mech->len; /* NEED overflow check */
-	return(1 + der_length_size(body_size) + body_size);
+	return 1 + der_length_size(body_size) + body_size;
 }
 
 EXPORT_SYMBOL_GPL(g_token_size);
@@ -186,27 +186,27 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
 	int ret = 0;
 
 	if ((toksize-=1) < 0)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 	if (*buf++ != 0x60)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 
 	if ((seqsize = der_read_length(&buf, &toksize)) < 0)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 
 	if (seqsize != toksize)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 
 	if ((toksize-=1) < 0)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 	if (*buf++ != 0x06)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 
 	if ((toksize-=1) < 0)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 	toid.len = *buf++;
 
 	if ((toksize-=toid.len) < 0)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 	toid.data = buf;
 	buf+=toid.len;
 
@@ -217,17 +217,17 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
 	   to return G_BAD_TOK_HEADER if the token header is in fact bad */
 
 	if ((toksize-=2) < 0)
-		return(G_BAD_TOK_HEADER);
+		return G_BAD_TOK_HEADER;
 
 	if (ret)
-		return(ret);
+		return ret;
 
 	if (!ret) {
 		*buf_in = buf;
 		*body_size = toksize;
 	}
 
-	return(ret);
+	return ret;
 }
 
 EXPORT_SYMBOL_GPL(g_verify_token_header);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 75ee993ea057..9576f35ab701 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -137,7 +137,7 @@ arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
 		ms_usage = 13;
 		break;
 	default:
-		return EINVAL;;
+		return -EINVAL;
 	}
 	salt[0] = (ms_usage >> 0) & 0xff;
 	salt[1] = (ms_usage >> 8) & 0xff;
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 778e5dfc5144..c3b75333b821 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -43,6 +43,7 @@
 #include <linux/sunrpc/gss_krb5.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/crypto.h>
+#include <linux/sunrpc/gss_krb5_enctypes.h>
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY RPCDBG_AUTH
@@ -750,7 +751,7 @@ static struct gss_api_mech gss_kerberos_mech = {
 	.gm_ops = &gss_kerberos_ops,
 	.gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
 	.gm_pfs = gss_kerberos_pfs,
-	.gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ",
+	.gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES,
 };
 
 static int __init init_kerberos_module(void)
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 415c013ba382..62ac90c62cb1 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -162,5 +162,5 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
 	*seqnum = ((plain[0]) |
 		(plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
 
-	return (0);
+	return 0;
 }
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 2689de39dc78..e3c36a274412 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -160,6 +160,28 @@ gss_mech_get_by_name(const char *name)
 
 EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
 
+struct gss_api_mech *
+gss_mech_get_by_OID(struct xdr_netobj *obj)
+{
+	struct gss_api_mech *pos, *gm = NULL;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		if (obj->len == pos->gm_oid.len) {
+			if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
+				if (try_module_get(pos->gm_owner))
+					gm = pos;
+				break;
+			}
+		}
+	}
+	spin_unlock(&registered_mechs_lock);
+	return gm;
+
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
+
 static inline int
 mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
 {
@@ -193,6 +215,22 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
 
 EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
 
+int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
+{
+	struct gss_api_mech *pos = NULL;
+	int i = 0;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		array_ptr[i] = pos->gm_pfs->pseudoflavor;
+		i++;
+	}
+	spin_unlock(&registered_mechs_lock);
+	return i;
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
+
 u32
 gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
 {
@@ -331,7 +369,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle)
 			*context_handle);
 
 	if (!*context_handle)
-		return(GSS_S_NO_CONTEXT);
+		return GSS_S_NO_CONTEXT;
 	if ((*context_handle)->internal_ctx_id)
 		(*context_handle)->mech_type->gm_ops
 			->gss_delete_sec_context((*context_handle)
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
deleted file mode 100644
index adade3d313f2..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- *  linux/net/sunrpc/gss_spkm3_mech.c
- *
- *  Copyright (c) 2003 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Andy Adamson <andros@umich.edu>
- *  J. Bruce Fields <bfields@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/sunrpc/auth.h>
-#include <linux/in.h>
-#include <linux/sunrpc/svcauth_gss.h>
-#include <linux/sunrpc/gss_spkm3.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/crypto.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, int len)
-{
-	const void *q = (const void *)((const char *)p + len);
-	if (unlikely(q > end || q < p))
-		return ERR_PTR(-EFAULT);
-	memcpy(res, p, len);
-	return q;
-}
-
-static const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
-{
-	const void *q;
-	unsigned int len;
-	p = simple_get_bytes(p, end, &len, sizeof(len));
-	if (IS_ERR(p))
-		return p;
-	res->len = len;
-	if (len == 0) {
-		res->data = NULL;
-		return p;
-	}
-	q = (const void *)((const char *)p + len);
-	if (unlikely(q > end || q < p))
-		return ERR_PTR(-EFAULT);
-	res->data = kmemdup(p, len, GFP_NOFS);
-	if (unlikely(res->data == NULL))
-		return ERR_PTR(-ENOMEM);
-	return q;
-}
-
-static int
-gss_import_sec_context_spkm3(const void *p, size_t len,
-				struct gss_ctx *ctx_id,
-				gfp_t gfp_mask)
-{
-	const void *end = (const void *)((const char *)p + len);
-	struct spkm3_ctx *ctx;
-	int version;
-
-	if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
-		goto out_err;
-
-	p = simple_get_bytes(p, end, &version, sizeof(version));
-	if (IS_ERR(p))
-		goto out_err_free_ctx;
-	if (version != 1) {
-		dprintk("RPC: unknown spkm3 token format: "
-				"obsolete nfs-utils?\n");
-		p = ERR_PTR(-EINVAL);
-		goto out_err_free_ctx;
-	}
-
-	p = simple_get_netobj(p, end, &ctx->ctx_id);
-	if (IS_ERR(p))
-		goto out_err_free_ctx;
-
-	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
-	if (IS_ERR(p))
-		goto out_err_free_ctx_id;
-
-	p = simple_get_netobj(p, end, &ctx->mech_used);
-	if (IS_ERR(p))
-		goto out_err_free_ctx_id;
-
-	p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
-	if (IS_ERR(p))
-		goto out_err_free_mech;
-
-	p = simple_get_netobj(p, end, &ctx->conf_alg);
-	if (IS_ERR(p))
-		goto out_err_free_mech;
-
-	p = simple_get_netobj(p, end, &ctx->derived_conf_key);
-	if (IS_ERR(p))
-		goto out_err_free_conf_alg;
-
-	p = simple_get_netobj(p, end, &ctx->intg_alg);
-	if (IS_ERR(p))
-		goto out_err_free_conf_key;
-
-	p = simple_get_netobj(p, end, &ctx->derived_integ_key);
-	if (IS_ERR(p))
-		goto out_err_free_intg_alg;
-
-	if (p != end) {
-		p = ERR_PTR(-EFAULT);
-		goto out_err_free_intg_key;
-	}
-
-	ctx_id->internal_ctx_id = ctx;
-
-	dprintk("RPC: Successfully imported new spkm context.\n");
-	return 0;
-
-out_err_free_intg_key:
-	kfree(ctx->derived_integ_key.data);
-out_err_free_intg_alg:
-	kfree(ctx->intg_alg.data);
-out_err_free_conf_key:
-	kfree(ctx->derived_conf_key.data);
-out_err_free_conf_alg:
-	kfree(ctx->conf_alg.data);
-out_err_free_mech:
-	kfree(ctx->mech_used.data);
-out_err_free_ctx_id:
-	kfree(ctx->ctx_id.data);
-out_err_free_ctx:
-	kfree(ctx);
-out_err:
-	return PTR_ERR(p);
-}
-
-static void
-gss_delete_sec_context_spkm3(void *internal_ctx)
-{
-	struct spkm3_ctx *sctx = internal_ctx;
-
-	kfree(sctx->derived_integ_key.data);
-	kfree(sctx->intg_alg.data);
-	kfree(sctx->derived_conf_key.data);
-	kfree(sctx->conf_alg.data);
-	kfree(sctx->mech_used.data);
-	kfree(sctx->ctx_id.data);
-	kfree(sctx);
-}
-
-static u32
-gss_verify_mic_spkm3(struct gss_ctx *ctx,
-			struct xdr_buf *signbuf,
-			struct xdr_netobj *checksum)
-{
-	u32 maj_stat = 0;
-	struct spkm3_ctx *sctx = ctx->internal_ctx_id;
-
-	maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
-
-	dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
-	return maj_stat;
-}
-
-static u32
-gss_get_mic_spkm3(struct gss_ctx *ctx,
-		struct xdr_buf *message_buffer,
-		struct xdr_netobj *message_token)
-{
-	u32 err = 0;
-	struct spkm3_ctx *sctx = ctx->internal_ctx_id;
-
-	err = spkm3_make_token(sctx, message_buffer,
-				message_token, SPKM_MIC_TOK);
-	dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
-	return err;
-}
-
-static const struct gss_api_ops gss_spkm3_ops = {
-	.gss_import_sec_context = gss_import_sec_context_spkm3,
-	.gss_get_mic = gss_get_mic_spkm3,
-	.gss_verify_mic = gss_verify_mic_spkm3,
-	.gss_delete_sec_context = gss_delete_sec_context_spkm3,
-};
-
-static struct pf_desc gss_spkm3_pfs[] = {
-	{RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"},
-	{RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
-};
-
-static struct gss_api_mech gss_spkm3_mech = {
-	.gm_name = "spkm3",
-	.gm_owner = THIS_MODULE,
-	.gm_oid = {7, "\053\006\001\005\005\001\003"},
-	.gm_ops = &gss_spkm3_ops,
-	.gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs),
-	.gm_pfs = gss_spkm3_pfs,
-};
-
-static int __init init_spkm3_module(void)
-{
-	int status;
-
-	status = gss_mech_register(&gss_spkm3_mech);
-	if (status)
-		printk("Failed to register spkm3 gss mechanism!\n");
-	return status;
-}
-
-static void __exit cleanup_spkm3_module(void)
-{
-	gss_mech_unregister(&gss_spkm3_mech);
-}
-
-MODULE_LICENSE("GPL");
-module_init(init_spkm3_module);
-module_exit(cleanup_spkm3_module);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
deleted file mode 100644
index 5a3a65a0e2b4..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- *  linux/net/sunrpc/gss_spkm3_seal.c
- *
- *  Copyright (c) 2003 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Andy Adamson <andros@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/jiffies.h>
-#include <linux/sunrpc/gss_spkm3.h>
-#include <linux/random.h>
-#include <linux/crypto.h>
-#include <linux/pagemap.h>
-#include <linux/scatterlist.h>
-#include <linux/sunrpc/xdr.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
-const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
-
-/*
- * spkm3_make_token()
- *
- * Only SPKM_MIC_TOK with md5 intg-alg is supported
- */
-
-u32
-spkm3_make_token(struct spkm3_ctx *ctx,
-		struct xdr_buf * text, struct xdr_netobj * token,
-		int toktype)
-{
-	s32 checksum_type;
-	char tokhdrbuf[25];
-	char cksumdata[16];
-	struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
-	struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
-	int tokenlen = 0;
-	unsigned char *ptr;
-	s32 now;
-	int ctxelen = 0, ctxzbit = 0;
-	int md5elen = 0, md5zbit = 0;
-
-	now = jiffies;
-
-	if (ctx->ctx_id.len != 16) {
-		dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
-				ctx->ctx_id.len);
-		goto out_err;
-	}
-
-	if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
-		dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
-				"algorithm.  only support hmac-md5 I-ALG.\n");
-		goto out_err;
-	} else
-		checksum_type = CKSUMTYPE_HMAC_MD5;
-
-	if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
-		dprintk("RPC: gss_spkm3_seal: unsupported C-ALG "
-				"algorithm\n");
-		goto out_err;
-	}
-
-	if (toktype == SPKM_MIC_TOK) {
-		/* Calculate checksum over the mic-header */
-		asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
-		spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
-				ctxelen, ctxzbit);
-		if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
-					(char *)mic_hdr.data, mic_hdr.len,
-					text, 0, &md5cksum))
-			goto out_err;
-
-		asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
-		tokenlen = 10 + ctxelen + 1 + md5elen + 1;
-
-		/* Create token header using generic routines */
-		token->len = g_token_size(&ctx->mech_used, tokenlen + 2);
-
-		ptr = token->data;
-		g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr);
-
-		spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
-	} else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
-		dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK "
-				"not supported\n");
-		goto out_err;
-	}
-
-	/* XXX need to implement sequence numbers, and ctx->expired */
-
-	return GSS_S_COMPLETE;
-out_err:
-	token->data = NULL;
-	token->len = 0;
-	return GSS_S_FAILURE;
-}
-
-static int
-spkm3_checksummer(struct scatterlist *sg, void *data)
-{
-	struct hash_desc *desc = data;
-
-	return crypto_hash_update(desc, sg, sg->length);
-}
-
-/* checksum the plaintext data and hdrlen bytes of the token header */
-s32
-make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
-		unsigned int hdrlen, struct xdr_buf *body,
-		unsigned int body_offset, struct xdr_netobj *cksum)
-{
-	char *cksumname;
-	struct hash_desc desc; /* XXX add to ctx? */
-	struct scatterlist sg[1];
-	int err;
-
-	switch (cksumtype) {
-	case CKSUMTYPE_HMAC_MD5:
-		cksumname = "hmac(md5)";
-		break;
-	default:
-		dprintk("RPC: spkm3_make_checksum:"
-				" unsupported checksum %d", cksumtype);
-		return GSS_S_FAILURE;
-	}
-
-	if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE;
-
-	desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(desc.tfm))
-		return GSS_S_FAILURE;
-	cksum->len = crypto_hash_digestsize(desc.tfm);
-	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	err = crypto_hash_setkey(desc.tfm, key->data, key->len);
-	if (err)
-		goto out;
-
-	err = crypto_hash_init(&desc);
-	if (err)
-		goto out;
-
-	sg_init_one(sg, header, hdrlen);
-	crypto_hash_update(&desc, sg, sg->length);
-
-	xdr_process_buf(body, body_offset, body->len - body_offset,
-			spkm3_checksummer, &desc);
-	crypto_hash_final(&desc, cksum->data);
-
-out:
-	crypto_free_hash(desc.tfm);
-
-	return err ? GSS_S_FAILURE : 0;
-}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
deleted file mode 100644
index a99825d7caa0..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- *  linux/net/sunrpc/gss_spkm3_token.c
- *
- *  Copyright (c) 2003 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Andy Adamson <andros@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/sunrpc/gss_spkm3.h>
-#include <linux/random.h>
-#include <linux/crypto.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-/*
- * asn1_bitstring_len()
- *
- * calculate the asn1 bitstring length of the xdr_netobject
- */
-void
-asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
-{
-	int i, zbit = 0,elen = in->len;
-	char *ptr;
-
-	ptr = &in->data[in->len -1];
-
-	/* count trailing 0's */
-	for(i = in->len; i > 0; i--) {
-		if (*ptr == 0) {
-			ptr--;
-			elen--;
-		} else
-			break;
-	}
-
-	/* count number of 0 bits in final octet */
-	ptr = &in->data[elen - 1];
-	for(i = 0; i < 8; i++) {
-		short mask = 0x01;
-
-		if (!((mask << i) & *ptr))
-			zbit++;
-		else
-			break;
-	}
-	*enclen = elen;
-	*zerobits = zbit;
-}
-
-/*
- * decode_asn1_bitstring()
- *
- * decode a bitstring into a buffer of the expected length.
- * enclen = bit string length
- * explen = expected length (define in rfc)
- */
-int
-decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
-{
-	if (!(out->data = kzalloc(explen,GFP_NOFS)))
-		return 0;
-	out->len = explen;
-	memcpy(out->data, in, enclen);
-	return 1;
-}
-
-/*
- * SPKMInnerContextToken choice SPKM_MIC asn1 token layout
- *
- * contextid is always 16 bytes plain data. max asn1 bitstring len = 17.
- *
- * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum)
- *
- * pos  value
- * ----------
- * [0]  a4  SPKM-MIC tag
- * [1]  ??  innertoken length  (max 44)
- *
- *
- * tok_hdr piece of checksum data starts here
- *
- * the maximum mic-header len = 9 + 17 = 26
- *	mic-header
- *	----------
- * [2]  30  SEQUENCE tag
- * [3]  ??  mic-header length: (max 23) = TokenID + ContextID
- *
- *		TokenID  - all fields constant and can be hardcoded
- *		-------
- * [4]    02  Type 2
- * [5]    02  Length 2
- * [6][7] 01 01  TokenID (SPKM_MIC_TOK)
- *
- *		ContextID  - encoded length not constant, calculated
- *		---------
- * [8]  03  Type 3
- * [9]  ??  encoded length
- * [10] ??  ctxzbit
- * [11]     contextid
- *
- * mic_header piece of checksum data ends here.
- *
- *	int-cksum - encoded length not constant, calculated
- *	---------
- * [??] 03  Type 3
- * [??] ??  encoded length
- * [??] ??  md5zbit
- * [??]     int-cksum (NID_md5 = 16)
- *
- * maximum SPKM-MIC innercontext token length =
- *	10 + encoded contextid_size(17 max) + 2 + encoded
- *	cksum_size (17 maxfor NID_md5) = 46
- */
-
-/*
- * spkm3_mic_header()
- *
- * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
- * elen: 16 byte context id asn1 bitstring encoded length
- */
-void
-spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
-{
-	char *hptr = *hdrbuf;
-	char *top = *hdrbuf;
-
-	*(u8 *)hptr++ = 0x30;
-	*(u8 *)hptr++ = elen + 7;  /* on the wire header length */
-
-	/* tokenid */
-	*(u8 *)hptr++ = 0x02;
-	*(u8 *)hptr++ = 0x02;
-	*(u8 *)hptr++ = 0x01;
-	*(u8 *)hptr++ = 0x01;
-
-	/* coniextid */
-	*(u8 *)hptr++ = 0x03;
-	*(u8 *)hptr++ = elen + 1;  /* add 1 to include zbit */
-	*(u8 *)hptr++ = zbit;
-	memcpy(hptr, ctxdata, elen);
-	hptr += elen;
-	*hdrlen = hptr - top;
-}
-
-/*
- * spkm3_mic_innercontext_token()
- *
- * *tokp points to the beginning of the SPKM_MIC token described
- * in rfc 2025, section 3.2.1:
- *
- * toklen is the inner token length
- */
-void
-spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
-{
-	unsigned char *ict = *tokp;
-
-	*(u8 *)ict++ = 0xa4;
-	*(u8 *)ict++ = toklen;
-	memcpy(ict, mic_hdr->data, mic_hdr->len);
-	ict += mic_hdr->len;
-
-	*(u8 *)ict++ = 0x03;
-	*(u8 *)ict++ = md5elen + 1;  /* add 1 to include zbit */
-	*(u8 *)ict++ = md5zbit;
-	memcpy(ict, md5cksum->data, md5elen);
-}
-
-u32
-spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum)
-{
-	struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL};
-	unsigned char *ptr = *tokp;
-	int ctxelen;
-	u32 ret = GSS_S_DEFECTIVE_TOKEN;
-
-	/* spkm3 innercontext token preamble */
-	if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) {
-		dprintk("RPC: BAD SPKM ictoken preamble\n");
-		goto out;
-	}
-
-	*mic_hdrlen = ptr[3];
-
-	/* token type */
-	if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) {
-		dprintk("RPC: BAD asn1 SPKM3 token type\n");
-		goto out;
-	}
-
-	/* only support SPKM_MIC_TOK */
-	if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
-		dprintk("RPC: ERROR unsupported SPKM3 token\n");
-		goto out;
-	}
-
-	/* contextid */
-	if (ptr[8] != 0x03) {
-		dprintk("RPC: BAD SPKM3 asn1 context-id type\n");
-		goto out;
-	}
-
-	ctxelen = ptr[9];
-	if (ctxelen > 17) {  /* length includes asn1 zbit octet */
-		dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen);
-		goto out;
-	}
-
-	/* ignore ptr[10] */
-
-	if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16))
-		goto out;
-
-	/*
-	 * in the current implementation: the optional int-alg is not present
-	 * so the default int-alg (md5) is used the optional snd-seq field is
-	 * also not present
-	 */
-
-	if (*mic_hdrlen != 6 + ctxelen) {
-		dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only "
-			"support default int-alg (should be absent) "
-			"and do not support snd-seq\n", *mic_hdrlen);
-		goto out;
-	}
-	/* checksum */
-	*cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */
-
-	ret = GSS_S_COMPLETE;
-out:
-	kfree(spkm3_ctx_id.data);
-	return ret;
-}
-
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
deleted file mode 100644
index cc21ee860bb6..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- *  linux/net/sunrpc/gss_spkm3_unseal.c
- *
- *  Copyright (c) 2003 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Andy Adamson <andros@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/sunrpc/gss_spkm3.h>
-#include <linux/crypto.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-/*
- * spkm3_read_token()
- *
- * only SPKM_MIC_TOK with md5 intg-alg is supported
- */
-u32
-spkm3_read_token(struct spkm3_ctx *ctx,
-		struct xdr_netobj *read_token,    /* checksum */
-		struct xdr_buf *message_buffer, /* signbuf */
-		int toktype)
-{
-	s32 checksum_type;
-	s32 code;
-	struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
-	char cksumdata[16];
-	struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
-	unsigned char *ptr = (unsigned char *)read_token->data;
-	unsigned char *cksum;
-	int bodysize, md5elen;
-	int mic_hdrlen;
-	u32 ret = GSS_S_DEFECTIVE_TOKEN;
-
-	if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
-					&bodysize, &ptr, read_token->len))
-		goto out;
-
-	/* decode the token */
-
-	if (toktype != SPKM_MIC_TOK) {
-		dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
-		goto out;
-	}
-
-	if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
-		goto out;
-
-	if (*cksum++ != 0x03) {
-		dprintk("RPC: spkm3_read_token BAD checksum type\n");
-		goto out;
-	}
-	md5elen = *cksum++;
-	cksum++; /* move past the zbit */
-
-	if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
-		goto out;
-
-	/* HARD CODED FOR MD5 */
-
-	/* compute the checksum of the message.
-	 * ptr + 2 = start of header piece of checksum
-	 * mic_hdrlen + 2 = length of header piece of checksum
-	 */
-	ret = GSS_S_DEFECTIVE_TOKEN;
-	if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
-		dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
-			"algorithm\n");
-		goto out;
-	}
-
-	checksum_type = CKSUMTYPE_HMAC_MD5;
-
-	code = make_spkm3_checksum(checksum_type,
-		&ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
-		message_buffer, 0, &md5cksum);
-
-	if (code)
-		goto out;
-
-	ret = GSS_S_BAD_SIG;
-	code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
-	if (code) {
-		dprintk("RPC: bad MIC checksum\n");
-		goto out;
-	}
-
-
-	/* XXX: need to add expiration and sequencing */
-	ret = GSS_S_COMPLETE;
-out:
-	kfree(wire_cksum.data);
-	return ret;
-}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index cc385b3a59c2..8d0f7d3c71c8 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -67,7 +67,6 @@ static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b)
 
 #define RSI_HASHBITS 6
 #define RSI_HASHMAX (1<<RSI_HASHBITS)
-#define RSI_HASHMASK (RSI_HASHMAX-1)
 
 struct rsi {
 	struct cache_head h;
@@ -319,7 +318,6 @@ static struct rsi *rsi_update(struct rsi *new, struct rsi *old)
 
 #define RSC_HASHBITS 10
 #define RSC_HASHMAX (1<<RSC_HASHBITS)
-#define RSC_HASHMASK (RSC_HASHMAX-1)
 
 #define GSS_SEQ_WIN 128
 
@@ -964,7 +962,7 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
 	if (rqstp->rq_gssclient == NULL)
 		return SVC_DENIED;
 	stat = svcauth_unix_set_client(rqstp);
-	if (stat == SVC_DROP)
+	if (stat == SVC_DROP || stat == SVC_CLOSE)
 		return stat;
 	return SVC_OK;
 }
@@ -1018,7 +1016,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
 		return SVC_DENIED;
 	memset(&rsikey, 0, sizeof(rsikey));
 	if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
-		return SVC_DROP;
+		return SVC_CLOSE;
 	*authp = rpc_autherr_badverf;
 	if (svc_safe_getnetobj(argv, &tmpobj)) {
 		kfree(rsikey.in_handle.data);
@@ -1026,38 +1024,35 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
 	}
 	if (dup_netobj(&rsikey.in_token, &tmpobj)) {
 		kfree(rsikey.in_handle.data);
-		return SVC_DROP;
+		return SVC_CLOSE;
 	}
 
 	/* Perform upcall, or find upcall result: */
 	rsip = rsi_lookup(&rsikey);
 	rsi_free(&rsikey);
 	if (!rsip)
-		return SVC_DROP;
-	switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
-	case -EAGAIN:
-	case -ETIMEDOUT:
-	case -ENOENT:
+		return SVC_CLOSE;
+	if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0)
 		/* No upcall result: */
-		return SVC_DROP;
-	case 0:
-		ret = SVC_DROP;
+		return SVC_CLOSE;
+
+	ret = SVC_CLOSE;
 	/* Got an answer to the upcall; use it: */
 	if (gss_write_init_verf(rqstp, rsip))
 		goto out;
 	if (resv->iov_len + 4 > PAGE_SIZE)
 		goto out;
 	svc_putnl(resv, RPC_SUCCESS);
 	if (svc_safe_putnetobj(resv, &rsip->out_handle))
 		goto out;
 	if (resv->iov_len + 3 * 4 > PAGE_SIZE)
 		goto out;
 	svc_putnl(resv, rsip->major_status);
 	svc_putnl(resv, rsip->minor_status);
 	svc_putnl(resv, GSS_SEQ_WIN);
 	if (svc_safe_putnetobj(resv, &rsip->out_token))
 		goto out;
-	}
+
 	ret = SVC_COMPLETE;
 out:
 	cache_put(&rsip->h, &rsi_cache);
@@ -1106,7 +1101,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 
 	/* credential is:
 	 *   version(==1), proc(0,1,2,3), seq, service (1,2,3), handle
-	 *   at least 5 u32s, and is preceeded by length, so that makes 6.
+	 *   at least 5 u32s, and is preceded by length, so that makes 6.
 	 */
 
 	if (argv->iov_len < 5 * 4)
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 7dcfe0cc3500..1dd1a6890007 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -59,8 +59,8 @@ int bc_send(struct rpc_rqst *req)
59 ret = task->tk_status; 59 ret = task->tk_status;
60 rpc_put_task(task); 60 rpc_put_task(task);
61 } 61 }
62 return ret;
63 dprintk("RPC: bc_send ret= %d\n", ret); 62 dprintk("RPC: bc_send ret= %d\n", ret);
63 return ret;
64} 64}
65 65
66#endif /* CONFIG_NFS_V4_1 */ 66#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2b06410e584e..72ad836e4fe0 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -28,21 +28,21 @@
28#include <linux/workqueue.h> 28#include <linux/workqueue.h>
29#include <linux/mutex.h> 29#include <linux/mutex.h>
30#include <linux/pagemap.h> 30#include <linux/pagemap.h>
31#include <linux/smp_lock.h>
32#include <asm/ioctls.h> 31#include <asm/ioctls.h>
33#include <linux/sunrpc/types.h> 32#include <linux/sunrpc/types.h>
34#include <linux/sunrpc/cache.h> 33#include <linux/sunrpc/cache.h>
35#include <linux/sunrpc/stats.h> 34#include <linux/sunrpc/stats.h>
36#include <linux/sunrpc/rpc_pipe_fs.h> 35#include <linux/sunrpc/rpc_pipe_fs.h>
36#include "netns.h"
37 37
38#define RPCDBG_FACILITY RPCDBG_CACHE 38#define RPCDBG_FACILITY RPCDBG_CACHE
39 39
40static int cache_defer_req(struct cache_req *req, struct cache_head *item); 40static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
41static void cache_revisit_request(struct cache_head *item); 41static void cache_revisit_request(struct cache_head *item);
42 42
43static void cache_init(struct cache_head *h) 43static void cache_init(struct cache_head *h)
44{ 44{
45 time_t now = get_seconds(); 45 time_t now = seconds_since_boot();
46 h->next = NULL; 46 h->next = NULL;
47 h->flags = 0; 47 h->flags = 0;
48 kref_init(&h->ref); 48 kref_init(&h->ref);
@@ -52,7 +52,7 @@ static void cache_init(struct cache_head *h)
52 52
53static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) 53static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
54{ 54{
55 return (h->expiry_time < get_seconds()) || 55 return (h->expiry_time < seconds_since_boot()) ||
56 (detail->flush_time > h->last_refresh); 56 (detail->flush_time > h->last_refresh);
57} 57}
58 58
@@ -127,7 +127,8 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
127static void cache_fresh_locked(struct cache_head *head, time_t expiry) 127static void cache_fresh_locked(struct cache_head *head, time_t expiry)
128{ 128{
129 head->expiry_time = expiry; 129 head->expiry_time = expiry;
130 head->last_refresh = get_seconds(); 130 head->last_refresh = seconds_since_boot();
131 smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
131 set_bit(CACHE_VALID, &head->flags); 132 set_bit(CACHE_VALID, &head->flags);
132} 133}
133 134
@@ -208,11 +209,36 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head
208 /* entry is valid */ 209 /* entry is valid */
209 if (test_bit(CACHE_NEGATIVE, &h->flags)) 210 if (test_bit(CACHE_NEGATIVE, &h->flags))
210 return -ENOENT; 211 return -ENOENT;
211 else 212 else {
213 /*
214 * In combination with write barrier in
215 * sunrpc_cache_update, ensures that anyone
216 * using the cache entry after this sees the
217 * updated contents:
218 */
219 smp_rmb();
212 return 0; 220 return 0;
221 }
213 } 222 }
214} 223}
215 224
225static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h)
226{
227 int rv;
228
229 write_lock(&detail->hash_lock);
230 rv = cache_is_valid(detail, h);
231 if (rv != -EAGAIN) {
232 write_unlock(&detail->hash_lock);
233 return rv;
234 }
235 set_bit(CACHE_NEGATIVE, &h->flags);
236 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
237 write_unlock(&detail->hash_lock);
238 cache_fresh_unlocked(h, detail);
239 return -ENOENT;
240}
241
216/* 242/*
217 * This is the generic cache management routine for all 243 * This is the generic cache management routine for all
218 * the authentication caches. 244 * the authentication caches.
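
The new smp_wmb()/smp_rmb() pair is the standard publish/subscribe ordering idiom: the updater must make the entry's contents globally visible before setting CACHE_VALID, and a reader must observe the flag before it may touch the contents. In generic form (a sketch of the idiom, not the cache code itself):

        /* writer */
        item->data = new_value;
        smp_wmb();                      /* publish the data before the flag */
        set_bit(VALID_BIT, &item->flags);

        /* reader */
        if (test_bit(VALID_BIT, &item->flags)) {
                smp_rmb();              /* flag seen: data writes visible too */
                use(item->data);
        }
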
@@ -238,7 +264,7 @@ int cache_check(struct cache_detail *detail,
238 264
239 /* now see if we want to start an upcall */ 265 /* now see if we want to start an upcall */
240 refresh_age = (h->expiry_time - h->last_refresh); 266 refresh_age = (h->expiry_time - h->last_refresh);
241 age = get_seconds() - h->last_refresh; 267 age = seconds_since_boot() - h->last_refresh;
242 268
243 if (rqstp == NULL) { 269 if (rqstp == NULL) {
244 if (rv == -EAGAIN) 270 if (rv == -EAGAIN)
@@ -251,14 +277,8 @@ int cache_check(struct cache_detail *detail,
251 case -EINVAL: 277 case -EINVAL:
252 clear_bit(CACHE_PENDING, &h->flags); 278 clear_bit(CACHE_PENDING, &h->flags);
253 cache_revisit_request(h); 279 cache_revisit_request(h);
254 if (rv == -EAGAIN) { 280 rv = try_to_negate_entry(detail, h);
255 set_bit(CACHE_NEGATIVE, &h->flags);
256 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY);
257 cache_fresh_unlocked(h, detail);
258 rv = -ENOENT;
259 }
260 break; 281 break;
261
262 case -EAGAIN: 282 case -EAGAIN:
263 clear_bit(CACHE_PENDING, &h->flags); 283 clear_bit(CACHE_PENDING, &h->flags);
264 cache_revisit_request(h); 284 cache_revisit_request(h);
@@ -268,8 +288,11 @@ int cache_check(struct cache_detail *detail,
268 } 288 }
269 289
270 if (rv == -EAGAIN) { 290 if (rv == -EAGAIN) {
271 if (cache_defer_req(rqstp, h) < 0) { 291 if (!cache_defer_req(rqstp, h)) {
272 /* Request is not deferred */ 292 /*
293 * Request was not deferred; handle it as best
294 * we can ourselves:
295 */
273 rv = cache_is_valid(detail, h); 296 rv = cache_is_valid(detail, h);
274 if (rv == -EAGAIN) 297 if (rv == -EAGAIN)
275 rv = -ETIMEDOUT; 298 rv = -ETIMEDOUT;
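
For callers, the contract of cache_check() keeps its shape after this change: 0 means the entry is usable, -ENOENT a negative entry, -EAGAIN that the request was deferred pending an upcall, and -ETIMEDOUT that no answer could be obtained. A hedged sketch of a typical server-side caller, assuming it maps these onto the svc_process() disposition codes seen earlier:

        switch (cache_check(detail, h, &rqstp->rq_chandle)) {
        case 0:
                break;                  /* entry valid: go ahead and use it */
        case -EAGAIN:
                return SVC_DROP;        /* deferred; revisited after the upcall */
        case -ENOENT:
                return SVC_DENIED;      /* negative entry */
        default:
                return SVC_CLOSE;       /* -ETIMEDOUT etc.: give up */
        }
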
@@ -388,11 +411,11 @@ static int cache_clean(void)
388 return -1; 411 return -1;
389 } 412 }
390 current_detail = list_entry(next, struct cache_detail, others); 413 current_detail = list_entry(next, struct cache_detail, others);
391 if (current_detail->nextcheck > get_seconds()) 414 if (current_detail->nextcheck > seconds_since_boot())
392 current_index = current_detail->hash_size; 415 current_index = current_detail->hash_size;
393 else { 416 else {
394 current_index = 0; 417 current_index = 0;
395 current_detail->nextcheck = get_seconds()+30*60; 418 current_detail->nextcheck = seconds_since_boot()+30*60;
396 } 419 }
397 } 420 }
398 421
@@ -477,7 +500,7 @@ EXPORT_SYMBOL_GPL(cache_flush);
477void cache_purge(struct cache_detail *detail) 500void cache_purge(struct cache_detail *detail)
478{ 501{
479 detail->flush_time = LONG_MAX; 502 detail->flush_time = LONG_MAX;
480 detail->nextcheck = get_seconds(); 503 detail->nextcheck = seconds_since_boot();
481 cache_flush(); 504 cache_flush();
482 detail->flush_time = 1; 505 detail->flush_time = 1;
483} 506}
@@ -506,81 +529,157 @@ EXPORT_SYMBOL_GPL(cache_purge);
506 529
507static DEFINE_SPINLOCK(cache_defer_lock); 530static DEFINE_SPINLOCK(cache_defer_lock);
508static LIST_HEAD(cache_defer_list); 531static LIST_HEAD(cache_defer_list);
509static struct list_head cache_defer_hash[DFR_HASHSIZE]; 532static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
510static int cache_defer_cnt; 533static int cache_defer_cnt;
511 534
512static int cache_defer_req(struct cache_req *req, struct cache_head *item) 535static void __unhash_deferred_req(struct cache_deferred_req *dreq)
536{
537 hlist_del_init(&dreq->hash);
538 if (!list_empty(&dreq->recent)) {
539 list_del_init(&dreq->recent);
540 cache_defer_cnt--;
541 }
542}
543
544static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
513{ 545{
514 struct cache_deferred_req *dreq, *discard;
515 int hash = DFR_HASH(item); 546 int hash = DFR_HASH(item);
516 547
517 if (cache_defer_cnt >= DFR_MAX) { 548 INIT_LIST_HEAD(&dreq->recent);
518 /* too much in the cache, randomly drop this one, 549 hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
519 * or continue and drop the oldest below 550}
520 */ 551
521 if (net_random()&1) 552static void setup_deferral(struct cache_deferred_req *dreq,
522 return -ENOMEM; 553 struct cache_head *item,
523 } 554 int count_me)
524 dreq = req->defer(req); 555{
525 if (dreq == NULL)
526 return -ENOMEM;
527 556
528 dreq->item = item; 557 dreq->item = item;
529 558
530 spin_lock(&cache_defer_lock); 559 spin_lock(&cache_defer_lock);
531 560
532 list_add(&dreq->recent, &cache_defer_list); 561 __hash_deferred_req(dreq, item);
533 562
534 if (cache_defer_hash[hash].next == NULL) 563 if (count_me) {
535 INIT_LIST_HEAD(&cache_defer_hash[hash]); 564 cache_defer_cnt++;
536 list_add(&dreq->hash, &cache_defer_hash[hash]); 565 list_add(&dreq->recent, &cache_defer_list);
537
538 /* it is in, now maybe clean up */
539 discard = NULL;
540 if (++cache_defer_cnt > DFR_MAX) {
541 discard = list_entry(cache_defer_list.prev,
542 struct cache_deferred_req, recent);
543 list_del_init(&discard->recent);
544 list_del_init(&discard->hash);
545 cache_defer_cnt--;
546 } 566 }
567
547 spin_unlock(&cache_defer_lock); 568 spin_unlock(&cache_defer_lock);
548 569
570}
571
572struct thread_deferred_req {
573 struct cache_deferred_req handle;
574 struct completion completion;
575};
576
577static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
578{
579 struct thread_deferred_req *dr =
580 container_of(dreq, struct thread_deferred_req, handle);
581 complete(&dr->completion);
582}
583
584static void cache_wait_req(struct cache_req *req, struct cache_head *item)
585{
586 struct thread_deferred_req sleeper;
587 struct cache_deferred_req *dreq = &sleeper.handle;
588
589 sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
590 dreq->revisit = cache_restart_thread;
591
592 setup_deferral(dreq, item, 0);
593
594 if (!test_bit(CACHE_PENDING, &item->flags) ||
595 wait_for_completion_interruptible_timeout(
596 &sleeper.completion, req->thread_wait) <= 0) {
597 /* The completion wasn't completed, so we need
598 * to clean up
599 */
600 spin_lock(&cache_defer_lock);
601 if (!hlist_unhashed(&sleeper.handle.hash)) {
602 __unhash_deferred_req(&sleeper.handle);
603 spin_unlock(&cache_defer_lock);
604 } else {
605 /* cache_revisit_request already removed
606 * this from the hash table, but hasn't
607 * called ->revisit yet. It will very soon
608 * and we need to wait for it.
609 */
610 spin_unlock(&cache_defer_lock);
611 wait_for_completion(&sleeper.completion);
612 }
613 }
614}
615
616static void cache_limit_defers(void)
617{
 618	/* Make sure we haven't exceeded the limit of allowed deferred
619 * requests.
620 */
621 struct cache_deferred_req *discard = NULL;
622
623 if (cache_defer_cnt <= DFR_MAX)
624 return;
625
626 spin_lock(&cache_defer_lock);
627
628 /* Consider removing either the first or the last */
629 if (cache_defer_cnt > DFR_MAX) {
630 if (net_random() & 1)
631 discard = list_entry(cache_defer_list.next,
632 struct cache_deferred_req, recent);
633 else
634 discard = list_entry(cache_defer_list.prev,
635 struct cache_deferred_req, recent);
636 __unhash_deferred_req(discard);
637 }
638 spin_unlock(&cache_defer_lock);
549 if (discard) 639 if (discard)
550 /* there was one too many */
551 discard->revisit(discard, 1); 640 discard->revisit(discard, 1);
641}
552 642
553 if (!test_bit(CACHE_PENDING, &item->flags)) { 643/* Return true if and only if a deferred request is queued. */
554 /* must have just been validated... */ 644static bool cache_defer_req(struct cache_req *req, struct cache_head *item)
555 cache_revisit_request(item); 645{
556 return -EAGAIN; 646 struct cache_deferred_req *dreq;
647
648 if (req->thread_wait) {
649 cache_wait_req(req, item);
650 if (!test_bit(CACHE_PENDING, &item->flags))
651 return false;
557 } 652 }
558 return 0; 653 dreq = req->defer(req);
654 if (dreq == NULL)
655 return false;
656 setup_deferral(dreq, item, 1);
657 if (!test_bit(CACHE_PENDING, &item->flags))
658 /* Bit could have been cleared before we managed to
 659		 * set up the deferral, so we need to revisit just in case
660 */
661 cache_revisit_request(item);
662
663 cache_limit_defers();
664 return true;
559} 665}
560 666
561static void cache_revisit_request(struct cache_head *item) 667static void cache_revisit_request(struct cache_head *item)
562{ 668{
563 struct cache_deferred_req *dreq; 669 struct cache_deferred_req *dreq;
564 struct list_head pending; 670 struct list_head pending;
565 671 struct hlist_node *lp, *tmp;
566 struct list_head *lp;
567 int hash = DFR_HASH(item); 672 int hash = DFR_HASH(item);
568 673
569 INIT_LIST_HEAD(&pending); 674 INIT_LIST_HEAD(&pending);
570 spin_lock(&cache_defer_lock); 675 spin_lock(&cache_defer_lock);
571 676
572 lp = cache_defer_hash[hash].next; 677 hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
573 if (lp) { 678 if (dreq->item == item) {
574 while (lp != &cache_defer_hash[hash]) { 679 __unhash_deferred_req(dreq);
575 dreq = list_entry(lp, struct cache_deferred_req, hash); 680 list_add(&dreq->recent, &pending);
576 lp = lp->next;
577 if (dreq->item == item) {
578 list_del_init(&dreq->hash);
579 list_move(&dreq->recent, &pending);
580 cache_defer_cnt--;
581 }
582 } 681 }
583 } 682
584 spin_unlock(&cache_defer_lock); 683 spin_unlock(&cache_defer_lock);
585 684
586 while (!list_empty(&pending)) { 685 while (!list_empty(&pending)) {
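
cache_wait_req() above is an instance of the on-stack completion pattern: a waiter embeds a struct completion in its own stack frame, hands a callback to another context, and must make absolutely sure that callback has run (or was unregistered) before the frame unwinds. Stripped to its essentials, with hypothetical names:

        struct waiter {
                struct completion done;
        };

        static void waiter_fire(struct waiter *w)
        {
                complete(&w->done);     /* runs in some other context */
        }

        static void wait_for_it(unsigned long timeout)
        {
                struct waiter w;

                w.done = COMPLETION_INITIALIZER_ONSTACK(w.done);
                /* ... register &w so waiter_fire() is eventually called ... */
                if (wait_for_completion_interruptible_timeout(&w.done,
                                                              timeout) <= 0) {
                        /*
                         * Timed out or interrupted: unregister, and if the
                         * callback is already in flight, wait for it anyway;
                         * otherwise it would touch a dead stack frame.
                         */
                }
        }
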
@@ -601,9 +700,8 @@ void cache_clean_deferred(void *owner)
601 700
602 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { 701 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
603 if (dreq->owner == owner) { 702 if (dreq->owner == owner) {
604 list_del_init(&dreq->hash); 703 __unhash_deferred_req(dreq);
605 list_move(&dreq->recent, &pending); 704 list_add(&dreq->recent, &pending);
606 cache_defer_cnt--;
607 } 705 }
608 } 706 }
609 spin_unlock(&cache_defer_lock); 707 spin_unlock(&cache_defer_lock);
@@ -902,7 +1000,7 @@ static int cache_release(struct inode *inode, struct file *filp,
902 filp->private_data = NULL; 1000 filp->private_data = NULL;
903 kfree(rp); 1001 kfree(rp);
904 1002
905 cd->last_close = get_seconds(); 1003 cd->last_close = seconds_since_boot();
906 atomic_dec(&cd->readers); 1004 atomic_dec(&cd->readers);
907 } 1005 }
908 module_put(cd->owner); 1006 module_put(cd->owner);
@@ -1015,6 +1113,23 @@ static void warn_no_listener(struct cache_detail *detail)
1015 } 1113 }
1016} 1114}
1017 1115
1116static bool cache_listeners_exist(struct cache_detail *detail)
1117{
1118 if (atomic_read(&detail->readers))
1119 return true;
1120 if (detail->last_close == 0)
1121 /* This cache was never opened */
1122 return false;
1123 if (detail->last_close < seconds_since_boot() - 30)
1124 /*
1125 * We allow for the possibility that someone might
1126 * restart a userspace daemon without restarting the
1127 * server; but after 30 seconds, we give up.
1128 */
1129 return false;
1130 return true;
1131}
1132
1018/* 1133/*
1019 * register an upcall request to user-space and queue it up for read() by the 1134 * register an upcall request to user-space and queue it up for read() by the
1020 * upcall daemon. 1135 * upcall daemon.
@@ -1033,10 +1148,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
1033 char *bp; 1148 char *bp;
1034 int len; 1149 int len;
1035 1150
1036 if (atomic_read(&detail->readers) == 0 && 1151 if (!cache_listeners_exist(detail)) {
1037 detail->last_close < get_seconds() - 30) { 1152 warn_no_listener(detail);
1038 warn_no_listener(detail); 1153 return -EINVAL;
1039 return -EINVAL;
1040 } 1154 }
1041 1155
1042 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1156 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -1095,13 +1209,19 @@ int qword_get(char **bpp, char *dest, int bufsize)
1095 if (bp[0] == '\\' && bp[1] == 'x') { 1209 if (bp[0] == '\\' && bp[1] == 'x') {
1096 /* HEX STRING */ 1210 /* HEX STRING */
1097 bp += 2; 1211 bp += 2;
1098 while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) { 1212 while (len < bufsize) {
1099 int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1213 int h, l;
1100 bp++; 1214
1101 byte <<= 4; 1215 h = hex_to_bin(bp[0]);
1102 byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1216 if (h < 0)
1103 *dest++ = byte; 1217 break;
1104 bp++; 1218
1219 l = hex_to_bin(bp[1]);
1220 if (l < 0)
1221 break;
1222
1223 *dest++ = (h << 4) | l;
1224 bp += 2;
1105 len++; 1225 len++;
1106 } 1226 }
1107 } else { 1227 } else {
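
hex_to_bin() (from lib/hexdump.c) returns the value 0..15 of one ASCII hex digit, or a negative value for anything else, so the rewritten loop validates and converts each nibble in a single call instead of the old isxdigit()/toupper() arithmetic:

        int hi = hex_to_bin('a');               /* 10 */
        int lo = hex_to_bin('7');               /*  7 */
        unsigned char byte = (hi << 4) | lo;    /* 0xa7 */
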
@@ -1219,7 +1339,8 @@ static int c_show(struct seq_file *m, void *p)
1219 1339
1220 ifdebug(CACHE) 1340 ifdebug(CACHE)
1221 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", 1341 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
1222 cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); 1342 convert_to_wallclock(cp->expiry_time),
1343 atomic_read(&cp->ref.refcount), cp->flags);
1223 cache_get(cp); 1344 cache_get(cp);
1224 if (cache_check(cd, cp, NULL)) 1345 if (cache_check(cd, cp, NULL))
1225 /* cache_check does a cache_put on failure */ 1346 /* cache_check does a cache_put on failure */
@@ -1285,7 +1406,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
1285 unsigned long p = *ppos; 1406 unsigned long p = *ppos;
1286 size_t len; 1407 size_t len;
1287 1408
1288 sprintf(tbuf, "%lu\n", cd->flush_time); 1409 sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time));
1289 len = strlen(tbuf); 1410 len = strlen(tbuf);
1290 if (p >= len) 1411 if (p >= len)
1291 return 0; 1412 return 0;
@@ -1303,19 +1424,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1303 struct cache_detail *cd) 1424 struct cache_detail *cd)
1304{ 1425{
1305 char tbuf[20]; 1426 char tbuf[20];
1306 char *ep; 1427 char *bp, *ep;
1307 long flushtime; 1428
1308 if (*ppos || count > sizeof(tbuf)-1) 1429 if (*ppos || count > sizeof(tbuf)-1)
1309 return -EINVAL; 1430 return -EINVAL;
1310 if (copy_from_user(tbuf, buf, count)) 1431 if (copy_from_user(tbuf, buf, count))
1311 return -EFAULT; 1432 return -EFAULT;
1312 tbuf[count] = 0; 1433 tbuf[count] = 0;
1313 flushtime = simple_strtoul(tbuf, &ep, 0); 1434 simple_strtoul(tbuf, &ep, 0);
1314 if (*ep && *ep != '\n') 1435 if (*ep && *ep != '\n')
1315 return -EINVAL; 1436 return -EINVAL;
1316 1437
1317 cd->flush_time = flushtime; 1438 bp = tbuf;
1318 cd->nextcheck = get_seconds(); 1439 cd->flush_time = get_expiry(&bp);
1440 cd->nextcheck = seconds_since_boot();
1319 cache_flush(); 1441 cache_flush();
1320 1442
1321 *ppos += count; 1443 *ppos += count;
@@ -1348,15 +1470,10 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
1348static long cache_ioctl_procfs(struct file *filp, 1470static long cache_ioctl_procfs(struct file *filp,
1349 unsigned int cmd, unsigned long arg) 1471 unsigned int cmd, unsigned long arg)
1350{ 1472{
1351 long ret;
1352 struct inode *inode = filp->f_path.dentry->d_inode; 1473 struct inode *inode = filp->f_path.dentry->d_inode;
1353 struct cache_detail *cd = PDE(inode)->data; 1474 struct cache_detail *cd = PDE(inode)->data;
1354 1475
1355 lock_kernel(); 1476 return cache_ioctl(inode, filp, cmd, arg, cd);
1356 ret = cache_ioctl(inode, filp, cmd, arg, cd);
1357 unlock_kernel();
1358
1359 return ret;
1360} 1477}
1361 1478
1362static int cache_open_procfs(struct inode *inode, struct file *filp) 1479static int cache_open_procfs(struct inode *inode, struct file *filp)
@@ -1441,10 +1558,13 @@ static const struct file_operations cache_flush_operations_procfs = {
1441 .read = read_flush_procfs, 1558 .read = read_flush_procfs,
1442 .write = write_flush_procfs, 1559 .write = write_flush_procfs,
1443 .release = release_flush_procfs, 1560 .release = release_flush_procfs,
1561 .llseek = no_llseek,
1444}; 1562};
1445 1563
1446static void remove_cache_proc_entries(struct cache_detail *cd) 1564static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
1447{ 1565{
1566 struct sunrpc_net *sn;
1567
1448 if (cd->u.procfs.proc_ent == NULL) 1568 if (cd->u.procfs.proc_ent == NULL)
1449 return; 1569 return;
1450 if (cd->u.procfs.flush_ent) 1570 if (cd->u.procfs.flush_ent)
@@ -1454,15 +1574,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
1454 if (cd->u.procfs.content_ent) 1574 if (cd->u.procfs.content_ent)
1455 remove_proc_entry("content", cd->u.procfs.proc_ent); 1575 remove_proc_entry("content", cd->u.procfs.proc_ent);
1456 cd->u.procfs.proc_ent = NULL; 1576 cd->u.procfs.proc_ent = NULL;
1457 remove_proc_entry(cd->name, proc_net_rpc); 1577 sn = net_generic(net, sunrpc_net_id);
1578 remove_proc_entry(cd->name, sn->proc_net_rpc);
1458} 1579}
1459 1580
1460#ifdef CONFIG_PROC_FS 1581#ifdef CONFIG_PROC_FS
1461static int create_cache_proc_entries(struct cache_detail *cd) 1582static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1462{ 1583{
1463 struct proc_dir_entry *p; 1584 struct proc_dir_entry *p;
1585 struct sunrpc_net *sn;
1464 1586
1465 cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); 1587 sn = net_generic(net, sunrpc_net_id);
1588 cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
1466 if (cd->u.procfs.proc_ent == NULL) 1589 if (cd->u.procfs.proc_ent == NULL)
1467 goto out_nomem; 1590 goto out_nomem;
1468 cd->u.procfs.channel_ent = NULL; 1591 cd->u.procfs.channel_ent = NULL;
@@ -1493,11 +1616,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
1493 } 1616 }
1494 return 0; 1617 return 0;
1495out_nomem: 1618out_nomem:
1496 remove_cache_proc_entries(cd); 1619 remove_cache_proc_entries(cd, net);
1497 return -ENOMEM; 1620 return -ENOMEM;
1498} 1621}
1499#else /* CONFIG_PROC_FS */ 1622#else /* CONFIG_PROC_FS */
1500static int create_cache_proc_entries(struct cache_detail *cd) 1623static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1501{ 1624{
1502 return 0; 1625 return 0;
1503} 1626}
@@ -1508,23 +1631,33 @@ void __init cache_initialize(void)
1508 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); 1631 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
1509} 1632}
1510 1633
1511int cache_register(struct cache_detail *cd) 1634int cache_register_net(struct cache_detail *cd, struct net *net)
1512{ 1635{
1513 int ret; 1636 int ret;
1514 1637
1515 sunrpc_init_cache_detail(cd); 1638 sunrpc_init_cache_detail(cd);
1516 ret = create_cache_proc_entries(cd); 1639 ret = create_cache_proc_entries(cd, net);
1517 if (ret) 1640 if (ret)
1518 sunrpc_destroy_cache_detail(cd); 1641 sunrpc_destroy_cache_detail(cd);
1519 return ret; 1642 return ret;
1520} 1643}
1644
1645int cache_register(struct cache_detail *cd)
1646{
1647 return cache_register_net(cd, &init_net);
1648}
1521EXPORT_SYMBOL_GPL(cache_register); 1649EXPORT_SYMBOL_GPL(cache_register);
1522 1650
1523void cache_unregister(struct cache_detail *cd) 1651void cache_unregister_net(struct cache_detail *cd, struct net *net)
1524{ 1652{
1525 remove_cache_proc_entries(cd); 1653 remove_cache_proc_entries(cd, net);
1526 sunrpc_destroy_cache_detail(cd); 1654 sunrpc_destroy_cache_detail(cd);
1527} 1655}
1656
1657void cache_unregister(struct cache_detail *cd)
1658{
1659 cache_unregister_net(cd, &init_net);
1660}
1528EXPORT_SYMBOL_GPL(cache_unregister); 1661EXPORT_SYMBOL_GPL(cache_unregister);
1529 1662
1530static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, 1663static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
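
cache_register_net() is what lets each network namespace own its cache's /proc entries. A sketch of how a user of the new interface would typically wire it up from pernet operations (the names below are illustrative, not part of this patch):

        static __net_init int my_cache_net_init(struct net *net)
        {
                return cache_register_net(&my_cache_detail, net);
        }

        static __net_exit void my_cache_net_exit(struct net *net)
        {
                cache_unregister_net(&my_cache_detail, net);
        }

        static struct pernet_operations my_cache_net_ops = {
                .init = my_cache_net_init,
                .exit = my_cache_net_exit,
        };

        /* registered once at module load via
         * register_pernet_subsys(&my_cache_net_ops) */
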
@@ -1555,13 +1688,8 @@ static long cache_ioctl_pipefs(struct file *filp,
1555{ 1688{
1556 struct inode *inode = filp->f_dentry->d_inode; 1689 struct inode *inode = filp->f_dentry->d_inode;
1557 struct cache_detail *cd = RPC_I(inode)->private; 1690 struct cache_detail *cd = RPC_I(inode)->private;
1558 long ret;
1559 1691
1560 lock_kernel(); 1692 return cache_ioctl(inode, filp, cmd, arg, cd);
1561 ret = cache_ioctl(inode, filp, cmd, arg, cd);
1562 unlock_kernel();
1563
1564 return ret;
1565} 1693}
1566 1694
1567static int cache_open_pipefs(struct inode *inode, struct file *filp) 1695static int cache_open_pipefs(struct inode *inode, struct file *filp)
@@ -1646,6 +1774,7 @@ const struct file_operations cache_flush_operations_pipefs = {
1646 .read = read_flush_pipefs, 1774 .read = read_flush_pipefs,
1647 .write = write_flush_pipefs, 1775 .write = write_flush_pipefs,
1648 .release = release_flush_pipefs, 1776 .release = release_flush_pipefs,
1777 .llseek = no_llseek,
1649}; 1778};
1650 1779
1651int sunrpc_cache_register_pipefs(struct dentry *parent, 1780int sunrpc_cache_register_pipefs(struct dentry *parent,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa5549079d79..8c9141583d6f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -13,10 +13,6 @@
13 * and need to be refreshed, or when a packet was damaged in transit. 13 * and need to be refreshed, or when a packet was damaged in transit.
14 * This may be have to be moved to the VFS layer. 14 * This may be have to be moved to the VFS layer.
15 * 15 *
16 * NB: BSD uses a more intelligent approach to guessing when a request
17 * or reply has been lost by keeping the RTO estimate for each procedure.
18 * We currently make do with a constant timeout value.
19 *
20 * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com> 16 * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
21 * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> 17 * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
22 */ 18 */
@@ -32,7 +28,9 @@
32#include <linux/slab.h> 28#include <linux/slab.h>
33#include <linux/utsname.h> 29#include <linux/utsname.h>
34#include <linux/workqueue.h> 30#include <linux/workqueue.h>
31#include <linux/in.h>
35#include <linux/in6.h> 32#include <linux/in6.h>
33#include <linux/un.h>
36 34
37#include <linux/sunrpc/clnt.h> 35#include <linux/sunrpc/clnt.h>
38#include <linux/sunrpc/rpc_pipe_fs.h> 36#include <linux/sunrpc/rpc_pipe_fs.h>
@@ -284,6 +282,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
284 struct rpc_xprt *xprt; 282 struct rpc_xprt *xprt;
285 struct rpc_clnt *clnt; 283 struct rpc_clnt *clnt;
286 struct xprt_create xprtargs = { 284 struct xprt_create xprtargs = {
285 .net = args->net,
287 .ident = args->protocol, 286 .ident = args->protocol,
288 .srcaddr = args->saddress, 287 .srcaddr = args->saddress,
289 .dstaddr = args->address, 288 .dstaddr = args->address,
@@ -297,22 +296,27 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
297 * up a string representation of the passed-in address. 296 * up a string representation of the passed-in address.
298 */ 297 */
299 if (args->servername == NULL) { 298 if (args->servername == NULL) {
299 struct sockaddr_un *sun =
300 (struct sockaddr_un *)args->address;
301 struct sockaddr_in *sin =
302 (struct sockaddr_in *)args->address;
303 struct sockaddr_in6 *sin6 =
304 (struct sockaddr_in6 *)args->address;
305
300 servername[0] = '\0'; 306 servername[0] = '\0';
301 switch (args->address->sa_family) { 307 switch (args->address->sa_family) {
302 case AF_INET: { 308 case AF_LOCAL:
303 struct sockaddr_in *sin = 309 snprintf(servername, sizeof(servername), "%s",
304 (struct sockaddr_in *)args->address; 310 sun->sun_path);
311 break;
312 case AF_INET:
305 snprintf(servername, sizeof(servername), "%pI4", 313 snprintf(servername, sizeof(servername), "%pI4",
306 &sin->sin_addr.s_addr); 314 &sin->sin_addr.s_addr);
307 break; 315 break;
308 } 316 case AF_INET6:
309 case AF_INET6: {
310 struct sockaddr_in6 *sin =
311 (struct sockaddr_in6 *)args->address;
312 snprintf(servername, sizeof(servername), "%pI6", 317 snprintf(servername, sizeof(servername), "%pI6",
313 &sin->sin6_addr); 318 &sin6->sin6_addr);
314 break; 319 break;
315 }
316 default: 320 default:
317 /* caller wants default server name, but 321 /* caller wants default server name, but
318 * address family isn't recognized. */ 322 * address family isn't recognized. */
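
The %pI4 and %pI6 conversions used here are kernel printf extensions: they take a pointer to the raw, network-byte-order address and render the usual dotted-quad or colon-separated form, so no byte swapping is needed at the call site. For example:

        __be32 addr = htonl(0x7f000001);
        char buf[INET6_ADDRSTRLEN];

        snprintf(buf, sizeof(buf), "%pI4", &addr);      /* "127.0.0.1" */
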
@@ -435,7 +439,9 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
435 if (!(rovr->tk_flags & RPC_TASK_KILLED)) { 439 if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
436 rovr->tk_flags |= RPC_TASK_KILLED; 440 rovr->tk_flags |= RPC_TASK_KILLED;
437 rpc_exit(rovr, -EIO); 441 rpc_exit(rovr, -EIO);
438 rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr); 442 if (RPC_IS_QUEUED(rovr))
443 rpc_wake_up_queued_task(rovr->tk_waitqueue,
444 rovr);
439 } 445 }
440 } 446 }
441 spin_unlock(&clnt->cl_lock); 447 spin_unlock(&clnt->cl_lock);
@@ -596,6 +602,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
596 } 602 }
597} 603}
598 604
605void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
606{
607 rpc_task_release_client(task);
608 rpc_task_set_client(task, clnt);
609}
610EXPORT_SYMBOL_GPL(rpc_task_reset_client);
611
612
599static void 613static void
600rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) 614rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
601{ 615{
@@ -635,12 +649,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
635 rpc_task_set_client(task, task_setup_data->rpc_client); 649 rpc_task_set_client(task, task_setup_data->rpc_client);
636 rpc_task_set_rpc_message(task, task_setup_data->rpc_message); 650 rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
637 651
638 if (task->tk_status != 0) {
639 int ret = task->tk_status;
640 rpc_put_task(task);
641 return ERR_PTR(ret);
642 }
643
644 if (task->tk_action == NULL) 652 if (task->tk_action == NULL)
645 rpc_call_start(task); 653 rpc_call_start(task);
646 654
@@ -988,20 +996,26 @@ call_refreshresult(struct rpc_task *task)
988 dprint_status(task); 996 dprint_status(task);
989 997
990 task->tk_status = 0; 998 task->tk_status = 0;
991 task->tk_action = call_allocate; 999 task->tk_action = call_refresh;
992 if (status >= 0 && rpcauth_uptodatecred(task))
993 return;
994 switch (status) { 1000 switch (status) {
995 case -EACCES: 1001 case 0:
996 rpc_exit(task, -EACCES); 1002 if (rpcauth_uptodatecred(task))
997 return; 1003 task->tk_action = call_allocate;
998 case -ENOMEM:
999 rpc_exit(task, -ENOMEM);
1000 return; 1004 return;
1001 case -ETIMEDOUT: 1005 case -ETIMEDOUT:
1002 rpc_delay(task, 3*HZ); 1006 rpc_delay(task, 3*HZ);
1007 case -EAGAIN:
1008 status = -EACCES;
1009 if (!task->tk_cred_retry)
1010 break;
1011 task->tk_cred_retry--;
1012 dprintk("RPC: %5u %s: retry refresh creds\n",
1013 task->tk_pid, __func__);
1014 return;
1003 } 1015 }
1004 task->tk_action = call_refresh; 1016 dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
1017 task->tk_pid, __func__, status);
1018 rpc_exit(task, status);
1005} 1019}
1006 1020
1007/* 1021/*
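
The reworked call_refreshresult() turns the old immediate failures into a bounded retry: each -EAGAIN (and each -ETIMEDOUT, after a delay) consumes one tk_cred_retry, and only when the counter is exhausted does the task exit with -EACCES. The two-column diff obscures that the -ETIMEDOUT case deliberately falls through, so the skeleton of the switch is:

        case -ETIMEDOUT:
                rpc_delay(task, 3*HZ);
                /* fall through: a timeout also consumes one retry */
        case -EAGAIN:
                if (!task->tk_cred_retry)
                        break;          /* retries exhausted: fail -EACCES */
                task->tk_cred_retry--;
                return;                 /* tk_action is still call_refresh */
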
@@ -1047,7 +1061,7 @@ call_allocate(struct rpc_task *task)
1047 1061
1048 dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); 1062 dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
1049 1063
1050 if (RPC_IS_ASYNC(task) || !signalled()) { 1064 if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) {
1051 task->tk_action = call_allocate; 1065 task->tk_action = call_allocate;
1052 rpc_delay(task, HZ>>4); 1066 rpc_delay(task, HZ>>4);
1053 return; 1067 return;
@@ -1088,7 +1102,7 @@ static void
1088rpc_xdr_encode(struct rpc_task *task) 1102rpc_xdr_encode(struct rpc_task *task)
1089{ 1103{
1090 struct rpc_rqst *req = task->tk_rqstp; 1104 struct rpc_rqst *req = task->tk_rqstp;
1091 kxdrproc_t encode; 1105 kxdreproc_t encode;
1092 __be32 *p; 1106 __be32 *p;
1093 1107
1094 dprint_status(task); 1108 dprint_status(task);
@@ -1161,6 +1175,9 @@ call_bind_status(struct rpc_task *task)
1161 status = -EOPNOTSUPP; 1175 status = -EOPNOTSUPP;
1162 break; 1176 break;
1163 } 1177 }
1178 if (task->tk_rebind_retry == 0)
1179 break;
1180 task->tk_rebind_retry--;
1164 rpc_delay(task, 3*HZ); 1181 rpc_delay(task, 3*HZ);
1165 goto retry_timeout; 1182 goto retry_timeout;
1166 case -ETIMEDOUT: 1183 case -ETIMEDOUT:
@@ -1497,7 +1514,10 @@ call_timeout(struct rpc_task *task)
1497 if (clnt->cl_chatty) 1514 if (clnt->cl_chatty)
1498 printk(KERN_NOTICE "%s: server %s not responding, timed out\n", 1515 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
1499 clnt->cl_protname, clnt->cl_server); 1516 clnt->cl_protname, clnt->cl_server);
1500 rpc_exit(task, -EIO); 1517 if (task->tk_flags & RPC_TASK_TIMEOUT)
1518 rpc_exit(task, -ETIMEDOUT);
1519 else
1520 rpc_exit(task, -EIO);
1501 return; 1521 return;
1502 } 1522 }
1503 1523
@@ -1528,7 +1548,7 @@ call_decode(struct rpc_task *task)
1528{ 1548{
1529 struct rpc_clnt *clnt = task->tk_client; 1549 struct rpc_clnt *clnt = task->tk_client;
1530 struct rpc_rqst *req = task->tk_rqstp; 1550 struct rpc_rqst *req = task->tk_rqstp;
1531 kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode; 1551 kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
1532 __be32 *p; 1552 __be32 *p;
1533 1553
1534 dprintk("RPC: %5u call_decode (status %d)\n", 1554 dprintk("RPC: %5u call_decode (status %d)\n",
@@ -1675,7 +1695,7 @@ rpc_verify_header(struct rpc_task *task)
1675 rpcauth_invalcred(task); 1695 rpcauth_invalcred(task);
1676 /* Ensure we obtain a new XID! */ 1696 /* Ensure we obtain a new XID! */
1677 xprt_release(task); 1697 xprt_release(task);
1678 task->tk_action = call_refresh; 1698 task->tk_action = call_reserve;
1679 goto out_retry; 1699 goto out_retry;
1680 case RPC_AUTH_BADCRED: 1700 case RPC_AUTH_BADCRED:
1681 case RPC_AUTH_BADVERF: 1701 case RPC_AUTH_BADVERF:
@@ -1769,12 +1789,11 @@ out_overflow:
1769 goto out_garbage; 1789 goto out_garbage;
1770} 1790}
1771 1791
1772static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj) 1792static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
1773{ 1793{
1774 return 0;
1775} 1794}
1776 1795
1777static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj) 1796static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
1778{ 1797{
1779 return 0; 1798 return 0;
1780} 1799}
@@ -1823,23 +1842,15 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
1823 const struct rpc_task *task) 1842 const struct rpc_task *task)
1824{ 1843{
1825 const char *rpc_waitq = "none"; 1844 const char *rpc_waitq = "none";
1826 char *p, action[KSYM_SYMBOL_LEN];
1827 1845
1828 if (RPC_IS_QUEUED(task)) 1846 if (RPC_IS_QUEUED(task))
1829 rpc_waitq = rpc_qname(task->tk_waitqueue); 1847 rpc_waitq = rpc_qname(task->tk_waitqueue);
1830 1848
1831 /* map tk_action pointer to a function name; then trim off 1849 printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
1832 * the "+0x0 [sunrpc]" */
1833 sprint_symbol(action, (unsigned long)task->tk_action);
1834 p = strchr(action, '+');
1835 if (p)
1836 *p = '\0';
1837
1838 printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n",
1839 task->tk_pid, task->tk_flags, task->tk_status, 1850 task->tk_pid, task->tk_flags, task->tk_status,
1840 clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, 1851 clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
1841 clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), 1852 clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
1842 action, rpc_waitq); 1853 task->tk_action, rpc_waitq);
1843} 1854}
1844 1855
1845void rpc_show_tasks(void) 1856void rpc_show_tasks(void)
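
The %ps conversion prints the symbol name a pointer resolves to, which is exactly what the removed sprint_symbol()/strchr() code was emulating by hand (%pS would additionally append the +offset/size suffix). In short:

        printk(KERN_INFO "a:%ps\n", task->tk_action);   /* e.g. "a:call_transmit" */
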
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
new file mode 100644
index 000000000000..d013bf211cae
--- /dev/null
+++ b/net/sunrpc/netns.h
@@ -0,0 +1,19 @@
1#ifndef __SUNRPC_NETNS_H__
2#define __SUNRPC_NETNS_H__
3
4#include <net/net_namespace.h>
5#include <net/netns/generic.h>
6
7struct cache_detail;
8
9struct sunrpc_net {
10 struct proc_dir_entry *proc_net_rpc;
11 struct cache_detail *ip_map_cache;
12};
13
14extern int sunrpc_net_id;
15
16int ip_map_cache_create(struct net *);
17void ip_map_cache_destroy(struct net *);
18
19#endif
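
Code holding a struct net reaches this per-namespace state through net_generic() with the id assigned when the subsystem registers its pernet operations. For instance, a sketch of creating an entry under the namespace's own /proc/net/rpc (the directory name is hypothetical):

        struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);

        proc_mkdir("foo", sn->proc_net_rpc);    /* per-net /proc/net/rpc/foo */
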
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 8c8eef2b8f26..72bc53683965 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -27,9 +27,8 @@
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/sunrpc/rpc_pipe_fs.h> 28#include <linux/sunrpc/rpc_pipe_fs.h>
29#include <linux/sunrpc/cache.h> 29#include <linux/sunrpc/cache.h>
30#include <linux/smp_lock.h>
31 30
32static struct vfsmount *rpc_mount __read_mostly; 31static struct vfsmount *rpc_mnt __read_mostly;
33static int rpc_mount_count; 32static int rpc_mount_count;
34 33
35static struct file_system_type rpc_pipe_fs_type; 34static struct file_system_type rpc_pipe_fs_type;
@@ -163,11 +162,19 @@ rpc_alloc_inode(struct super_block *sb)
163} 162}
164 163
165static void 164static void
166rpc_destroy_inode(struct inode *inode) 165rpc_i_callback(struct rcu_head *head)
167{ 166{
167 struct inode *inode = container_of(head, struct inode, i_rcu);
168 INIT_LIST_HEAD(&inode->i_dentry);
168 kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); 169 kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
169} 170}
170 171
172static void
173rpc_destroy_inode(struct inode *inode)
174{
175 call_rcu(&inode->i_rcu, rpc_i_callback);
176}
177
171static int 178static int
172rpc_pipe_open(struct inode *inode, struct file *filp) 179rpc_pipe_open(struct inode *inode, struct file *filp)
173{ 180{
@@ -204,7 +211,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
204 mutex_lock(&inode->i_mutex); 211 mutex_lock(&inode->i_mutex);
205 if (rpci->ops == NULL) 212 if (rpci->ops == NULL)
206 goto out; 213 goto out;
207 msg = (struct rpc_pipe_msg *)filp->private_data; 214 msg = filp->private_data;
208 if (msg != NULL) { 215 if (msg != NULL) {
209 spin_lock(&inode->i_lock); 216 spin_lock(&inode->i_lock);
210 msg->errno = -EAGAIN; 217 msg->errno = -EAGAIN;
@@ -309,40 +316,33 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
309 return mask; 316 return mask;
310} 317}
311 318
312static int 319static long
313rpc_pipe_ioctl_unlocked(struct file *filp, unsigned int cmd, unsigned long arg) 320rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
314{ 321{
315 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); 322 struct inode *inode = filp->f_path.dentry->d_inode;
323 struct rpc_inode *rpci = RPC_I(inode);
316 int len; 324 int len;
317 325
318 switch (cmd) { 326 switch (cmd) {
319 case FIONREAD: 327 case FIONREAD:
320 if (rpci->ops == NULL) 328 spin_lock(&inode->i_lock);
329 if (rpci->ops == NULL) {
330 spin_unlock(&inode->i_lock);
321 return -EPIPE; 331 return -EPIPE;
332 }
322 len = rpci->pipelen; 333 len = rpci->pipelen;
323 if (filp->private_data) { 334 if (filp->private_data) {
324 struct rpc_pipe_msg *msg; 335 struct rpc_pipe_msg *msg;
325 msg = (struct rpc_pipe_msg *)filp->private_data; 336 msg = filp->private_data;
326 len += msg->len - msg->copied; 337 len += msg->len - msg->copied;
327 } 338 }
339 spin_unlock(&inode->i_lock);
328 return put_user(len, (int __user *)arg); 340 return put_user(len, (int __user *)arg);
329 default: 341 default:
330 return -EINVAL; 342 return -EINVAL;
331 } 343 }
332} 344}
333 345
334static long
335rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
336{
337 long ret;
338
339 lock_kernel();
340 ret = rpc_pipe_ioctl_unlocked(filp, cmd, arg);
341 unlock_kernel();
342
343 return ret;
344}
345
346static const struct file_operations rpc_pipe_fops = { 346static const struct file_operations rpc_pipe_fops = {
347 .owner = THIS_MODULE, 347 .owner = THIS_MODULE,
348 .llseek = no_llseek, 348 .llseek = no_llseek,
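
rpc_pipe_ioctl() follows the standard Big Kernel Lock removal recipe: drop the lock_kernel()/unlock_kernel() bracket and close the one race it actually covered, here FIONREAD racing with rpci->ops teardown, with a lock of appropriate scope (inode->i_lock). In generic form, with a hypothetical locked helper:

        static long my_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg)
        {
                struct inode *inode = filp->f_path.dentry->d_inode;
                long ret;

                spin_lock(&inode->i_lock);      /* narrow lock, not the BKL */
                ret = do_ioctl_locked(inode, filp, cmd, arg);
                spin_unlock(&inode->i_lock);    /* locked section must not sleep */
                return ret;
        }
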
@@ -425,20 +425,20 @@ struct vfsmount *rpc_get_mount(void)
425{ 425{
426 int err; 426 int err;
427 427
428 err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); 428 err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count);
429 if (err != 0) 429 if (err != 0)
430 return ERR_PTR(err); 430 return ERR_PTR(err);
431 return rpc_mount; 431 return rpc_mnt;
432} 432}
433EXPORT_SYMBOL_GPL(rpc_get_mount); 433EXPORT_SYMBOL_GPL(rpc_get_mount);
434 434
435void rpc_put_mount(void) 435void rpc_put_mount(void)
436{ 436{
437 simple_release_fs(&rpc_mount, &rpc_mount_count); 437 simple_release_fs(&rpc_mnt, &rpc_mount_count);
438} 438}
439EXPORT_SYMBOL_GPL(rpc_put_mount); 439EXPORT_SYMBOL_GPL(rpc_put_mount);
440 440
441static int rpc_delete_dentry(struct dentry *dentry) 441static int rpc_delete_dentry(const struct dentry *dentry)
442{ 442{
443 return 1; 443 return 1;
444} 444}
@@ -453,6 +453,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
453 struct inode *inode = new_inode(sb); 453 struct inode *inode = new_inode(sb);
454 if (!inode) 454 if (!inode)
455 return NULL; 455 return NULL;
456 inode->i_ino = get_next_ino();
456 inode->i_mode = mode; 457 inode->i_mode = mode;
457 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 458 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
458 switch(mode & S_IFMT) { 459 switch(mode & S_IFMT) {
@@ -473,7 +474,7 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
473{ 474{
474 struct inode *inode; 475 struct inode *inode;
475 476
476 BUG_ON(!d_unhashed(dentry)); 477 d_drop(dentry);
477 inode = rpc_get_inode(dir->i_sb, mode); 478 inode = rpc_get_inode(dir->i_sb, mode);
478 if (!inode) 479 if (!inode)
479 goto out_err; 480 goto out_err;
@@ -590,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent,
590 } 591 }
591 } 592 }
592 if (!dentry->d_inode) 593 if (!dentry->d_inode)
593 dentry->d_op = &rpc_dentry_operations; 594 d_set_d_op(dentry, &rpc_dentry_operations);
594out_err: 595out_err:
595 return dentry; 596 return dentry;
596} 597}
@@ -1025,17 +1026,17 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1025 return 0; 1026 return 0;
1026} 1027}
1027 1028
1028static int 1029static struct dentry *
1029rpc_get_sb(struct file_system_type *fs_type, 1030rpc_mount(struct file_system_type *fs_type,
1030 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1031 int flags, const char *dev_name, void *data)
1031{ 1032{
1032 return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt); 1033 return mount_single(fs_type, flags, data, rpc_fill_super);
1033} 1034}
1034 1035
1035static struct file_system_type rpc_pipe_fs_type = { 1036static struct file_system_type rpc_pipe_fs_type = {
1036 .owner = THIS_MODULE, 1037 .owner = THIS_MODULE,
1037 .name = "rpc_pipefs", 1038 .name = "rpc_pipefs",
1038 .get_sb = rpc_get_sb, 1039 .mount = rpc_mount,
1039 .kill_sb = kill_litter_super, 1040 .kill_sb = kill_litter_super,
1040}; 1041};
1041 1042
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index dac219a56ae1..e45d2fbbe5a8 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -16,6 +16,7 @@
16 16
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/socket.h> 18#include <linux/socket.h>
19#include <linux/un.h>
19#include <linux/in.h> 20#include <linux/in.h>
20#include <linux/in6.h> 21#include <linux/in6.h>
21#include <linux/kernel.h> 22#include <linux/kernel.h>
@@ -32,6 +33,8 @@
32# define RPCDBG_FACILITY RPCDBG_BIND 33# define RPCDBG_FACILITY RPCDBG_BIND
33#endif 34#endif
34 35
36#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock"
37
35#define RPCBIND_PROGRAM (100000u) 38#define RPCBIND_PROGRAM (100000u)
36#define RPCBIND_PORT (111u) 39#define RPCBIND_PORT (111u)
37 40
@@ -57,10 +60,6 @@ enum {
57 RPCBPROC_GETSTAT, 60 RPCBPROC_GETSTAT,
58}; 61};
59 62
60#define RPCB_HIGHPROC_2 RPCBPROC_CALLIT
61#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR
62#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT
63
64/* 63/*
65 * r_owner 64 * r_owner
66 * 65 *
@@ -162,21 +161,71 @@ static void rpcb_map_release(void *data)
162 kfree(map); 161 kfree(map);
163} 162}
164 163
165static const struct sockaddr_in rpcb_inaddr_loopback = { 164/*
166 .sin_family = AF_INET, 165 * Returns zero on success, otherwise a negative errno value
167 .sin_addr.s_addr = htonl(INADDR_LOOPBACK), 166 * is returned.
168 .sin_port = htons(RPCBIND_PORT), 167 */
169}; 168static int rpcb_create_local_unix(void)
169{
170 static const struct sockaddr_un rpcb_localaddr_rpcbind = {
171 .sun_family = AF_LOCAL,
172 .sun_path = RPCBIND_SOCK_PATHNAME,
173 };
174 struct rpc_create_args args = {
175 .net = &init_net,
176 .protocol = XPRT_TRANSPORT_LOCAL,
177 .address = (struct sockaddr *)&rpcb_localaddr_rpcbind,
178 .addrsize = sizeof(rpcb_localaddr_rpcbind),
179 .servername = "localhost",
180 .program = &rpcb_program,
181 .version = RPCBVERS_2,
182 .authflavor = RPC_AUTH_NULL,
183 };
184 struct rpc_clnt *clnt, *clnt4;
185 int result = 0;
186
187 /*
188 * Because we requested an RPC PING at transport creation time,
189 * this works only if the user space portmapper is rpcbind, and
190 * it's listening on AF_LOCAL on the named socket.
191 */
192 clnt = rpc_create(&args);
193 if (IS_ERR(clnt)) {
194 dprintk("RPC: failed to create AF_LOCAL rpcbind "
195 "client (errno %ld).\n", PTR_ERR(clnt));
196 result = -PTR_ERR(clnt);
197 goto out;
198 }
199
200 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
201 if (IS_ERR(clnt4)) {
202 dprintk("RPC: failed to bind second program to "
203 "rpcbind v4 client (errno %ld).\n",
204 PTR_ERR(clnt4));
205 clnt4 = NULL;
206 }
207
208 /* Protected by rpcb_create_local_mutex */
209 rpcb_local_clnt = clnt;
210 rpcb_local_clnt4 = clnt4;
170 211
171static DEFINE_MUTEX(rpcb_create_local_mutex); 212out:
213 return result;
214}
172 215
173/* 216/*
174 * Returns zero on success, otherwise a negative errno value 217 * Returns zero on success, otherwise a negative errno value
175 * is returned. 218 * is returned.
176 */ 219 */
177static int rpcb_create_local(void) 220static int rpcb_create_local_net(void)
178{ 221{
222 static const struct sockaddr_in rpcb_inaddr_loopback = {
223 .sin_family = AF_INET,
224 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
225 .sin_port = htons(RPCBIND_PORT),
226 };
179 struct rpc_create_args args = { 227 struct rpc_create_args args = {
228 .net = &init_net,
180 .protocol = XPRT_TRANSPORT_TCP, 229 .protocol = XPRT_TRANSPORT_TCP,
181 .address = (struct sockaddr *)&rpcb_inaddr_loopback, 230 .address = (struct sockaddr *)&rpcb_inaddr_loopback,
182 .addrsize = sizeof(rpcb_inaddr_loopback), 231 .addrsize = sizeof(rpcb_inaddr_loopback),
@@ -189,13 +238,6 @@ static int rpcb_create_local(void)
189 struct rpc_clnt *clnt, *clnt4; 238 struct rpc_clnt *clnt, *clnt4;
190 int result = 0; 239 int result = 0;
191 240
192 if (rpcb_local_clnt)
193 return result;
194
195 mutex_lock(&rpcb_create_local_mutex);
196 if (rpcb_local_clnt)
197 goto out;
198
199 clnt = rpc_create(&args); 241 clnt = rpc_create(&args);
200 if (IS_ERR(clnt)) { 242 if (IS_ERR(clnt)) {
201 dprintk("RPC: failed to create local rpcbind " 243 dprintk("RPC: failed to create local rpcbind "
@@ -211,15 +253,40 @@ static int rpcb_create_local(void)
211 */ 253 */
212 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); 254 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
213 if (IS_ERR(clnt4)) { 255 if (IS_ERR(clnt4)) {
214 dprintk("RPC: failed to create local rpcbind v4 " 256 dprintk("RPC: failed to bind second program to "
215 "cleint (errno %ld).\n", PTR_ERR(clnt4)); 257 "rpcbind v4 client (errno %ld).\n",
258 PTR_ERR(clnt4));
216 clnt4 = NULL; 259 clnt4 = NULL;
217 } 260 }
218 261
262 /* Protected by rpcb_create_local_mutex */
219 rpcb_local_clnt = clnt; 263 rpcb_local_clnt = clnt;
220 rpcb_local_clnt4 = clnt4; 264 rpcb_local_clnt4 = clnt4;
221 265
222out: 266out:
267 return result;
268}
269
270/*
271 * Returns zero on success, otherwise a negative errno value
272 * is returned.
273 */
274static int rpcb_create_local(void)
275{
276 static DEFINE_MUTEX(rpcb_create_local_mutex);
277 int result = 0;
278
279 if (rpcb_local_clnt)
280 return result;
281
282 mutex_lock(&rpcb_create_local_mutex);
283 if (rpcb_local_clnt)
284 goto out;
285
286 if (rpcb_create_local_unix() != 0)
287 result = rpcb_create_local_net();
288
289out:
223 mutex_unlock(&rpcb_create_local_mutex); 290 mutex_unlock(&rpcb_create_local_mutex);
224 return result; 291 return result;
225} 292}
@@ -228,6 +295,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
228 size_t salen, int proto, u32 version) 295 size_t salen, int proto, u32 version)
229{ 296{
230 struct rpc_create_args args = { 297 struct rpc_create_args args = {
298 .net = &init_net,
231 .protocol = proto, 299 .protocol = proto,
232 .address = srvaddr, 300 .address = srvaddr,
233 .addrsize = salen, 301 .addrsize = salen,
@@ -247,7 +315,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
247 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); 315 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
248 break; 316 break;
249 default: 317 default:
250 return NULL; 318 return ERR_PTR(-EAFNOSUPPORT);
251 } 319 }
252 320
253 return rpc_create(&args); 321 return rpc_create(&args);
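
Returning ERR_PTR(-EAFNOSUPPORT) instead of NULL follows the kernel's pointer-or-errno convention, letting callers propagate a precise error rather than inventing one:

        struct rpc_clnt *clnt = rpcb_create(hostname, sap, salen, proto, version);

        if (IS_ERR(clnt))
                return PTR_ERR(clnt);   /* e.g. -EAFNOSUPPORT for an unknown family */
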
@@ -475,57 +543,6 @@ int rpcb_v4_register(const u32 program, const u32 version,
475 return -EAFNOSUPPORT; 543 return -EAFNOSUPPORT;
476} 544}
477 545
478/**
479 * rpcb_getport_sync - obtain the port for an RPC service on a given host
480 * @sin: address of remote peer
481 * @prog: RPC program number to bind
482 * @vers: RPC version number to bind
483 * @prot: transport protocol to use to make this request
484 *
485 * Return value is the requested advertised port number,
486 * or a negative errno value.
487 *
488 * Called from outside the RPC client in a synchronous task context.
489 * Uses default timeout parameters specified by underlying transport.
490 *
491 * XXX: Needs to support IPv6
492 */
493int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
494{
495 struct rpcbind_args map = {
496 .r_prog = prog,
497 .r_vers = vers,
498 .r_prot = prot,
499 .r_port = 0,
500 };
501 struct rpc_message msg = {
502 .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
503 .rpc_argp = &map,
504 .rpc_resp = &map,
505 };
506 struct rpc_clnt *rpcb_clnt;
507 int status;
508
509 dprintk("RPC: %s(%pI4, %u, %u, %d)\n",
510 __func__, &sin->sin_addr.s_addr, prog, vers, prot);
511
512 rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
513 sizeof(*sin), prot, RPCBVERS_2);
514 if (IS_ERR(rpcb_clnt))
515 return PTR_ERR(rpcb_clnt);
516
517 status = rpc_call_sync(rpcb_clnt, &msg, 0);
518 rpc_shutdown_client(rpcb_clnt);
519
520 if (status >= 0) {
521 if (map.r_port != 0)
522 return map.r_port;
523 status = -EACCES;
524 }
525 return status;
526}
527EXPORT_SYMBOL_GPL(rpcb_getport_sync);
528
529static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) 546static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
530{ 547{
531 struct rpc_message msg = { 548 struct rpc_message msg = {
@@ -580,7 +597,7 @@ void rpcb_getport_async(struct rpc_task *task)
580 u32 bind_version; 597 u32 bind_version;
581 struct rpc_xprt *xprt; 598 struct rpc_xprt *xprt;
582 struct rpc_clnt *rpcb_clnt; 599 struct rpc_clnt *rpcb_clnt;
583 static struct rpcbind_args *map; 600 struct rpcbind_args *map;
584 struct rpc_task *child; 601 struct rpc_task *child;
585 struct sockaddr_storage addr; 602 struct sockaddr_storage addr;
586 struct sockaddr *sap = (struct sockaddr *)&addr; 603 struct sockaddr *sap = (struct sockaddr *)&addr;
@@ -741,46 +758,37 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
741 * XDR functions for rpcbind 758 * XDR functions for rpcbind
742 */ 759 */
743 760
744static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p, 761static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
745 const struct rpcbind_args *rpcb) 762 const struct rpcbind_args *rpcb)
746{ 763{
747 struct rpc_task *task = req->rq_task; 764 struct rpc_task *task = req->rq_task;
748 struct xdr_stream xdr; 765 __be32 *p;
749 766
750 dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", 767 dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
751 task->tk_pid, task->tk_msg.rpc_proc->p_name, 768 task->tk_pid, task->tk_msg.rpc_proc->p_name,
752 rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); 769 rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
753 770
754 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 771 p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2);
755 772 *p++ = cpu_to_be32(rpcb->r_prog);
756 p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz); 773 *p++ = cpu_to_be32(rpcb->r_vers);
757 if (unlikely(p == NULL)) 774 *p++ = cpu_to_be32(rpcb->r_prot);
758 return -EIO; 775 *p = cpu_to_be32(rpcb->r_port);
759
760 *p++ = htonl(rpcb->r_prog);
761 *p++ = htonl(rpcb->r_vers);
762 *p++ = htonl(rpcb->r_prot);
763 *p = htonl(rpcb->r_port);
764
765 return 0;
766} 776}
767 777
768static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, 778static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
769 struct rpcbind_args *rpcb) 779 struct rpcbind_args *rpcb)
770{ 780{
771 struct rpc_task *task = req->rq_task; 781 struct rpc_task *task = req->rq_task;
772 struct xdr_stream xdr;
773 unsigned long port; 782 unsigned long port;
774 783 __be32 *p;
775 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
776 784
777 rpcb->r_port = 0; 785 rpcb->r_port = 0;
778 786
779 p = xdr_inline_decode(&xdr, sizeof(__be32)); 787 p = xdr_inline_decode(xdr, 4);
780 if (unlikely(p == NULL)) 788 if (unlikely(p == NULL))
781 return -EIO; 789 return -EIO;
782 790
783 port = ntohl(*p); 791 port = be32_to_cpup(p);
784 dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, 792 dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
785 task->tk_msg.rpc_proc->p_name, port); 793 task->tk_msg.rpc_proc->p_name, port);
786 if (unlikely(port > USHRT_MAX)) 794 if (unlikely(port > USHRT_MAX))
@@ -790,20 +798,18 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
790 return 0; 798 return 0;
791} 799}
792 800
793static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p, 801static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
794 unsigned int *boolp) 802 unsigned int *boolp)
795{ 803{
796 struct rpc_task *task = req->rq_task; 804 struct rpc_task *task = req->rq_task;
797 struct xdr_stream xdr; 805 __be32 *p;
798
799 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
800 806
801 p = xdr_inline_decode(&xdr, sizeof(__be32)); 807 p = xdr_inline_decode(xdr, 4);
802 if (unlikely(p == NULL)) 808 if (unlikely(p == NULL))
803 return -EIO; 809 return -EIO;
804 810
805 *boolp = 0; 811 *boolp = 0;
806 if (*p) 812 if (*p != xdr_zero)
807 *boolp = 1; 813 *boolp = 1;
808 814
809 dprintk("RPC: %5u RPCB_%s call %s\n", 815 dprintk("RPC: %5u RPCB_%s call %s\n",
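
The kxdrproc_t to kxdreproc_t/kxdrdproc_t split hands encoders and decoders a ready-initialized struct xdr_stream instead of a bare __be32 pointer. Encoders reserve space and report nothing (the rpcbind encoders simply assume success; an overrun would indicate a wrong p_arglen), while decoders still pull data with xdr_inline_decode() and return -EIO on short buffers. A hypothetical two-word codec in the new style:

        struct pair {
                u32 a;
                u32 b;
        };

        static void enc_pair(struct rpc_rqst *req, struct xdr_stream *xdr,
                             const struct pair *p)
        {
                __be32 *q = xdr_reserve_space(xdr, 2 << 2);

                /* assumes the reservation cannot fail, as above */
                *q++ = cpu_to_be32(p->a);
                *q = cpu_to_be32(p->b);
        }

        static int dec_pair(struct rpc_rqst *req, struct xdr_stream *xdr,
                            struct pair *p)
        {
                __be32 *q = xdr_inline_decode(xdr, 2 << 2);

                if (unlikely(q == NULL))
                        return -EIO;
                p->a = be32_to_cpup(q++);
                p->b = be32_to_cpup(q);
                return 0;
        }
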
@@ -812,73 +818,53 @@ static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
812 return 0; 818 return 0;
813} 819}
814 820
815static int encode_rpcb_string(struct xdr_stream *xdr, const char *string, 821static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
816 const u32 maxstrlen) 822 const u32 maxstrlen)
817{ 823{
818 u32 len;
819 __be32 *p; 824 __be32 *p;
825 u32 len;
820 826
821 if (unlikely(string == NULL))
822 return -EIO;
823 len = strlen(string); 827 len = strlen(string);
824 if (unlikely(len > maxstrlen)) 828 BUG_ON(len > maxstrlen);
825 return -EIO; 829 p = xdr_reserve_space(xdr, 4 + len);
826
827 p = xdr_reserve_space(xdr, sizeof(__be32) + len);
828 if (unlikely(p == NULL))
829 return -EIO;
830 xdr_encode_opaque(p, string, len); 830 xdr_encode_opaque(p, string, len);
831
832 return 0;
833} 831}
834 832
835static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p, 833static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
836 const struct rpcbind_args *rpcb) 834 const struct rpcbind_args *rpcb)
837{ 835{
838 struct rpc_task *task = req->rq_task; 836 struct rpc_task *task = req->rq_task;
839 struct xdr_stream xdr; 837 __be32 *p;
840 838
841 dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", 839 dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
842 task->tk_pid, task->tk_msg.rpc_proc->p_name, 840 task->tk_pid, task->tk_msg.rpc_proc->p_name,
843 rpcb->r_prog, rpcb->r_vers, 841 rpcb->r_prog, rpcb->r_vers,
844 rpcb->r_netid, rpcb->r_addr); 842 rpcb->r_netid, rpcb->r_addr);
845 843
846 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 844 p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
847 845 *p++ = cpu_to_be32(rpcb->r_prog);
848 p = xdr_reserve_space(&xdr, 846 *p = cpu_to_be32(rpcb->r_vers);
849 sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz));
850 if (unlikely(p == NULL))
851 return -EIO;
852 *p++ = htonl(rpcb->r_prog);
853 *p = htonl(rpcb->r_vers);
854 847
855 if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN)) 848 encode_rpcb_string(xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN);
856 return -EIO; 849 encode_rpcb_string(xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN);
857 if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN)) 850 encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
858 return -EIO;
859 if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN))
860 return -EIO;
861
862 return 0;
863} 851}
864 852
865static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, 853static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
866 struct rpcbind_args *rpcb) 854 struct rpcbind_args *rpcb)
867{ 855{
868 struct sockaddr_storage address; 856 struct sockaddr_storage address;
869 struct sockaddr *sap = (struct sockaddr *)&address; 857 struct sockaddr *sap = (struct sockaddr *)&address;
870 struct rpc_task *task = req->rq_task; 858 struct rpc_task *task = req->rq_task;
871 struct xdr_stream xdr; 859 __be32 *p;
872 u32 len; 860 u32 len;
873 861
874 rpcb->r_port = 0; 862 rpcb->r_port = 0;
875 863
876 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 864 p = xdr_inline_decode(xdr, 4);
877
878 p = xdr_inline_decode(&xdr, sizeof(__be32));
879 if (unlikely(p == NULL)) 865 if (unlikely(p == NULL))
880 goto out_fail; 866 goto out_fail;
881 len = ntohl(*p); 867 len = be32_to_cpup(p);
882 868
883 /* 869 /*
884 * If the returned universal address is a null string, 870 * If the returned universal address is a null string,
@@ -893,7 +879,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
893 if (unlikely(len > RPCBIND_MAXUADDRLEN)) 879 if (unlikely(len > RPCBIND_MAXUADDRLEN))
894 goto out_fail; 880 goto out_fail;
895 881
896 p = xdr_inline_decode(&xdr, len); 882 p = xdr_inline_decode(xdr, len);
897 if (unlikely(p == NULL)) 883 if (unlikely(p == NULL))
898 goto out_fail; 884 goto out_fail;
899 dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, 885 dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid,
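
The conversion above also changes the error model of the encode path. Under the old kxdrproc_t scheme every encoder could hit a short buffer and had to return -EIO; under the new scheme the generic code sizes the send buffer from the procedure's p_arglen before the encoder runs, so xdr_reserve_space() cannot fail and an over-long string is a caller bug, hence the BUG_ON() in encode_rpcb_string(). A minimal sketch of a new-style string encoder (illustrative only, example name, not code from this patch):

        /* Sketch; mirrors the encode_rpcb_string() pattern above. */
        static void example_enc_string(struct xdr_stream *xdr, const char *s,
                                       u32 maxlen)
        {
                __be32 *p;
                u32 len = strlen(s);

                BUG_ON(len > maxlen);                /* buffer was sized from p_arglen */
                p = xdr_reserve_space(xdr, 4 + len); /* length word + bytes */
                xdr_encode_opaque(p, s, len);        /* writes len, data, XDR padding */
        }
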
@@ -919,8 +905,8 @@ out_fail:
919static struct rpc_procinfo rpcb_procedures2[] = { 905static struct rpc_procinfo rpcb_procedures2[] = {
920 [RPCBPROC_SET] = { 906 [RPCBPROC_SET] = {
921 .p_proc = RPCBPROC_SET, 907 .p_proc = RPCBPROC_SET,
922 .p_encode = (kxdrproc_t)rpcb_enc_mapping, 908 .p_encode = (kxdreproc_t)rpcb_enc_mapping,
923 .p_decode = (kxdrproc_t)rpcb_dec_set, 909 .p_decode = (kxdrdproc_t)rpcb_dec_set,
924 .p_arglen = RPCB_mappingargs_sz, 910 .p_arglen = RPCB_mappingargs_sz,
925 .p_replen = RPCB_setres_sz, 911 .p_replen = RPCB_setres_sz,
926 .p_statidx = RPCBPROC_SET, 912 .p_statidx = RPCBPROC_SET,
@@ -929,8 +915,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
929 }, 915 },
930 [RPCBPROC_UNSET] = { 916 [RPCBPROC_UNSET] = {
931 .p_proc = RPCBPROC_UNSET, 917 .p_proc = RPCBPROC_UNSET,
932 .p_encode = (kxdrproc_t)rpcb_enc_mapping, 918 .p_encode = (kxdreproc_t)rpcb_enc_mapping,
933 .p_decode = (kxdrproc_t)rpcb_dec_set, 919 .p_decode = (kxdrdproc_t)rpcb_dec_set,
934 .p_arglen = RPCB_mappingargs_sz, 920 .p_arglen = RPCB_mappingargs_sz,
935 .p_replen = RPCB_setres_sz, 921 .p_replen = RPCB_setres_sz,
936 .p_statidx = RPCBPROC_UNSET, 922 .p_statidx = RPCBPROC_UNSET,
@@ -939,8 +925,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
939 }, 925 },
940 [RPCBPROC_GETPORT] = { 926 [RPCBPROC_GETPORT] = {
941 .p_proc = RPCBPROC_GETPORT, 927 .p_proc = RPCBPROC_GETPORT,
942 .p_encode = (kxdrproc_t)rpcb_enc_mapping, 928 .p_encode = (kxdreproc_t)rpcb_enc_mapping,
943 .p_decode = (kxdrproc_t)rpcb_dec_getport, 929 .p_decode = (kxdrdproc_t)rpcb_dec_getport,
944 .p_arglen = RPCB_mappingargs_sz, 930 .p_arglen = RPCB_mappingargs_sz,
945 .p_replen = RPCB_getportres_sz, 931 .p_replen = RPCB_getportres_sz,
946 .p_statidx = RPCBPROC_GETPORT, 932 .p_statidx = RPCBPROC_GETPORT,
@@ -952,8 +938,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
952static struct rpc_procinfo rpcb_procedures3[] = { 938static struct rpc_procinfo rpcb_procedures3[] = {
953 [RPCBPROC_SET] = { 939 [RPCBPROC_SET] = {
954 .p_proc = RPCBPROC_SET, 940 .p_proc = RPCBPROC_SET,
955 .p_encode = (kxdrproc_t)rpcb_enc_getaddr, 941 .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
956 .p_decode = (kxdrproc_t)rpcb_dec_set, 942 .p_decode = (kxdrdproc_t)rpcb_dec_set,
957 .p_arglen = RPCB_getaddrargs_sz, 943 .p_arglen = RPCB_getaddrargs_sz,
958 .p_replen = RPCB_setres_sz, 944 .p_replen = RPCB_setres_sz,
959 .p_statidx = RPCBPROC_SET, 945 .p_statidx = RPCBPROC_SET,
@@ -962,8 +948,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
962 }, 948 },
963 [RPCBPROC_UNSET] = { 949 [RPCBPROC_UNSET] = {
964 .p_proc = RPCBPROC_UNSET, 950 .p_proc = RPCBPROC_UNSET,
965 .p_encode = (kxdrproc_t)rpcb_enc_getaddr, 951 .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
966 .p_decode = (kxdrproc_t)rpcb_dec_set, 952 .p_decode = (kxdrdproc_t)rpcb_dec_set,
967 .p_arglen = RPCB_getaddrargs_sz, 953 .p_arglen = RPCB_getaddrargs_sz,
968 .p_replen = RPCB_setres_sz, 954 .p_replen = RPCB_setres_sz,
969 .p_statidx = RPCBPROC_UNSET, 955 .p_statidx = RPCBPROC_UNSET,
@@ -972,8 +958,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
972 }, 958 },
973 [RPCBPROC_GETADDR] = { 959 [RPCBPROC_GETADDR] = {
974 .p_proc = RPCBPROC_GETADDR, 960 .p_proc = RPCBPROC_GETADDR,
975 .p_encode = (kxdrproc_t)rpcb_enc_getaddr, 961 .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
976 .p_decode = (kxdrproc_t)rpcb_dec_getaddr, 962 .p_decode = (kxdrdproc_t)rpcb_dec_getaddr,
977 .p_arglen = RPCB_getaddrargs_sz, 963 .p_arglen = RPCB_getaddrargs_sz,
978 .p_replen = RPCB_getaddrres_sz, 964 .p_replen = RPCB_getaddrres_sz,
979 .p_statidx = RPCBPROC_GETADDR, 965 .p_statidx = RPCBPROC_GETADDR,
@@ -985,8 +971,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
985static struct rpc_procinfo rpcb_procedures4[] = { 971static struct rpc_procinfo rpcb_procedures4[] = {
986 [RPCBPROC_SET] = { 972 [RPCBPROC_SET] = {
987 .p_proc = RPCBPROC_SET, 973 .p_proc = RPCBPROC_SET,
988 .p_encode = (kxdrproc_t)rpcb_enc_getaddr, 974 .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
989 .p_decode = (kxdrproc_t)rpcb_dec_set, 975 .p_decode = (kxdrdproc_t)rpcb_dec_set,
990 .p_arglen = RPCB_getaddrargs_sz, 976 .p_arglen = RPCB_getaddrargs_sz,
991 .p_replen = RPCB_setres_sz, 977 .p_replen = RPCB_setres_sz,
992 .p_statidx = RPCBPROC_SET, 978 .p_statidx = RPCBPROC_SET,
@@ -995,8 +981,8 @@ static struct rpc_procinfo rpcb_procedures4[] = {
995 }, 981 },
996 [RPCBPROC_UNSET] = { 982 [RPCBPROC_UNSET] = {
997 .p_proc = RPCBPROC_UNSET, 983 .p_proc = RPCBPROC_UNSET,
998 .p_encode = (kxdrproc_t)rpcb_enc_getaddr, 984 .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
999 .p_decode = (kxdrproc_t)rpcb_dec_set, 985 .p_decode = (kxdrdproc_t)rpcb_dec_set,
1000 .p_arglen = RPCB_getaddrargs_sz, 986 .p_arglen = RPCB_getaddrargs_sz,
1001 .p_replen = RPCB_setres_sz, 987 .p_replen = RPCB_setres_sz,
1002 .p_statidx = RPCBPROC_UNSET, 988 .p_statidx = RPCBPROC_UNSET,
@@ -1005,8 +991,8 @@ static struct rpc_procinfo rpcb_procedures4[] = {
1005 }, 991 },
1006 [RPCBPROC_GETADDR] = { 992 [RPCBPROC_GETADDR] = {
1007 .p_proc = RPCBPROC_GETADDR, 993 .p_proc = RPCBPROC_GETADDR,
1008 .p_encode = (kxdrproc_t)rpcb_enc_getaddr, 994 .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
1009 .p_decode = (kxdrproc_t)rpcb_dec_getaddr, 995 .p_decode = (kxdrdproc_t)rpcb_dec_getaddr,
1010 .p_arglen = RPCB_getaddrargs_sz, 996 .p_arglen = RPCB_getaddrargs_sz,
1011 .p_replen = RPCB_getaddrres_sz, 997 .p_replen = RPCB_getaddrres_sz,
1012 .p_statidx = RPCBPROC_GETADDR, 998 .p_statidx = RPCBPROC_GETADDR,
@@ -1041,19 +1027,19 @@ static struct rpcb_info rpcb_next_version6[] = {
1041 1027
1042static struct rpc_version rpcb_version2 = { 1028static struct rpc_version rpcb_version2 = {
1043 .number = RPCBVERS_2, 1029 .number = RPCBVERS_2,
1044 .nrprocs = RPCB_HIGHPROC_2, 1030 .nrprocs = ARRAY_SIZE(rpcb_procedures2),
1045 .procs = rpcb_procedures2 1031 .procs = rpcb_procedures2
1046}; 1032};
1047 1033
1048static struct rpc_version rpcb_version3 = { 1034static struct rpc_version rpcb_version3 = {
1049 .number = RPCBVERS_3, 1035 .number = RPCBVERS_3,
1050 .nrprocs = RPCB_HIGHPROC_3, 1036 .nrprocs = ARRAY_SIZE(rpcb_procedures3),
1051 .procs = rpcb_procedures3 1037 .procs = rpcb_procedures3
1052}; 1038};
1053 1039
1054static struct rpc_version rpcb_version4 = { 1040static struct rpc_version rpcb_version4 = {
1055 .number = RPCBVERS_4, 1041 .number = RPCBVERS_4,
1056 .nrprocs = RPCB_HIGHPROC_4, 1042 .nrprocs = ARRAY_SIZE(rpcb_procedures4),
1057 .procs = rpcb_procedures4 1043 .procs = rpcb_procedures4
1058}; 1044};
1059 1045
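
The rpcb_clnt.c hunks above all follow one pattern: the old kxdrproc_t routines received a raw __be32 pointer and each called xdr_init_encode()/xdr_init_decode() on its own, while the new kxdreproc_t/kxdrdproc_t routines receive a struct xdr_stream that the RPC core has already initialized. A decoder in the new style reduces to pulling words off the stream, as in this sketch (illustrative, made-up name). The switch from the RPCB_HIGHPROC_* constants to ARRAY_SIZE() likewise ties .nrprocs to the actual table length, so the count can no longer drift from the table.

        static int example_dec_bool(struct rpc_rqst *req, struct xdr_stream *xdr,
                                    unsigned int *boolp)
        {
                __be32 *p;

                p = xdr_inline_decode(xdr, 4);  /* one 4-byte XDR word */
                if (unlikely(p == NULL))
                        return -EIO;            /* short/truncated reply */
                *boolp = (*p != xdr_zero);      /* XDR bool: zero or not */
                return 0;
        }
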
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cace6049e4a5..4814e246a874 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task)
252 252
253/* 253/*
254 * Mark an RPC call as having completed by clearing the 'active' bit 254 * Mark an RPC call as having completed by clearing the 'active' bit
255 * and then waking up all tasks that were sleeping.
255 */ 256 */
256static void rpc_mark_complete_task(struct rpc_task *task) 257static int rpc_complete_task(struct rpc_task *task)
257{ 258{
258 smp_mb__before_clear_bit(); 259 void *m = &task->tk_runstate;
260 wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
261 struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
262 unsigned long flags;
263 int ret;
264
265 spin_lock_irqsave(&wq->lock, flags);
259 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 266 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
260 smp_mb__after_clear_bit(); 267 ret = atomic_dec_and_test(&task->tk_count);
261 wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); 268 if (waitqueue_active(wq))
269 __wake_up_locked_key(wq, TASK_NORMAL, &k);
270 spin_unlock_irqrestore(&wq->lock, flags);
271 return ret;
262} 272}
263 273
264/* 274/*
265 * Allow callers to wait for completion of an RPC call 275 * Allow callers to wait for completion of an RPC call
276 *
277 * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
278 * to enforce taking of the wq->lock and hence avoid races with
279 * rpc_complete_task().
266 */ 280 */
267int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) 281int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
268{ 282{
269 if (action == NULL) 283 if (action == NULL)
270 action = rpc_wait_bit_killable; 284 action = rpc_wait_bit_killable;
271 return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, 285 return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
272 action, TASK_KILLABLE); 286 action, TASK_KILLABLE);
273} 287}
274EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); 288EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
@@ -285,15 +299,8 @@ static void rpc_make_runnable(struct rpc_task *task)
285 if (rpc_test_and_set_running(task)) 299 if (rpc_test_and_set_running(task))
286 return; 300 return;
287 if (RPC_IS_ASYNC(task)) { 301 if (RPC_IS_ASYNC(task)) {
288 int status;
289
290 INIT_WORK(&task->u.tk_work, rpc_async_schedule); 302 INIT_WORK(&task->u.tk_work, rpc_async_schedule);
291 status = queue_work(rpciod_workqueue, &task->u.tk_work); 303 queue_work(rpciod_workqueue, &task->u.tk_work);
292 if (status < 0) {
293 printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
294 task->tk_status = status;
295 return;
296 }
297 } else 304 } else
298 wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); 305 wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
299} 306}
@@ -376,7 +383,7 @@ int rpc_queue_empty(struct rpc_wait_queue *queue)
376 spin_lock_bh(&queue->lock); 383 spin_lock_bh(&queue->lock);
377 res = queue->qlen; 384 res = queue->qlen;
378 spin_unlock_bh(&queue->lock); 385 spin_unlock_bh(&queue->lock);
379 return (res == 0); 386 return res == 0;
380} 387}
381EXPORT_SYMBOL_GPL(rpc_queue_empty); 388EXPORT_SYMBOL_GPL(rpc_queue_empty);
382 389
@@ -609,32 +616,25 @@ static void __rpc_execute(struct rpc_task *task)
609 BUG_ON(RPC_IS_QUEUED(task)); 616 BUG_ON(RPC_IS_QUEUED(task));
610 617
611 for (;;) { 618 for (;;) {
619 void (*do_action)(struct rpc_task *);
612 620
613 /* 621 /*
614 * Execute any pending callback. 622 * Execute any pending callback first.
615 */ 623 */
616 if (task->tk_callback) { 624 do_action = task->tk_callback;
617 void (*save_callback)(struct rpc_task *); 625 task->tk_callback = NULL;
618 626 if (do_action == NULL) {
619 /* 627 /*
620 * We set tk_callback to NULL before calling it, 628 * Perform the next FSM step.
621 * in case it sets the tk_callback field itself: 629 * tk_action may be NULL if the task has been killed.
630 * In particular, note that rpc_killall_tasks may
631 * do this at any time, so beware when dereferencing.
622 */ 632 */
623 save_callback = task->tk_callback; 633 do_action = task->tk_action;
624 task->tk_callback = NULL; 634 if (do_action == NULL)
625 save_callback(task);
626 }
627
628 /*
629 * Perform the next FSM step.
630 * tk_action may be NULL when the task has been killed
631 * by someone else.
632 */
633 if (!RPC_IS_QUEUED(task)) {
634 if (task->tk_action == NULL)
635 break; 635 break;
636 task->tk_action(task);
637 } 636 }
637 do_action(task);
638 638
639 /* 639 /*
640 * Lockless check for whether task is sleeping or not. 640 * Lockless check for whether task is sleeping or not.
@@ -787,6 +787,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
787 /* Initialize retry counters */ 787 /* Initialize retry counters */
788 task->tk_garb_retry = 2; 788 task->tk_garb_retry = 2;
789 task->tk_cred_retry = 2; 789 task->tk_cred_retry = 2;
790 task->tk_rebind_retry = 2;
790 791
791 task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; 792 task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
792 task->tk_owner = current->tgid; 793 task->tk_owner = current->tgid;
@@ -829,12 +830,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
829 } 830 }
830 831
831 rpc_init_task(task, setup_data); 832 rpc_init_task(task, setup_data);
832 if (task->tk_status < 0) {
833 int err = task->tk_status;
834 rpc_put_task(task);
835 return ERR_PTR(err);
836 }
837
838 task->tk_flags |= flags; 833 task->tk_flags |= flags;
839 dprintk("RPC: allocated task %p\n", task); 834 dprintk("RPC: allocated task %p\n", task);
840 return task; 835 return task;
@@ -857,34 +852,69 @@ static void rpc_async_release(struct work_struct *work)
857 rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); 852 rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
858} 853}
859 854
860void rpc_put_task(struct rpc_task *task) 855static void rpc_release_resources_task(struct rpc_task *task)
861{ 856{
862 if (!atomic_dec_and_test(&task->tk_count))
863 return;
864 /* Release resources */
865 if (task->tk_rqstp) 857 if (task->tk_rqstp)
866 xprt_release(task); 858 xprt_release(task);
867 if (task->tk_msg.rpc_cred) 859 if (task->tk_msg.rpc_cred) {
868 put_rpccred(task->tk_msg.rpc_cred); 860 put_rpccred(task->tk_msg.rpc_cred);
861 task->tk_msg.rpc_cred = NULL;
862 }
869 rpc_task_release_client(task); 863 rpc_task_release_client(task);
870 if (task->tk_workqueue != NULL) { 864}
865
866static void rpc_final_put_task(struct rpc_task *task,
867 struct workqueue_struct *q)
868{
869 if (q != NULL) {
871 INIT_WORK(&task->u.tk_work, rpc_async_release); 870 INIT_WORK(&task->u.tk_work, rpc_async_release);
872 queue_work(task->tk_workqueue, &task->u.tk_work); 871 queue_work(q, &task->u.tk_work);
873 } else 872 } else
874 rpc_free_task(task); 873 rpc_free_task(task);
875} 874}
875
876static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
877{
878 if (atomic_dec_and_test(&task->tk_count)) {
879 rpc_release_resources_task(task);
880 rpc_final_put_task(task, q);
881 }
882}
883
884void rpc_put_task(struct rpc_task *task)
885{
886 rpc_do_put_task(task, NULL);
887}
876EXPORT_SYMBOL_GPL(rpc_put_task); 888EXPORT_SYMBOL_GPL(rpc_put_task);
877 889
890void rpc_put_task_async(struct rpc_task *task)
891{
892 rpc_do_put_task(task, task->tk_workqueue);
893}
894EXPORT_SYMBOL_GPL(rpc_put_task_async);
895
878static void rpc_release_task(struct rpc_task *task) 896static void rpc_release_task(struct rpc_task *task)
879{ 897{
880 dprintk("RPC: %5u release task\n", task->tk_pid); 898 dprintk("RPC: %5u release task\n", task->tk_pid);
881 899
882 BUG_ON (RPC_IS_QUEUED(task)); 900 BUG_ON (RPC_IS_QUEUED(task));
883 901
884 /* Wake up anyone who is waiting for task completion */ 902 rpc_release_resources_task(task);
885 rpc_mark_complete_task(task);
886 903
887 rpc_put_task(task); 904 /*
905 * Note: at this point we have been removed from rpc_clnt->cl_tasks,
906 * so it should be safe to use task->tk_count as a test for whether
907 * or not any other processes still hold references to our rpc_task.
908 */
909 if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
910 /* Wake up anyone who may be waiting for task completion */
911 if (!rpc_complete_task(task))
912 return;
913 } else {
914 if (!atomic_dec_and_test(&task->tk_count))
915 return;
916 }
917 rpc_final_put_task(task, task->tk_workqueue);
888} 918}
889 919
890int rpciod_up(void) 920int rpciod_up(void)
@@ -908,7 +938,7 @@ static int rpciod_start(void)
908 * Create the rpciod thread and wait for it to start. 938 * Create the rpciod thread and wait for it to start.
909 */ 939 */
910 dprintk("RPC: creating workqueue rpciod\n"); 940 dprintk("RPC: creating workqueue rpciod\n");
911 wq = create_workqueue("rpciod"); 941 wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
912 rpciod_workqueue = wq; 942 rpciod_workqueue = wq;
913 return rpciod_workqueue != NULL; 943 return rpciod_workqueue != NULL;
914} 944}
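
Two sched.c details are worth spelling out. First, rpc_complete_task() clears RPC_TASK_ACTIVE, drops a reference, and wakes waiters all under the bit-waitqueue's own lock; the matching waiter uses out_of_line_wait_on_bit(), which takes the same lock, closing the window in which a waiter could miss the wakeup while the task is freed. Second, rpciod is now allocated with WQ_MEM_RECLAIM, which guarantees a rescuer thread so RPC I/O can make progress under memory pressure. The locking pattern in isolation, as a sketch with example names (assuming a kernel of this vintage, where bit-wait actions take a void pointer):

        #include <linux/wait.h>
        #include <linux/sched.h>

        static int example_action(void *word)
        {
                schedule();     /* wait loop set the task state already */
                return 0;
        }

        /* Waker: clear the bit and wake, all under wq->lock. */
        static void example_complete(unsigned long *word, int bit)
        {
                wait_queue_head_t *wq = bit_waitqueue(word, bit);
                struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(word, bit);
                unsigned long flags;

                spin_lock_irqsave(&wq->lock, flags);
                clear_bit(bit, word);
                if (waitqueue_active(wq))
                        __wake_up_locked_key(wq, TASK_NORMAL, &k);
                spin_unlock_irqrestore(&wq->lock, flags);
        }

        /* Waiter: the out_of_line variant takes wq->lock, so it cannot
         * race with the waker above the way plain wait_on_bit() could. */
        static int example_wait(unsigned long *word, int bit)
        {
                return out_of_line_wait_on_bit(word, bit, example_action,
                                               TASK_UNINTERRUPTIBLE);
        }
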
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index ea1046f3f9a3..80df89d957ba 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -22,11 +22,10 @@
22#include <linux/sunrpc/clnt.h> 22#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/svcsock.h> 23#include <linux/sunrpc/svcsock.h>
24#include <linux/sunrpc/metrics.h> 24#include <linux/sunrpc/metrics.h>
25#include <net/net_namespace.h>
26 25
27#define RPCDBG_FACILITY RPCDBG_MISC 26#include "netns.h"
28 27
29struct proc_dir_entry *proc_net_rpc = NULL; 28#define RPCDBG_FACILITY RPCDBG_MISC
30 29
31/* 30/*
32 * Get RPC client stats 31 * Get RPC client stats
@@ -116,9 +115,7 @@ EXPORT_SYMBOL_GPL(svc_seq_show);
116 */ 115 */
117struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) 116struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
118{ 117{
119 struct rpc_iostats *new; 118 return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
120 new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
121 return new;
122} 119}
123EXPORT_SYMBOL_GPL(rpc_alloc_iostats); 120EXPORT_SYMBOL_GPL(rpc_alloc_iostats);
124 121
@@ -218,10 +215,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats);
218static inline struct proc_dir_entry * 215static inline struct proc_dir_entry *
219do_register(const char *name, void *data, const struct file_operations *fops) 216do_register(const char *name, void *data, const struct file_operations *fops)
220{ 217{
221 rpc_proc_init(); 218 struct sunrpc_net *sn;
222 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
223 219
224 return proc_create_data(name, 0, proc_net_rpc, fops, data); 220 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
221 sn = net_generic(&init_net, sunrpc_net_id);
222 return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
225} 223}
226 224
227struct proc_dir_entry * 225struct proc_dir_entry *
@@ -234,7 +232,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register);
234void 232void
235rpc_proc_unregister(const char *name) 233rpc_proc_unregister(const char *name)
236{ 234{
237 remove_proc_entry(name, proc_net_rpc); 235 struct sunrpc_net *sn;
236
237 sn = net_generic(&init_net, sunrpc_net_id);
238 remove_proc_entry(name, sn->proc_net_rpc);
238} 239}
239EXPORT_SYMBOL_GPL(rpc_proc_unregister); 240EXPORT_SYMBOL_GPL(rpc_proc_unregister);
240 241
@@ -248,25 +249,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register);
248void 249void
249svc_proc_unregister(const char *name) 250svc_proc_unregister(const char *name)
250{ 251{
251 remove_proc_entry(name, proc_net_rpc); 252 struct sunrpc_net *sn;
253
254 sn = net_generic(&init_net, sunrpc_net_id);
255 remove_proc_entry(name, sn->proc_net_rpc);
252} 256}
253EXPORT_SYMBOL_GPL(svc_proc_unregister); 257EXPORT_SYMBOL_GPL(svc_proc_unregister);
254 258
255void 259int rpc_proc_init(struct net *net)
256rpc_proc_init(void)
257{ 260{
261 struct sunrpc_net *sn;
262
258 dprintk("RPC: registering /proc/net/rpc\n"); 263 dprintk("RPC: registering /proc/net/rpc\n");
259 if (!proc_net_rpc) 264 sn = net_generic(net, sunrpc_net_id);
260 proc_net_rpc = proc_mkdir("rpc", init_net.proc_net); 265 sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
266 if (sn->proc_net_rpc == NULL)
267 return -ENOMEM;
268
269 return 0;
261} 270}
262 271
263void 272void rpc_proc_exit(struct net *net)
264rpc_proc_exit(void)
265{ 273{
266 dprintk("RPC: unregistering /proc/net/rpc\n"); 274 dprintk("RPC: unregistering /proc/net/rpc\n");
267 if (proc_net_rpc) { 275 remove_proc_entry("rpc", net->proc_net);
268 proc_net_rpc = NULL;
269 remove_proc_entry("rpc", init_net.proc_net);
270 }
271} 276}
272 277
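
With the stats code converted to per-namespace data, the global proc_net_rpc pointer disappears and every lookup goes through net_generic(). A registration helper in the new style looks like this sketch (illustrative; the patch's own do_register() is still hard-wired to init_net at this stage):

        static struct proc_dir_entry *
        example_register(struct net *net, const char *name, void *data,
                         const struct file_operations *fops)
        {
                struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);

                /* create /proc/net/rpc/<name> inside this namespace */
                return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
        }
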
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c0d085013a2b..9d0809160994 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -22,7 +22,44 @@
22#include <linux/sunrpc/rpc_pipe_fs.h> 22#include <linux/sunrpc/rpc_pipe_fs.h>
23#include <linux/sunrpc/xprtsock.h> 23#include <linux/sunrpc/xprtsock.h>
24 24
25extern struct cache_detail ip_map_cache, unix_gid_cache; 25#include "netns.h"
26
27int sunrpc_net_id;
28
29static __net_init int sunrpc_init_net(struct net *net)
30{
31 int err;
32
33 err = rpc_proc_init(net);
34 if (err)
35 goto err_proc;
36
37 err = ip_map_cache_create(net);
38 if (err)
39 goto err_ipmap;
40
41 return 0;
42
43err_ipmap:
44 rpc_proc_exit(net);
45err_proc:
46 return err;
47}
48
49static __net_exit void sunrpc_exit_net(struct net *net)
50{
51 ip_map_cache_destroy(net);
52 rpc_proc_exit(net);
53}
54
55static struct pernet_operations sunrpc_net_ops = {
56 .init = sunrpc_init_net,
57 .exit = sunrpc_exit_net,
58 .id = &sunrpc_net_id,
59 .size = sizeof(struct sunrpc_net),
60};
61
62extern struct cache_detail unix_gid_cache;
26 63
27extern void cleanup_rpcb_clnt(void); 64extern void cleanup_rpcb_clnt(void);
28 65
@@ -38,18 +75,22 @@ init_sunrpc(void)
38 err = rpcauth_init_module(); 75 err = rpcauth_init_module();
39 if (err) 76 if (err)
40 goto out3; 77 goto out3;
78
79 cache_initialize();
80
81 err = register_pernet_subsys(&sunrpc_net_ops);
82 if (err)
83 goto out4;
41#ifdef RPC_DEBUG 84#ifdef RPC_DEBUG
42 rpc_register_sysctl(); 85 rpc_register_sysctl();
43#endif 86#endif
44#ifdef CONFIG_PROC_FS
45 rpc_proc_init();
46#endif
47 cache_initialize();
48 cache_register(&ip_map_cache);
49 cache_register(&unix_gid_cache); 87 cache_register(&unix_gid_cache);
50 svc_init_xprt_sock(); /* svc sock transport */ 88 svc_init_xprt_sock(); /* svc sock transport */
51 init_socket_xprt(); /* clnt sock transport */ 89 init_socket_xprt(); /* clnt sock transport */
52 return 0; 90 return 0;
91
92out4:
93 rpcauth_remove_module();
53out3: 94out3:
54 rpc_destroy_mempool(); 95 rpc_destroy_mempool();
55out2: 96out2:
@@ -67,14 +108,11 @@ cleanup_sunrpc(void)
67 svc_cleanup_xprt_sock(); 108 svc_cleanup_xprt_sock();
68 unregister_rpc_pipefs(); 109 unregister_rpc_pipefs();
69 rpc_destroy_mempool(); 110 rpc_destroy_mempool();
70 cache_unregister(&ip_map_cache);
71 cache_unregister(&unix_gid_cache); 111 cache_unregister(&unix_gid_cache);
112 unregister_pernet_subsys(&sunrpc_net_ops);
72#ifdef RPC_DEBUG 113#ifdef RPC_DEBUG
73 rpc_unregister_sysctl(); 114 rpc_unregister_sysctl();
74#endif 115#endif
75#ifdef CONFIG_PROC_FS
76 rpc_proc_exit();
77#endif
78 rcu_barrier(); /* Wait for completion of call_rcu()'s */ 116 rcu_barrier(); /* Wait for completion of call_rcu()'s */
79} 117}
80MODULE_LICENSE("GPL"); 118MODULE_LICENSE("GPL");
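
The sunrpc_net_ops registration above is the standard pernet pattern: register_pernet_subsys() allocates ->size bytes of private storage for every network namespace, current and future, records an identifier in ->id, and runs ->init/->exit as namespaces come and go; net_generic(net, id) then returns that namespace's private block. A self-contained sketch with example names (not code from this patch):

        #include <linux/proc_fs.h>
        #include <net/net_namespace.h>
        #include <net/netns/generic.h>

        struct example_net {
                struct proc_dir_entry *proc_root;
        };

        static int example_net_id;

        static __net_init int example_init_net(struct net *net)
        {
                struct example_net *en = net_generic(net, example_net_id);

                en->proc_root = proc_mkdir("example", net->proc_net);
                return en->proc_root ? 0 : -ENOMEM;
        }

        static __net_exit void example_exit_net(struct net *net)
        {
                remove_proc_entry("example", net->proc_net);
        }

        static struct pernet_operations example_net_ops = {
                .init = example_init_net,
                .exit = example_exit_net,
                .id   = &example_net_id,
                .size = sizeof(struct example_net),
        };

        /* module init would then do:
         *      err = register_pernet_subsys(&example_net_ops);
         */
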
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d9017d64597e..2b90292e9505 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -488,10 +488,6 @@ svc_destroy(struct svc_serv *serv)
488 if (svc_serv_is_pooled(serv)) 488 if (svc_serv_is_pooled(serv))
489 svc_pool_map_put(); 489 svc_pool_map_put();
490 490
491#if defined(CONFIG_NFS_V4_1)
492 svc_sock_destroy(serv->bc_xprt);
493#endif /* CONFIG_NFS_V4_1 */
494
495 svc_unregister(serv); 491 svc_unregister(serv);
496 kfree(serv->sv_pools); 492 kfree(serv->sv_pools);
497 kfree(serv); 493 kfree(serv);
@@ -946,6 +942,8 @@ static void svc_unregister(const struct svc_serv *serv)
946 if (progp->pg_vers[i]->vs_hidden) 942 if (progp->pg_vers[i]->vs_hidden)
947 continue; 943 continue;
948 944
945 dprintk("svc: attempting to unregister %sv%u\n",
946 progp->pg_name, i);
949 __svc_unregister(progp->pg_prog, i, progp->pg_name); 947 __svc_unregister(progp->pg_prog, i, progp->pg_name);
950 } 948 }
951 } 949 }
@@ -1005,6 +1003,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1005 rqstp->rq_splice_ok = 1; 1003 rqstp->rq_splice_ok = 1;
1006 /* Will be turned off only when NFSv4 Sessions are used */ 1004 /* Will be turned off only when NFSv4 Sessions are used */
1007 rqstp->rq_usedeferral = 1; 1005 rqstp->rq_usedeferral = 1;
1006 rqstp->rq_dropme = false;
1008 1007
1009 /* Setup reply header */ 1008 /* Setup reply header */
1010 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); 1009 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
@@ -1055,6 +1054,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1055 goto err_bad; 1054 goto err_bad;
1056 case SVC_DENIED: 1055 case SVC_DENIED:
1057 goto err_bad_auth; 1056 goto err_bad_auth;
1057 case SVC_CLOSE:
1058 if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
1059 svc_close_xprt(rqstp->rq_xprt);
1058 case SVC_DROP: 1060 case SVC_DROP:
1059 goto dropit; 1061 goto dropit;
1060 case SVC_COMPLETE: 1062 case SVC_COMPLETE:
@@ -1103,7 +1105,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1103 *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); 1105 *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
1104 1106
1105 /* Encode reply */ 1107 /* Encode reply */
1106 if (*statp == rpc_drop_reply) { 1108 if (rqstp->rq_dropme) {
1107 if (procp->pc_release) 1109 if (procp->pc_release)
1108 procp->pc_release(rqstp, NULL, rqstp->rq_resp); 1110 procp->pc_release(rqstp, NULL, rqstp->rq_resp);
1109 goto dropit; 1111 goto dropit;
@@ -1144,7 +1146,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1144 dropit: 1146 dropit:
1145 svc_authorise(rqstp); /* doesn't hurt to call this twice */ 1147 svc_authorise(rqstp); /* doesn't hurt to call this twice */
1146 dprintk("svc: svc_process dropit\n"); 1148 dprintk("svc: svc_process dropit\n");
1147 svc_drop(rqstp);
1148 return 0; 1149 return 0;
1149 1150
1150err_short_len: 1151err_short_len:
@@ -1215,7 +1216,6 @@ svc_process(struct svc_rqst *rqstp)
1215 struct kvec *resv = &rqstp->rq_res.head[0]; 1216 struct kvec *resv = &rqstp->rq_res.head[0];
1216 struct svc_serv *serv = rqstp->rq_server; 1217 struct svc_serv *serv = rqstp->rq_server;
1217 u32 dir; 1218 u32 dir;
1218 int error;
1219 1219
1220 /* 1220 /*
1221 * Setup response xdr_buf. 1221 * Setup response xdr_buf.
@@ -1243,11 +1243,13 @@ svc_process(struct svc_rqst *rqstp)
1243 return 0; 1243 return 0;
1244 } 1244 }
1245 1245
1246 error = svc_process_common(rqstp, argv, resv); 1246 /* Returns 1 for send, 0 for drop */
1247 if (error <= 0) 1247 if (svc_process_common(rqstp, argv, resv))
1248 return error; 1248 return svc_send(rqstp);
1249 1249 else {
1250 return svc_send(rqstp); 1250 svc_drop(rqstp);
1251 return 0;
1252 }
1251} 1253}
1252 1254
1253#if defined(CONFIG_NFS_V4_1) 1255#if defined(CONFIG_NFS_V4_1)
@@ -1261,10 +1263,9 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
1261{ 1263{
1262 struct kvec *argv = &rqstp->rq_arg.head[0]; 1264 struct kvec *argv = &rqstp->rq_arg.head[0];
1263 struct kvec *resv = &rqstp->rq_res.head[0]; 1265 struct kvec *resv = &rqstp->rq_res.head[0];
1264 int error;
1265 1266
1266 /* Build the svc_rqst used by the common processing routine */ 1267 /* Build the svc_rqst used by the common processing routine */
1267 rqstp->rq_xprt = serv->bc_xprt; 1268 rqstp->rq_xprt = serv->sv_bc_xprt;
1268 rqstp->rq_xid = req->rq_xid; 1269 rqstp->rq_xid = req->rq_xid;
1269 rqstp->rq_prot = req->rq_xprt->prot; 1270 rqstp->rq_prot = req->rq_xprt->prot;
1270 rqstp->rq_server = serv; 1271 rqstp->rq_server = serv;
@@ -1289,12 +1290,15 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
1289 svc_getu32(argv); /* XID */ 1290 svc_getu32(argv); /* XID */
1290 svc_getnl(argv); /* CALLDIR */ 1291 svc_getnl(argv); /* CALLDIR */
1291 1292
1292 error = svc_process_common(rqstp, argv, resv); 1293 /* Returns 1 for send, 0 for drop */
1293 if (error <= 0) 1294 if (svc_process_common(rqstp, argv, resv)) {
1294 return error; 1295 memcpy(&req->rq_snd_buf, &rqstp->rq_res,
1295 1296 sizeof(req->rq_snd_buf));
1296 memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); 1297 return bc_send(req);
1297 return bc_send(req); 1298 } else {
1299 /* Nothing to do to drop request */
1300 return 0;
1301 }
1298} 1302}
1299EXPORT_SYMBOL(bc_svc_process); 1303EXPORT_SYMBOL(bc_svc_process);
1300#endif /* CONFIG_NFS_V4_1 */ 1304#endif /* CONFIG_NFS_V4_1 */
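
The svc.c hunks change the contract of svc_process_common(): instead of a signed status it now returns 1 when a reply should be transmitted and 0 when the request must be dropped, and the dropit path no longer calls svc_drop() itself, leaving that to the caller. Note also that the new SVC_CLOSE case deliberately falls through into SVC_DROP after closing a temporary transport. The caller-side pattern, reduced to a sketch (example wrapper name):

        static int example_dispatch(struct svc_rqst *rqstp)
        {
                struct kvec *argv = &rqstp->rq_arg.head[0];
                struct kvec *resv = &rqstp->rq_res.head[0];

                if (svc_process_common(rqstp, argv, resv))
                        return svc_send(rqstp); /* reply encoded: transmit */
                svc_drop(rqstp);                /* deferred/denied: no reply */
                return 0;
        }
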
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cbc084939dd8..ab86b7927f84 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -5,7 +5,6 @@
5 */ 5 */
6 6
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/smp_lock.h>
9#include <linux/errno.h> 8#include <linux/errno.h>
10#include <linux/freezer.h> 9#include <linux/freezer.h>
11#include <linux/kthread.h> 10#include <linux/kthread.h>
@@ -14,6 +13,7 @@
14#include <linux/sunrpc/stats.h> 13#include <linux/sunrpc/stats.h>
15#include <linux/sunrpc/svc_xprt.h> 14#include <linux/sunrpc/svc_xprt.h>
16#include <linux/sunrpc/svcsock.h> 15#include <linux/sunrpc/svcsock.h>
16#include <linux/sunrpc/xprt.h>
17 17
18#define RPCDBG_FACILITY RPCDBG_SVCXPRT 18#define RPCDBG_FACILITY RPCDBG_SVCXPRT
19 19
@@ -100,16 +100,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
100 */ 100 */
101int svc_print_xprts(char *buf, int maxlen) 101int svc_print_xprts(char *buf, int maxlen)
102{ 102{
103 struct list_head *le; 103 struct svc_xprt_class *xcl;
104 char tmpstr[80]; 104 char tmpstr[80];
105 int len = 0; 105 int len = 0;
106 buf[0] = '\0'; 106 buf[0] = '\0';
107 107
108 spin_lock(&svc_xprt_class_lock); 108 spin_lock(&svc_xprt_class_lock);
109 list_for_each(le, &svc_xprt_class_list) { 109 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
110 int slen; 110 int slen;
111 struct svc_xprt_class *xcl =
112 list_entry(le, struct svc_xprt_class, xcl_list);
113 111
114 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); 112 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
115 slen = strlen(tmpstr); 113 slen = strlen(tmpstr);
@@ -128,9 +126,12 @@ static void svc_xprt_free(struct kref *kref)
128 struct svc_xprt *xprt = 126 struct svc_xprt *xprt =
129 container_of(kref, struct svc_xprt, xpt_ref); 127 container_of(kref, struct svc_xprt, xpt_ref);
130 struct module *owner = xprt->xpt_class->xcl_owner; 128 struct module *owner = xprt->xpt_class->xcl_owner;
131 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && 129 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
132 xprt->xpt_auth_cache != NULL) 130 svcauth_unix_info_release(xprt);
133 svcauth_unix_info_release(xprt->xpt_auth_cache); 131 put_net(xprt->xpt_net);
132 /* See comment on corresponding get in xs_setup_bc_tcp(): */
133 if (xprt->xpt_bc_xprt)
134 xprt_put(xprt->xpt_bc_xprt);
134 xprt->xpt_ops->xpo_free(xprt); 135 xprt->xpt_ops->xpo_free(xprt);
135 module_put(owner); 136 module_put(owner);
136} 137}
@@ -156,15 +157,18 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
156 INIT_LIST_HEAD(&xprt->xpt_list); 157 INIT_LIST_HEAD(&xprt->xpt_list);
157 INIT_LIST_HEAD(&xprt->xpt_ready); 158 INIT_LIST_HEAD(&xprt->xpt_ready);
158 INIT_LIST_HEAD(&xprt->xpt_deferred); 159 INIT_LIST_HEAD(&xprt->xpt_deferred);
160 INIT_LIST_HEAD(&xprt->xpt_users);
159 mutex_init(&xprt->xpt_mutex); 161 mutex_init(&xprt->xpt_mutex);
160 spin_lock_init(&xprt->xpt_lock); 162 spin_lock_init(&xprt->xpt_lock);
161 set_bit(XPT_BUSY, &xprt->xpt_flags); 163 set_bit(XPT_BUSY, &xprt->xpt_flags);
162 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); 164 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
165 xprt->xpt_net = get_net(&init_net);
163} 166}
164EXPORT_SYMBOL_GPL(svc_xprt_init); 167EXPORT_SYMBOL_GPL(svc_xprt_init);
165 168
166static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, 169static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
167 struct svc_serv *serv, 170 struct svc_serv *serv,
171 struct net *net,
168 const int family, 172 const int family,
169 const unsigned short port, 173 const unsigned short port,
170 int flags) 174 int flags)
@@ -199,12 +203,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
199 return ERR_PTR(-EAFNOSUPPORT); 203 return ERR_PTR(-EAFNOSUPPORT);
200 } 204 }
201 205
202 return xcl->xcl_ops->xpo_create(serv, sap, len, flags); 206 return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
203} 207}
204 208
205int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 209int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
206 const int family, const unsigned short port, 210 struct net *net, const int family,
207 int flags) 211 const unsigned short port, int flags)
208{ 212{
209 struct svc_xprt_class *xcl; 213 struct svc_xprt_class *xcl;
210 214
@@ -212,6 +216,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
212 spin_lock(&svc_xprt_class_lock); 216 spin_lock(&svc_xprt_class_lock);
213 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { 217 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
214 struct svc_xprt *newxprt; 218 struct svc_xprt *newxprt;
219 unsigned short newport;
215 220
216 if (strcmp(xprt_name, xcl->xcl_name)) 221 if (strcmp(xprt_name, xcl->xcl_name))
217 continue; 222 continue;
@@ -220,7 +225,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
220 goto err; 225 goto err;
221 226
222 spin_unlock(&svc_xprt_class_lock); 227 spin_unlock(&svc_xprt_class_lock);
223 newxprt = __svc_xpo_create(xcl, serv, family, port, flags); 228 newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
224 if (IS_ERR(newxprt)) { 229 if (IS_ERR(newxprt)) {
225 module_put(xcl->xcl_owner); 230 module_put(xcl->xcl_owner);
226 return PTR_ERR(newxprt); 231 return PTR_ERR(newxprt);
@@ -230,8 +235,9 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
230 spin_lock_bh(&serv->sv_lock); 235 spin_lock_bh(&serv->sv_lock);
231 list_add(&newxprt->xpt_list, &serv->sv_permsocks); 236 list_add(&newxprt->xpt_list, &serv->sv_permsocks);
232 spin_unlock_bh(&serv->sv_lock); 237 spin_unlock_bh(&serv->sv_lock);
238 newport = svc_xprt_local_port(newxprt);
233 clear_bit(XPT_BUSY, &newxprt->xpt_flags); 239 clear_bit(XPT_BUSY, &newxprt->xpt_flags);
234 return svc_xprt_local_port(newxprt); 240 return newport;
235 } 241 }
236 err: 242 err:
237 spin_unlock(&svc_xprt_class_lock); 243 spin_unlock(&svc_xprt_class_lock);
@@ -301,6 +307,15 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
301 list_del(&rqstp->rq_list); 307 list_del(&rqstp->rq_list);
302} 308}
303 309
310static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
311{
312 if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
313 return true;
314 if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED)))
315 return xprt->xpt_ops->xpo_has_wspace(xprt);
316 return false;
317}
318
304/* 319/*
305 * Queue up a transport with data pending. If there are idle nfsd 320 * Queue up a transport with data pending. If there are idle nfsd
306 * processes, wake 'em up. 321 * processes, wake 'em up.
@@ -313,8 +328,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
313 struct svc_rqst *rqstp; 328 struct svc_rqst *rqstp;
314 int cpu; 329 int cpu;
315 330
316 if (!(xprt->xpt_flags & 331 if (!svc_xprt_has_something_to_do(xprt))
317 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
318 return; 332 return;
319 333
320 cpu = get_cpu(); 334 cpu = get_cpu();
@@ -329,12 +343,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
329 "svc_xprt_enqueue: " 343 "svc_xprt_enqueue: "
330 "threads and transports both waiting??\n"); 344 "threads and transports both waiting??\n");
331 345
332 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
333 /* Don't enqueue dead transports */
334 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
335 goto out_unlock;
336 }
337
338 pool->sp_stats.packets++; 346 pool->sp_stats.packets++;
339 347
340 /* Mark transport as busy. It will remain in this state until 348 /* Mark transport as busy. It will remain in this state until
@@ -347,28 +355,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
347 dprintk("svc: transport %p busy, not enqueued\n", xprt); 355 dprintk("svc: transport %p busy, not enqueued\n", xprt);
348 goto out_unlock; 356 goto out_unlock;
349 } 357 }
350 BUG_ON(xprt->xpt_pool != NULL);
351 xprt->xpt_pool = pool;
352
353 /* Handle pending connection */
354 if (test_bit(XPT_CONN, &xprt->xpt_flags))
355 goto process;
356
357 /* Handle close in-progress */
358 if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
359 goto process;
360
361 /* Check if we have space to reply to a request */
362 if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
363 /* Don't enqueue while not enough space for reply */
364 dprintk("svc: no write space, transport %p not enqueued\n",
365 xprt);
366 xprt->xpt_pool = NULL;
367 clear_bit(XPT_BUSY, &xprt->xpt_flags);
368 goto out_unlock;
369 }
370 358
371 process:
372 if (!list_empty(&pool->sp_threads)) { 359 if (!list_empty(&pool->sp_threads)) {
373 rqstp = list_entry(pool->sp_threads.next, 360 rqstp = list_entry(pool->sp_threads.next,
374 struct svc_rqst, 361 struct svc_rqst,
@@ -385,13 +372,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
385 rqstp->rq_reserved = serv->sv_max_mesg; 372 rqstp->rq_reserved = serv->sv_max_mesg;
386 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 373 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
387 pool->sp_stats.threads_woken++; 374 pool->sp_stats.threads_woken++;
388 BUG_ON(xprt->xpt_pool != pool);
389 wake_up(&rqstp->rq_wait); 375 wake_up(&rqstp->rq_wait);
390 } else { 376 } else {
391 dprintk("svc: transport %p put into queue\n", xprt); 377 dprintk("svc: transport %p put into queue\n", xprt);
392 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 378 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
393 pool->sp_stats.sockets_queued++; 379 pool->sp_stats.sockets_queued++;
394 BUG_ON(xprt->xpt_pool != pool);
395 } 380 }
396 381
397out_unlock: 382out_unlock:
@@ -430,9 +415,13 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
430void svc_xprt_received(struct svc_xprt *xprt) 415void svc_xprt_received(struct svc_xprt *xprt)
431{ 416{
432 BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); 417 BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
433 xprt->xpt_pool = NULL; 418 /* As soon as we clear busy, the xprt could be closed and
419 * 'put', so we need a reference to call svc_xprt_enqueue with:
420 */
421 svc_xprt_get(xprt);
434 clear_bit(XPT_BUSY, &xprt->xpt_flags); 422 clear_bit(XPT_BUSY, &xprt->xpt_flags);
435 svc_xprt_enqueue(xprt); 423 svc_xprt_enqueue(xprt);
424 svc_xprt_put(xprt);
436} 425}
437EXPORT_SYMBOL_GPL(svc_xprt_received); 426EXPORT_SYMBOL_GPL(svc_xprt_received);
438 427
@@ -651,6 +640,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
651 if (signalled() || kthread_should_stop()) 640 if (signalled() || kthread_should_stop())
652 return -EINTR; 641 return -EINTR;
653 642
643 /* Normally we will wait up to 5 seconds for any required
644 * cache information to be provided.
645 */
646 rqstp->rq_chandle.thread_wait = 5*HZ;
647
654 spin_lock_bh(&pool->sp_lock); 648 spin_lock_bh(&pool->sp_lock);
655 xprt = svc_xprt_dequeue(pool); 649 xprt = svc_xprt_dequeue(pool);
656 if (xprt) { 650 if (xprt) {
@@ -658,6 +652,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
658 svc_xprt_get(xprt); 652 svc_xprt_get(xprt);
659 rqstp->rq_reserved = serv->sv_max_mesg; 653 rqstp->rq_reserved = serv->sv_max_mesg;
660 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 654 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
655
656 /* As there is a shortage of threads and this request
657 * had to be queued, don't allow the thread to wait so
658 * long for cache updates.
659 */
660 rqstp->rq_chandle.thread_wait = 1*HZ;
661 } else { 661 } else {
662 /* No data pending. Go to sleep */ 662 /* No data pending. Go to sleep */
663 svc_thread_enqueue(pool, rqstp); 663 svc_thread_enqueue(pool, rqstp);
@@ -710,7 +710,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
710 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { 710 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
711 dprintk("svc_recv: found XPT_CLOSE\n"); 711 dprintk("svc_recv: found XPT_CLOSE\n");
712 svc_delete_xprt(xprt); 712 svc_delete_xprt(xprt);
713 } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { 713 /* Leave XPT_BUSY set on the dead xprt: */
714 goto out;
715 }
716 if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
714 struct svc_xprt *newxpt; 717 struct svc_xprt *newxpt;
715 newxpt = xprt->xpt_ops->xpo_accept(xprt); 718 newxpt = xprt->xpt_ops->xpo_accept(xprt);
716 if (newxpt) { 719 if (newxpt) {
@@ -735,28 +738,23 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
735 spin_unlock_bh(&serv->sv_lock); 738 spin_unlock_bh(&serv->sv_lock);
736 svc_xprt_received(newxpt); 739 svc_xprt_received(newxpt);
737 } 740 }
738 svc_xprt_received(xprt); 741 } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
739 } else {
740 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", 742 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
741 rqstp, pool->sp_id, xprt, 743 rqstp, pool->sp_id, xprt,
742 atomic_read(&xprt->xpt_ref.refcount)); 744 atomic_read(&xprt->xpt_ref.refcount));
743 rqstp->rq_deferred = svc_deferred_dequeue(xprt); 745 rqstp->rq_deferred = svc_deferred_dequeue(xprt);
744 if (rqstp->rq_deferred) { 746 if (rqstp->rq_deferred)
745 svc_xprt_received(xprt);
746 len = svc_deferred_recv(rqstp); 747 len = svc_deferred_recv(rqstp);
747 } else { 748 else
748 len = xprt->xpt_ops->xpo_recvfrom(rqstp); 749 len = xprt->xpt_ops->xpo_recvfrom(rqstp);
749 svc_xprt_received(xprt);
750 }
751 dprintk("svc: got len=%d\n", len); 750 dprintk("svc: got len=%d\n", len);
752 } 751 }
752 svc_xprt_received(xprt);
753 753
754 /* No data, incomplete (TCP) read, or accept() */ 754 /* No data, incomplete (TCP) read, or accept() */
755 if (len == 0 || len == -EAGAIN) { 755 if (len == 0 || len == -EAGAIN)
756 rqstp->rq_res.len = 0; 756 goto out;
757 svc_xprt_release(rqstp); 757
758 return -EAGAIN;
759 }
760 clear_bit(XPT_OLD, &xprt->xpt_flags); 758 clear_bit(XPT_OLD, &xprt->xpt_flags);
761 759
762 rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); 760 rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
@@ -765,6 +763,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
765 if (serv->sv_stats) 763 if (serv->sv_stats)
766 serv->sv_stats->netcnt++; 764 serv->sv_stats->netcnt++;
767 return len; 765 return len;
766out:
767 rqstp->rq_res.len = 0;
768 svc_xprt_release(rqstp);
769 return -EAGAIN;
768} 770}
769EXPORT_SYMBOL_GPL(svc_recv); 771EXPORT_SYMBOL_GPL(svc_recv);
770 772
@@ -868,6 +870,19 @@ static void svc_age_temp_xprts(unsigned long closure)
868 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); 870 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
869} 871}
870 872
873static void call_xpt_users(struct svc_xprt *xprt)
874{
875 struct svc_xpt_user *u;
876
877 spin_lock(&xprt->xpt_lock);
878 while (!list_empty(&xprt->xpt_users)) {
879 u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
880 list_del(&u->list);
881 u->callback(u);
882 }
883 spin_unlock(&xprt->xpt_lock);
884}
885
871/* 886/*
872 * Remove a dead transport 887 * Remove a dead transport
873 */ 888 */
@@ -878,7 +893,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
878 893
879 /* Only do this once */ 894 /* Only do this once */
880 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) 895 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
881 return; 896 BUG();
882 897
883 dprintk("svc: svc_delete_xprt(%p)\n", xprt); 898 dprintk("svc: svc_delete_xprt(%p)\n", xprt);
884 xprt->xpt_ops->xpo_detach(xprt); 899 xprt->xpt_ops->xpo_detach(xprt);
@@ -900,6 +915,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
900 while ((dr = svc_deferred_dequeue(xprt)) != NULL) 915 while ((dr = svc_deferred_dequeue(xprt)) != NULL)
901 kfree(dr); 916 kfree(dr);
902 917
918 call_xpt_users(xprt);
903 svc_xprt_put(xprt); 919 svc_xprt_put(xprt);
904} 920}
905 921
@@ -909,11 +925,13 @@ void svc_close_xprt(struct svc_xprt *xprt)
909 if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) 925 if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
910 /* someone else will have to effect the close */ 926 /* someone else will have to effect the close */
911 return; 927 return;
912 928 /*
913 svc_xprt_get(xprt); 929 * We expect svc_close_xprt() to work even when no threads are
930 * running (e.g., while configuring the server before starting
931 * any threads), so if the transport isn't busy, we delete
932 * it ourself:
933 */
914 svc_delete_xprt(xprt); 934 svc_delete_xprt(xprt);
915 clear_bit(XPT_BUSY, &xprt->xpt_flags);
916 svc_xprt_put(xprt);
917} 935}
918EXPORT_SYMBOL_GPL(svc_close_xprt); 936EXPORT_SYMBOL_GPL(svc_close_xprt);
919 937
@@ -922,16 +940,16 @@ void svc_close_all(struct list_head *xprt_list)
922 struct svc_xprt *xprt; 940 struct svc_xprt *xprt;
923 struct svc_xprt *tmp; 941 struct svc_xprt *tmp;
924 942
943 /*
944 * The server is shutting down, and no more threads are running.
945 * svc_xprt_enqueue() might still be running, but at worst it
946 * will re-add the xprt to sp_sockets, which will soon get
947 * freed. So we don't bother with any more locking, and don't
948 * leave the close to the (nonexistent) server threads:
949 */
925 list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { 950 list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
926 set_bit(XPT_CLOSE, &xprt->xpt_flags); 951 set_bit(XPT_CLOSE, &xprt->xpt_flags);
927 if (test_bit(XPT_BUSY, &xprt->xpt_flags)) { 952 svc_delete_xprt(xprt);
928 /* Waiting to be processed, but no threads left,
929 * So just remove it from the waiting list
930 */
931 list_del_init(&xprt->xpt_ready);
932 clear_bit(XPT_BUSY, &xprt->xpt_flags);
933 }
934 svc_close_xprt(xprt);
935 } 953 }
936} 954}
937 955
@@ -1005,6 +1023,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
1005 } 1023 }
1006 svc_xprt_get(rqstp->rq_xprt); 1024 svc_xprt_get(rqstp->rq_xprt);
1007 dr->xprt = rqstp->rq_xprt; 1025 dr->xprt = rqstp->rq_xprt;
1026 rqstp->rq_dropme = true;
1008 1027
1009 dr->handle.revisit = svc_revisit; 1028 dr->handle.revisit = svc_revisit;
1010 return &dr->handle; 1029 return &dr->handle;
@@ -1042,14 +1061,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
1042 if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags)) 1061 if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
1043 return NULL; 1062 return NULL;
1044 spin_lock(&xprt->xpt_lock); 1063 spin_lock(&xprt->xpt_lock);
1045 clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
1046 if (!list_empty(&xprt->xpt_deferred)) { 1064 if (!list_empty(&xprt->xpt_deferred)) {
1047 dr = list_entry(xprt->xpt_deferred.next, 1065 dr = list_entry(xprt->xpt_deferred.next,
1048 struct svc_deferred_req, 1066 struct svc_deferred_req,
1049 handle.recent); 1067 handle.recent);
1050 list_del_init(&dr->handle.recent); 1068 list_del_init(&dr->handle.recent);
1051 set_bit(XPT_DEFERRED, &xprt->xpt_flags); 1069 } else
1052 } 1070 clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
1053 spin_unlock(&xprt->xpt_lock); 1071 spin_unlock(&xprt->xpt_lock);
1054 return dr; 1072 return dr;
1055} 1073}
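
Two svc_xprt.c idioms above deserve a note. svc_xprt_received() must hold a reference across clearing XPT_BUSY, because the moment the busy bit is clear another thread may close and free the transport; and svc_deferred_dequeue() now clears XPT_DEFERRED under xpt_lock only when the list is seen empty, instead of the old clear-then-reset dance that left a window where svc_xprt_has_something_to_do() could miss pending work. The reference pattern in isolation, as a sketch:

        static void example_mark_ready(struct svc_xprt *xprt)
        {
                svc_xprt_get(xprt);                     /* pin across the wakeup */
                clear_bit(XPT_BUSY, &xprt->xpt_flags);  /* others may close it now */
                svc_xprt_enqueue(xprt);                 /* safe: we still hold a ref */
                svc_xprt_put(xprt);
        }
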
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 4e9393c24687..7963569fc04f 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -118,7 +118,6 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister);
118 118
119#define DN_HASHBITS 6 119#define DN_HASHBITS 6
120#define DN_HASHMAX (1<<DN_HASHBITS) 120#define DN_HASHMAX (1<<DN_HASHBITS)
121#define DN_HASHMASK (DN_HASHMAX-1)
122 121
123static struct hlist_head auth_domain_table[DN_HASHMAX]; 122static struct hlist_head auth_domain_table[DN_HASHMAX];
124static spinlock_t auth_domain_lock = 123static spinlock_t auth_domain_lock =
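
The DN_HASHMASK removal here (and the IP_HASHMASK removal below) is a dead-code cleanup: hash_str() already folds its result into the low DN_HASHBITS bits, so an explicit AND mask was never needed to index the table. For illustration (sketch):

        /* hash_str(name, bits) returns a value in [0, 1 << bits), so it
         * can index auth_domain_table[DN_HASHMAX] directly, no mask. */
        static struct hlist_head *example_bucket(char *name)
        {
                return &auth_domain_table[hash_str(name, DN_HASHBITS)];
        }
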
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 207311610988..c8e10216c113 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -18,6 +18,8 @@
18 18
19#include <linux/sunrpc/clnt.h> 19#include <linux/sunrpc/clnt.h>
20 20
21#include "netns.h"
22
21/* 23/*
22 * AUTHUNIX and AUTHNULL credentials are both handled here. 24 * AUTHUNIX and AUTHNULL credentials are both handled here.
23 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid 25 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid
@@ -28,12 +30,22 @@
28 30
29struct unix_domain { 31struct unix_domain {
30 struct auth_domain h; 32 struct auth_domain h;
33#ifdef CONFIG_NFSD_DEPRECATED
31 int addr_changes; 34 int addr_changes;
35#endif /* CONFIG_NFSD_DEPRECATED */
32 /* other stuff later */ 36 /* other stuff later */
33}; 37};
34 38
35extern struct auth_ops svcauth_unix; 39extern struct auth_ops svcauth_unix;
36 40
41static void svcauth_unix_domain_release(struct auth_domain *dom)
42{
43 struct unix_domain *ud = container_of(dom, struct unix_domain, h);
44
45 kfree(dom->name);
46 kfree(ud);
47}
48
37struct auth_domain *unix_domain_find(char *name) 49struct auth_domain *unix_domain_find(char *name)
38{ 50{
39 struct auth_domain *rv; 51 struct auth_domain *rv;
@@ -43,7 +55,7 @@ struct auth_domain *unix_domain_find(char *name)
43 while(1) { 55 while(1) {
44 if (rv) { 56 if (rv) {
45 if (new && rv != &new->h) 57 if (new && rv != &new->h)
46 auth_domain_put(&new->h); 58 svcauth_unix_domain_release(&new->h);
47 59
48 if (rv->flavour != &svcauth_unix) { 60 if (rv->flavour != &svcauth_unix) {
49 auth_domain_put(rv); 61 auth_domain_put(rv);
@@ -62,20 +74,14 @@ struct auth_domain *unix_domain_find(char *name)
62 return NULL; 74 return NULL;
63 } 75 }
64 new->h.flavour = &svcauth_unix; 76 new->h.flavour = &svcauth_unix;
77#ifdef CONFIG_NFSD_DEPRECATED
65 new->addr_changes = 0; 78 new->addr_changes = 0;
79#endif /* CONFIG_NFSD_DEPRECATED */
66 rv = auth_domain_lookup(name, &new->h); 80 rv = auth_domain_lookup(name, &new->h);
67 } 81 }
68} 82}
69EXPORT_SYMBOL_GPL(unix_domain_find); 83EXPORT_SYMBOL_GPL(unix_domain_find);
70 84
71static void svcauth_unix_domain_release(struct auth_domain *dom)
72{
73 struct unix_domain *ud = container_of(dom, struct unix_domain, h);
74
75 kfree(dom->name);
76 kfree(ud);
77}
78
79 85
80/************************************************** 86/**************************************************
81 * cache for IP address to unix_domain 87 * cache for IP address to unix_domain
@@ -83,16 +89,16 @@ static void svcauth_unix_domain_release(struct auth_domain *dom)
83 */ 89 */
84#define IP_HASHBITS 8 90#define IP_HASHBITS 8
85#define IP_HASHMAX (1<<IP_HASHBITS) 91#define IP_HASHMAX (1<<IP_HASHBITS)
86#define IP_HASHMASK (IP_HASHMAX-1)
87 92
88struct ip_map { 93struct ip_map {
89 struct cache_head h; 94 struct cache_head h;
90 char m_class[8]; /* e.g. "nfsd" */ 95 char m_class[8]; /* e.g. "nfsd" */
91 struct in6_addr m_addr; 96 struct in6_addr m_addr;
92 struct unix_domain *m_client; 97 struct unix_domain *m_client;
98#ifdef CONFIG_NFSD_DEPRECATED
93 int m_add_change; 99 int m_add_change;
100#endif /* CONFIG_NFSD_DEPRECATED */
94}; 101};
95static struct cache_head *ip_table[IP_HASHMAX];
96 102
97static void ip_map_put(struct kref *kref) 103static void ip_map_put(struct kref *kref)
98{ 104{
@@ -145,7 +151,9 @@ static void update(struct cache_head *cnew, struct cache_head *citem)
145 151
146 kref_get(&item->m_client->h.ref); 152 kref_get(&item->m_client->h.ref);
147 new->m_client = item->m_client; 153 new->m_client = item->m_client;
154#ifdef CONFIG_NFSD_DEPRECATED
148 new->m_add_change = item->m_add_change; 155 new->m_add_change = item->m_add_change;
156#endif /* CONFIG_NFSD_DEPRECATED */
149} 157}
150static struct cache_head *ip_map_alloc(void) 158static struct cache_head *ip_map_alloc(void)
151{ 159{
@@ -178,8 +186,8 @@ static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
178 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); 186 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request);
179} 187}
180 188
181static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); 189static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
182static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); 190static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
183 191
184static int ip_map_parse(struct cache_detail *cd, 192static int ip_map_parse(struct cache_detail *cd,
185 char *mesg, int mlen) 193 char *mesg, int mlen)
@@ -219,10 +227,9 @@ static int ip_map_parse(struct cache_detail *cd,
219 switch (address.sa.sa_family) { 227 switch (address.sa.sa_family) {
220 case AF_INET: 228 case AF_INET:
221 /* Form a mapped IPv4 address in sin6 */ 229 /* Form a mapped IPv4 address in sin6 */
222 memset(&sin6, 0, sizeof(sin6));
223 sin6.sin6_family = AF_INET6; 230 sin6.sin6_family = AF_INET6;
224 sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); 231 ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
225 sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; 232 &sin6.sin6_addr);
226 break; 233 break;
227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 234#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
228 case AF_INET6: 235 case AF_INET6:
@@ -249,9 +256,9 @@ static int ip_map_parse(struct cache_detail *cd,
249 dom = NULL; 256 dom = NULL;
250 257
251 /* IPv6 scope IDs are ignored for now */ 258 /* IPv6 scope IDs are ignored for now */
252 ipmp = ip_map_lookup(class, &sin6.sin6_addr); 259 ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
253 if (ipmp) { 260 if (ipmp) {
254 err = ip_map_update(ipmp, 261 err = __ip_map_update(cd, ipmp,
255 container_of(dom, struct unix_domain, h), 262 container_of(dom, struct unix_domain, h),
256 expiry); 263 expiry);
257 } else 264 } else
@@ -294,29 +301,15 @@ static int ip_map_show(struct seq_file *m,
294} 301}
295 302
296 303
297struct cache_detail ip_map_cache = { 304static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
298 .owner = THIS_MODULE, 305 struct in6_addr *addr)
299 .hash_size = IP_HASHMAX,
300 .hash_table = ip_table,
301 .name = "auth.unix.ip",
302 .cache_put = ip_map_put,
303 .cache_upcall = ip_map_upcall,
304 .cache_parse = ip_map_parse,
305 .cache_show = ip_map_show,
306 .match = ip_map_match,
307 .init = ip_map_init,
308 .update = update,
309 .alloc = ip_map_alloc,
310};
311
312static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
313{ 306{
314 struct ip_map ip; 307 struct ip_map ip;
315 struct cache_head *ch; 308 struct cache_head *ch;
316 309
317 strcpy(ip.m_class, class); 310 strcpy(ip.m_class, class);
318 ipv6_addr_copy(&ip.m_addr, addr); 311 ipv6_addr_copy(&ip.m_addr, addr);
319 ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, 312 ch = sunrpc_cache_lookup(cd, &ip.h,
320 hash_str(class, IP_HASHBITS) ^ 313 hash_str(class, IP_HASHBITS) ^
321 hash_ip6(*addr)); 314 hash_ip6(*addr));
322 315
@@ -326,7 +319,17 @@ static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
326 return NULL; 319 return NULL;
327} 320}
328 321
329static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) 322static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
323 struct in6_addr *addr)
324{
325 struct sunrpc_net *sn;
326
327 sn = net_generic(net, sunrpc_net_id);
328 return __ip_map_lookup(sn->ip_map_cache, class, addr);
329}
330
331static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
332 struct unix_domain *udom, time_t expiry)
330{ 333{
331 struct ip_map ip; 334 struct ip_map ip;
332 struct cache_head *ch; 335 struct cache_head *ch;
@@ -335,6 +338,7 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
335 ip.h.flags = 0; 338 ip.h.flags = 0;
336 if (!udom) 339 if (!udom)
337 set_bit(CACHE_NEGATIVE, &ip.h.flags); 340 set_bit(CACHE_NEGATIVE, &ip.h.flags);
341#ifdef CONFIG_NFSD_DEPRECATED
338 else { 342 else {
339 ip.m_add_change = udom->addr_changes; 343 ip.m_add_change = udom->addr_changes;
340 /* if this is from the legacy set_client system call, 344 /* if this is from the legacy set_client system call,
@@ -343,18 +347,28 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
343 if (expiry == NEVER) 347 if (expiry == NEVER)
344 ip.m_add_change++; 348 ip.m_add_change++;
345 } 349 }
350#endif /* CONFIG_NFSD_DEPRECATED */
346 ip.h.expiry_time = expiry; 351 ip.h.expiry_time = expiry;
347 ch = sunrpc_cache_update(&ip_map_cache, 352 ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
348 &ip.h, &ipm->h,
349 hash_str(ipm->m_class, IP_HASHBITS) ^ 353 hash_str(ipm->m_class, IP_HASHBITS) ^
350 hash_ip6(ipm->m_addr)); 354 hash_ip6(ipm->m_addr));
351 if (!ch) 355 if (!ch)
352 return -ENOMEM; 356 return -ENOMEM;
353 cache_put(ch, &ip_map_cache); 357 cache_put(ch, cd);
354 return 0; 358 return 0;
355} 359}
356 360
357int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) 361static inline int ip_map_update(struct net *net, struct ip_map *ipm,
362 struct unix_domain *udom, time_t expiry)
363{
364 struct sunrpc_net *sn;
365
366 sn = net_generic(net, sunrpc_net_id);
367 return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
368}
369
370#ifdef CONFIG_NFSD_DEPRECATED
371int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom)
358{ 372{
359 struct unix_domain *udom; 373 struct unix_domain *udom;
360 struct ip_map *ipmp; 374 struct ip_map *ipmp;
@@ -362,10 +376,10 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
362 if (dom->flavour != &svcauth_unix) 376 if (dom->flavour != &svcauth_unix)
363 return -EINVAL; 377 return -EINVAL;
364 udom = container_of(dom, struct unix_domain, h); 378 udom = container_of(dom, struct unix_domain, h);
365 ipmp = ip_map_lookup("nfsd", addr); 379 ipmp = ip_map_lookup(net, "nfsd", addr);
366 380
367 if (ipmp) 381 if (ipmp)
368 return ip_map_update(ipmp, udom, NEVER); 382 return ip_map_update(net, ipmp, udom, NEVER);
369 else 383 else
370 return -ENOMEM; 384 return -ENOMEM;
371} 385}
@@ -383,42 +397,51 @@ int auth_unix_forget_old(struct auth_domain *dom)
383} 397}
384EXPORT_SYMBOL_GPL(auth_unix_forget_old); 398EXPORT_SYMBOL_GPL(auth_unix_forget_old);
385 399
386struct auth_domain *auth_unix_lookup(struct in6_addr *addr) 400struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr)
387{ 401{
388 struct ip_map *ipm; 402 struct ip_map *ipm;
389 struct auth_domain *rv; 403 struct auth_domain *rv;
404 struct sunrpc_net *sn;
390 405
391 ipm = ip_map_lookup("nfsd", addr); 406 sn = net_generic(net, sunrpc_net_id);
407 ipm = ip_map_lookup(net, "nfsd", addr);
392 408
393 if (!ipm) 409 if (!ipm)
394 return NULL; 410 return NULL;
395 if (cache_check(&ip_map_cache, &ipm->h, NULL)) 411 if (cache_check(sn->ip_map_cache, &ipm->h, NULL))
396 return NULL; 412 return NULL;
397 413
398 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { 414 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
399 if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0) 415 sunrpc_invalidate(&ipm->h, sn->ip_map_cache);
400 auth_domain_put(&ipm->m_client->h);
401 rv = NULL; 416 rv = NULL;
402 } else { 417 } else {
403 rv = &ipm->m_client->h; 418 rv = &ipm->m_client->h;
404 kref_get(&rv->ref); 419 kref_get(&rv->ref);
405 } 420 }
406 cache_put(&ipm->h, &ip_map_cache); 421 cache_put(&ipm->h, sn->ip_map_cache);
407 return rv; 422 return rv;
408} 423}
409EXPORT_SYMBOL_GPL(auth_unix_lookup); 424EXPORT_SYMBOL_GPL(auth_unix_lookup);
425#endif /* CONFIG_NFSD_DEPRECATED */
410 426
411void svcauth_unix_purge(void) 427void svcauth_unix_purge(void)
412{ 428{
413 cache_purge(&ip_map_cache); 429 struct net *net;
430
431 for_each_net(net) {
432 struct sunrpc_net *sn;
433
434 sn = net_generic(net, sunrpc_net_id);
435 cache_purge(sn->ip_map_cache);
436 }
414} 437}
415EXPORT_SYMBOL_GPL(svcauth_unix_purge); 438EXPORT_SYMBOL_GPL(svcauth_unix_purge);
416 439
417static inline struct ip_map * 440static inline struct ip_map *
418ip_map_cached_get(struct svc_rqst *rqstp) 441ip_map_cached_get(struct svc_xprt *xprt)
419{ 442{
420 struct ip_map *ipm = NULL; 443 struct ip_map *ipm = NULL;
421 struct svc_xprt *xprt = rqstp->rq_xprt; 444 struct sunrpc_net *sn;
422 445
423 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 446 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
424 spin_lock(&xprt->xpt_lock); 447 spin_lock(&xprt->xpt_lock);
@@ -430,9 +453,10 @@ ip_map_cached_get(struct svc_rqst *rqstp)
430 * remembered, e.g. by a second mount from the 453 * remembered, e.g. by a second mount from the
431 * same IP address. 454 * same IP address.
432 */ 455 */
456 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
433 xprt->xpt_auth_cache = NULL; 457 xprt->xpt_auth_cache = NULL;
434 spin_unlock(&xprt->xpt_lock); 458 spin_unlock(&xprt->xpt_lock);
435 cache_put(&ipm->h, &ip_map_cache); 459 cache_put(&ipm->h, sn->ip_map_cache);
436 return NULL; 460 return NULL;
437 } 461 }
438 cache_get(&ipm->h); 462 cache_get(&ipm->h);
@@ -443,10 +467,8 @@ ip_map_cached_get(struct svc_rqst *rqstp)
443} 467}
444 468
445static inline void 469static inline void
446ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) 470ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
447{ 471{
448 struct svc_xprt *xprt = rqstp->rq_xprt;
449
450 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 472 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
451 spin_lock(&xprt->xpt_lock); 473 spin_lock(&xprt->xpt_lock);
452 if (xprt->xpt_auth_cache == NULL) { 474 if (xprt->xpt_auth_cache == NULL) {
@@ -456,15 +478,26 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
456 } 478 }
457 spin_unlock(&xprt->xpt_lock); 479 spin_unlock(&xprt->xpt_lock);
458 } 480 }
459 if (ipm) 481 if (ipm) {
460 cache_put(&ipm->h, &ip_map_cache); 482 struct sunrpc_net *sn;
483
484 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
485 cache_put(&ipm->h, sn->ip_map_cache);
486 }
461} 487}
462 488
463void 489void
464svcauth_unix_info_release(void *info) 490svcauth_unix_info_release(struct svc_xprt *xpt)
465{ 491{
466 struct ip_map *ipm = info; 492 struct ip_map *ipm;
467 cache_put(&ipm->h, &ip_map_cache); 493
494 ipm = xpt->xpt_auth_cache;
495 if (ipm != NULL) {
496 struct sunrpc_net *sn;
497
498 sn = net_generic(xpt->xpt_net, sunrpc_net_id);
499 cache_put(&ipm->h, sn->ip_map_cache);
500 }
468} 501}
469 502
470/**************************************************************************** 503/****************************************************************************
@@ -474,7 +507,6 @@ svcauth_unix_info_release(void *info)
474 */ 507 */
475#define GID_HASHBITS 8 508#define GID_HASHBITS 8
476#define GID_HASHMAX (1<<GID_HASHBITS) 509#define GID_HASHMAX (1<<GID_HASHBITS)
477#define GID_HASHMASK (GID_HASHMAX - 1)
478 510
479struct unix_gid { 511struct unix_gid {
480 struct cache_head h; 512 struct cache_head h;
@@ -674,6 +706,8 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
674 switch (ret) { 706 switch (ret) {
675 case -ENOENT: 707 case -ENOENT:
676 return ERR_PTR(-ENOENT); 708 return ERR_PTR(-ENOENT);
709 case -ETIMEDOUT:
710 return ERR_PTR(-ESHUTDOWN);
677 case 0: 711 case 0:
678 gi = get_group_info(ug->gi); 712 gi = get_group_info(ug->gi);
679 cache_put(&ug->h, &unix_gid_cache); 713 cache_put(&ug->h, &unix_gid_cache);
@@ -691,6 +725,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
691 struct ip_map *ipm; 725 struct ip_map *ipm;
692 struct group_info *gi; 726 struct group_info *gi;
693 struct svc_cred *cred = &rqstp->rq_cred; 727 struct svc_cred *cred = &rqstp->rq_cred;
728 struct svc_xprt *xprt = rqstp->rq_xprt;
729 struct net *net = xprt->xpt_net;
730 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
694 731
695 switch (rqstp->rq_addr.ss_family) { 732 switch (rqstp->rq_addr.ss_family) {
696 case AF_INET: 733 case AF_INET:
@@ -709,26 +746,27 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
709 if (rqstp->rq_proc == 0) 746 if (rqstp->rq_proc == 0)
710 return SVC_OK; 747 return SVC_OK;
711 748
712 ipm = ip_map_cached_get(rqstp); 749 ipm = ip_map_cached_get(xprt);
713 if (ipm == NULL) 750 if (ipm == NULL)
714 ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, 751 ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
715 &sin6->sin6_addr); 752 &sin6->sin6_addr);
716 753
717 if (ipm == NULL) 754 if (ipm == NULL)
718 return SVC_DENIED; 755 return SVC_DENIED;
719 756
720 switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { 757 switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
721 default: 758 default:
722 BUG(); 759 BUG();
723 case -EAGAIN:
724 case -ETIMEDOUT: 760 case -ETIMEDOUT:
761 return SVC_CLOSE;
762 case -EAGAIN:
725 return SVC_DROP; 763 return SVC_DROP;
726 case -ENOENT: 764 case -ENOENT:
727 return SVC_DENIED; 765 return SVC_DENIED;
728 case 0: 766 case 0:
729 rqstp->rq_client = &ipm->m_client->h; 767 rqstp->rq_client = &ipm->m_client->h;
730 kref_get(&rqstp->rq_client->ref); 768 kref_get(&rqstp->rq_client->ref);
731 ip_map_cached_put(rqstp, ipm); 769 ip_map_cached_put(xprt, ipm);
732 break; 770 break;
733 } 771 }
734 772
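
The reworked switch gives each cache_check() outcome its own disposition: 0 accepts, -EAGAIN (upcall still in flight) drops the request so the client retries, the new -ETIMEDOUT arm closes the connection because the cache is being torn down, and -ENOENT denies. A minimal userspace model of that policy, with hypothetical SVC_* stand-ins for the kernel's verdict codes (the kernel BUG()s on anything unexpected; the model just denies):

#include <errno.h>

enum svc_verdict { SVC_OK, SVC_DROP, SVC_DENIED, SVC_CLOSE };

/* Map a cache-lookup status onto an RPC-level disposition, mirroring
 * the switch in svcauth_unix_set_client (names are stand-ins). */
static enum svc_verdict verdict_from_cache(int err)
{
	switch (err) {
	case 0:
		return SVC_OK;		/* positive entry: accept the client */
	case -EAGAIN:
		return SVC_DROP;	/* upcall in flight: drop, client retries */
	case -ETIMEDOUT:
		return SVC_CLOSE;	/* cache shutting down: close the transport */
	case -ENOENT:
	default:			/* the kernel BUG()s here instead */
		return SVC_DENIED;	/* negative entry: reject */
	}
}
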
@@ -736,6 +774,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
736 switch (PTR_ERR(gi)) { 774 switch (PTR_ERR(gi)) {
737 case -EAGAIN: 775 case -EAGAIN:
738 return SVC_DROP; 776 return SVC_DROP;
777 case -ESHUTDOWN:
778 return SVC_CLOSE;
739 case -ENOENT: 779 case -ENOENT:
740 break; 780 break;
741 default: 781 default:
@@ -776,7 +816,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
776 cred->cr_gid = (gid_t) -1; 816 cred->cr_gid = (gid_t) -1;
777 cred->cr_group_info = groups_alloc(0); 817 cred->cr_group_info = groups_alloc(0);
778 if (cred->cr_group_info == NULL) 818 if (cred->cr_group_info == NULL)
779 return SVC_DROP; /* kmalloc failure - client must retry */ 819 return SVC_CLOSE; /* kmalloc failure - client must retry */
780 820
781 /* Put NULL verifier */ 821 /* Put NULL verifier */
782 svc_putnl(resv, RPC_AUTH_NULL); 822 svc_putnl(resv, RPC_AUTH_NULL);
@@ -840,7 +880,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
840 goto badcred; 880 goto badcred;
841 cred->cr_group_info = groups_alloc(slen); 881 cred->cr_group_info = groups_alloc(slen);
842 if (cred->cr_group_info == NULL) 882 if (cred->cr_group_info == NULL)
843 return SVC_DROP; 883 return SVC_CLOSE;
844 for (i = 0; i < slen; i++) 884 for (i = 0; i < slen; i++)
845 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); 885 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
846 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { 886 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
@@ -886,3 +926,56 @@ struct auth_ops svcauth_unix = {
886 .set_client = svcauth_unix_set_client, 926 .set_client = svcauth_unix_set_client,
887}; 927};
888 928
929int ip_map_cache_create(struct net *net)
930{
931 int err = -ENOMEM;
932 struct cache_detail *cd;
933 struct cache_head **tbl;
934 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
935
936 cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
937 if (cd == NULL)
938 goto err_cd;
939
940 tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL);
941 if (tbl == NULL)
942 goto err_tbl;
943
944	cd->owner = THIS_MODULE;
945	cd->hash_size = IP_HASHMAX;
946	cd->hash_table = tbl;
947	cd->name = "auth.unix.ip";
948	cd->cache_put = ip_map_put;
949	cd->cache_upcall = ip_map_upcall;
950	cd->cache_parse = ip_map_parse;
951	cd->cache_show = ip_map_show;
952	cd->match = ip_map_match;
953	cd->init = ip_map_init;
954	cd->update = update;
955	cd->alloc = ip_map_alloc;
956
957 err = cache_register_net(cd, net);
958 if (err)
959 goto err_reg;
960
961 sn->ip_map_cache = cd;
962 return 0;
963
964err_reg:
965 kfree(tbl);
966err_tbl:
967 kfree(cd);
968err_cd:
969 return err;
970}
971
972void ip_map_cache_destroy(struct net *net)
973{
974 struct sunrpc_net *sn;
975
976 sn = net_generic(net, sunrpc_net_id);
977 cache_purge(sn->ip_map_cache);
978 cache_unregister_net(sn->ip_map_cache, net);
979 kfree(sn->ip_map_cache->hash_table);
980 kfree(sn->ip_map_cache);
981}
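
ip_map_cache_create()/ip_map_cache_destroy() above move the formerly static ip_map_cache into per-namespace state: allocate the detail and its hash table, register it with the net, and unwind in reverse order on failure. A self-contained sketch of the same ownership pattern, using stand-in types rather than the kernel's cache API:

#include <stdlib.h>

#define IP_HASHMAX 256

struct cache_head;

struct cache_detail {
	int hash_size;
	struct cache_head **hash_table;
	const char *name;
};

/* One blob of per-namespace state; models struct sunrpc_net. */
struct my_net {
	struct cache_detail *ip_map_cache;
};

static int my_ip_map_cache_create(struct my_net *n)
{
	struct cache_detail *cd = calloc(1, sizeof(*cd));

	if (cd == NULL)
		return -1;
	cd->hash_table = calloc(IP_HASHMAX, sizeof(struct cache_head *));
	if (cd->hash_table == NULL) {
		free(cd);			/* unwind in reverse order */
		return -1;
	}
	cd->hash_size = IP_HASHMAX;
	cd->name = "auth.unix.ip";
	n->ip_map_cache = cd;			/* publish only when fully set up */
	return 0;
}

static void my_ip_map_cache_destroy(struct my_net *n)
{
	free(n->ip_map_cache->hash_table);
	free(n->ip_map_cache);
	n->ip_map_cache = NULL;
}

int main(void)
{
	struct my_net n = { 0 };

	if (my_ip_map_cache_create(&n) == 0)
		my_ip_map_cache_destroy(&n);
	return 0;
}
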
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7e534dd09077..af04f779ce9f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -64,7 +64,15 @@ static void svc_tcp_sock_detach(struct svc_xprt *);
64static void svc_sock_free(struct svc_xprt *); 64static void svc_sock_free(struct svc_xprt *);
65 65
66static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 66static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
67 struct sockaddr *, int, int); 67 struct net *, struct sockaddr *,
68 int, int);
69#if defined(CONFIG_NFS_V4_1)
70static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
71 struct net *, struct sockaddr *,
72 int, int);
73static void svc_bc_sock_free(struct svc_xprt *xprt);
74#endif /* CONFIG_NFS_V4_1 */
75
68#ifdef CONFIG_DEBUG_LOCK_ALLOC 76#ifdef CONFIG_DEBUG_LOCK_ALLOC
69static struct lock_class_key svc_key[2]; 77static struct lock_class_key svc_key[2];
70static struct lock_class_key svc_slock_key[2]; 78static struct lock_class_key svc_slock_key[2];
@@ -323,19 +331,21 @@ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen,
323 len = onelen; 331 len = onelen;
324 break; 332 break;
325 } 333 }
326 if (toclose && strcmp(toclose, buf + len) == 0) 334 if (toclose && strcmp(toclose, buf + len) == 0) {
327 closesk = svsk; 335 closesk = svsk;
328 else 336 svc_xprt_get(&closesk->sk_xprt);
337 } else
329 len += onelen; 338 len += onelen;
330 } 339 }
331 spin_unlock_bh(&serv->sv_lock); 340 spin_unlock_bh(&serv->sv_lock);
332 341
333 if (closesk) 342 if (closesk) {
334 /* Should unregister with portmap, but you cannot 343 /* Should unregister with portmap, but you cannot
335 * unregister just one protocol... 344 * unregister just one protocol...
336 */ 345 */
337 svc_close_xprt(&closesk->sk_xprt); 346 svc_close_xprt(&closesk->sk_xprt);
338 else if (toclose) 347 svc_xprt_put(&closesk->sk_xprt);
348 } else if (toclose)
339 return -ENOENT; 349 return -ENOENT;
340 return len; 350 return len;
341} 351}
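
The svc_sock_names() change pins closesk with svc_xprt_get() while the list lock is still held, so the xprt cannot be freed between dropping sv_lock and calling svc_close_xprt(); the reference is released afterwards. The general take-ref-under-lock pattern, modeled with a pthread mutex and a plain counter:

#include <pthread.h>
#include <stdlib.h>

struct obj {
	int refs;			/* protected by list_lock in this model */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Pin the object with a reference while the lock that keeps it alive is
 * still held, use it unlocked, then drop the reference - the shape of
 * the svc_xprt_get()/svc_close_xprt()/svc_xprt_put() sequence above. */
static void use_after_unlock(struct obj *o)
{
	pthread_mutex_lock(&list_lock);
	o->refs++;			/* svc_xprt_get() equivalent */
	pthread_mutex_unlock(&list_lock);

	/* ... safe to use o here: the reference prevents freeing ... */

	pthread_mutex_lock(&list_lock);
	if (--o->refs == 0) {		/* svc_xprt_put() equivalent */
		pthread_mutex_unlock(&list_lock);
		free(o);
		return;
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o)
		use_after_unlock(o);	/* 0 -> 1 -> 0: freed on the way out */
	return 0;
}
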
@@ -377,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
377 return len; 387 return len;
378} 388}
379 389
390static int svc_partial_recvfrom(struct svc_rqst *rqstp,
391 struct kvec *iov, int nr,
392 int buflen, unsigned int base)
393{
394 size_t save_iovlen;
395 void __user *save_iovbase;
396 unsigned int i;
397 int ret;
398
399 if (base == 0)
400 return svc_recvfrom(rqstp, iov, nr, buflen);
401
402 for (i = 0; i < nr; i++) {
403 if (iov[i].iov_len > base)
404 break;
405 base -= iov[i].iov_len;
406 }
407 save_iovlen = iov[i].iov_len;
408 save_iovbase = iov[i].iov_base;
409 iov[i].iov_len -= base;
410 iov[i].iov_base += base;
411 ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
412 iov[i].iov_len = save_iovlen;
413 iov[i].iov_base = save_iovbase;
414 return ret;
415}
416
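
svc_partial_recvfrom() resumes a receive 'base' bytes into the kvec array: it finds the element containing that offset, temporarily trims it, reads from there, and restores the element afterwards. The seek-and-trim arithmetic checked in isolation (the socket read itself is elided):

#include <assert.h>
#include <sys/uio.h>

/* Find the iovec element containing offset 'base' and shrink it so the
 * next read starts there; the caller saves and restores iov[i] around
 * the read, exactly as svc_partial_recvfrom does. */
static int iov_seek(struct iovec *iov, int nr, size_t base)
{
	int i;

	for (i = 0; i < nr; i++) {
		if (iov[i].iov_len > base)
			break;
		base -= iov[i].iov_len;
	}
	assert(i < nr);			/* base must lie inside the array */
	iov[i].iov_base = (char *)iov[i].iov_base + base;
	iov[i].iov_len -= base;
	return i;			/* resume the receive at iov[i] */
}

int main(void)
{
	char a[4], b[4];
	struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

	assert(iov_seek(iov, 2, 6) == 1);	/* offset 6 lands in b */
	assert(iov[1].iov_len == 2);		/* 2 bytes of b remain */
	return 0;
}
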
380/* 417/*
381 * Set socket snd and rcv buffer lengths 418 * Set socket snd and rcv buffer lengths
382 */ 419 */
@@ -399,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
399 lock_sock(sock->sk); 436 lock_sock(sock->sk);
400 sock->sk->sk_sndbuf = snd * 2; 437 sock->sk->sk_sndbuf = snd * 2;
401 sock->sk->sk_rcvbuf = rcv * 2; 438 sock->sk->sk_rcvbuf = rcv * 2;
402 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
403 sock->sk->sk_write_space(sock->sk); 439 sock->sk->sk_write_space(sock->sk);
404 release_sock(sock->sk); 440 release_sock(sock->sk);
405#endif 441#endif
@@ -410,6 +446,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
410static void svc_udp_data_ready(struct sock *sk, int count) 446static void svc_udp_data_ready(struct sock *sk, int count)
411{ 447{
412 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 448 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
449 wait_queue_head_t *wq = sk_sleep(sk);
413 450
414 if (svsk) { 451 if (svsk) {
415 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", 452 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
@@ -418,8 +455,8 @@ static void svc_udp_data_ready(struct sock *sk, int count)
418 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 455 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
419 svc_xprt_enqueue(&svsk->sk_xprt); 456 svc_xprt_enqueue(&svsk->sk_xprt);
420 } 457 }
421 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 458 if (wq && waitqueue_active(wq))
422 wake_up_interruptible(sk_sleep(sk)); 459 wake_up_interruptible(wq);
423} 460}
424 461
425/* 462/*
@@ -428,6 +465,7 @@ static void svc_udp_data_ready(struct sock *sk, int count)
428static void svc_write_space(struct sock *sk) 465static void svc_write_space(struct sock *sk)
429{ 466{
430 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); 467 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
468 wait_queue_head_t *wq = sk_sleep(sk);
431 469
432 if (svsk) { 470 if (svsk) {
433 dprintk("svc: socket %p(inet %p), write_space busy=%d\n", 471 dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
@@ -435,10 +473,10 @@ static void svc_write_space(struct sock *sk)
435 svc_xprt_enqueue(&svsk->sk_xprt); 473 svc_xprt_enqueue(&svsk->sk_xprt);
436 } 474 }
437 475
438 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { 476 if (wq && waitqueue_active(wq)) {
439 dprintk("RPC svc_write_space: someone sleeping on %p\n", 477 dprintk("RPC svc_write_space: someone sleeping on %p\n",
440 svsk); 478 svsk);
441 wake_up_interruptible(sk_sleep(sk)); 479 wake_up_interruptible(wq);
442 } 480 }
443} 481}
444 482
@@ -657,10 +695,11 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
657} 695}
658 696
659static struct svc_xprt *svc_udp_create(struct svc_serv *serv, 697static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
698 struct net *net,
660 struct sockaddr *sa, int salen, 699 struct sockaddr *sa, int salen,
661 int flags) 700 int flags)
662{ 701{
663 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); 702 return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
664} 703}
665 704
666static struct svc_xprt_ops svc_udp_ops = { 705static struct svc_xprt_ops svc_udp_ops = {
@@ -728,6 +767,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
728static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) 767static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
729{ 768{
730 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 769 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
770 wait_queue_head_t *wq;
731 771
732 dprintk("svc: socket %p TCP (listen) state change %d\n", 772 dprintk("svc: socket %p TCP (listen) state change %d\n",
733 sk, sk->sk_state); 773 sk, sk->sk_state);
@@ -750,8 +790,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
750 printk("svc: socket %p: no user data\n", sk); 790 printk("svc: socket %p: no user data\n", sk);
751 } 791 }
752 792
753 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 793 wq = sk_sleep(sk);
754 wake_up_interruptible_all(sk_sleep(sk)); 794 if (wq && waitqueue_active(wq))
795 wake_up_interruptible_all(wq);
755} 796}
756 797
757/* 798/*
@@ -760,6 +801,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
760static void svc_tcp_state_change(struct sock *sk) 801static void svc_tcp_state_change(struct sock *sk)
761{ 802{
762 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 803 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
804 wait_queue_head_t *wq = sk_sleep(sk);
763 805
764 dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", 806 dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
765 sk, sk->sk_state, sk->sk_user_data); 807 sk, sk->sk_state, sk->sk_user_data);
@@ -770,13 +812,14 @@ static void svc_tcp_state_change(struct sock *sk)
770 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 812 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
771 svc_xprt_enqueue(&svsk->sk_xprt); 813 svc_xprt_enqueue(&svsk->sk_xprt);
772 } 814 }
773 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 815 if (wq && waitqueue_active(wq))
774 wake_up_interruptible_all(sk_sleep(sk)); 816 wake_up_interruptible_all(wq);
775} 817}
776 818
777static void svc_tcp_data_ready(struct sock *sk, int count) 819static void svc_tcp_data_ready(struct sock *sk, int count)
778{ 820{
779 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 821 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
822 wait_queue_head_t *wq = sk_sleep(sk);
780 823
781 dprintk("svc: socket %p TCP data ready (svsk %p)\n", 824 dprintk("svc: socket %p TCP data ready (svsk %p)\n",
782 sk, sk->sk_user_data); 825 sk, sk->sk_user_data);
@@ -784,8 +827,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count)
784 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 827 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
785 svc_xprt_enqueue(&svsk->sk_xprt); 828 svc_xprt_enqueue(&svsk->sk_xprt);
786 } 829 }
787 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 830 if (wq && waitqueue_active(wq))
788 wake_up_interruptible(sk_sleep(sk)); 831 wake_up_interruptible(wq);
789} 832}
790 833
791/* 834/*
@@ -867,6 +910,56 @@ failed:
867 return NULL; 910 return NULL;
868} 911}
869 912
913static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
914{
915 unsigned int i, len, npages;
916
917 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
918 return 0;
919 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
920 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
921 for (i = 0; i < npages; i++) {
922 if (rqstp->rq_pages[i] != NULL)
923 put_page(rqstp->rq_pages[i]);
924 BUG_ON(svsk->sk_pages[i] == NULL);
925 rqstp->rq_pages[i] = svsk->sk_pages[i];
926 svsk->sk_pages[i] = NULL;
927 }
928 rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
929 return len;
930}
931
932static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
933{
934 unsigned int i, len, npages;
935
936 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
937 return;
938 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
939 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
940 for (i = 0; i < npages; i++) {
941 svsk->sk_pages[i] = rqstp->rq_pages[i];
942 rqstp->rq_pages[i] = NULL;
943 }
944}
945
946static void svc_tcp_clear_pages(struct svc_sock *svsk)
947{
948 unsigned int i, len, npages;
949
950 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
951 goto out;
952 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
953 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
954 for (i = 0; i < npages; i++) {
955 BUG_ON(svsk->sk_pages[i] == NULL);
956 put_page(svsk->sk_pages[i]);
957 svsk->sk_pages[i] = NULL;
958 }
959out:
960 svsk->sk_tcplen = 0;
961}
962
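
svc_tcp_save_pages()/svc_tcp_restore_pages() park the pages of a partially received record on the svc_sock between svc_recv calls and hand them back when more data arrives; ownership moves by pointer, never by copy. The hand-off, modeled compactly with stand-in structs:

#include <assert.h>
#include <stddef.h>

#define NPAGES 4

struct sockstate { void *pages[NPAGES]; };	/* models svsk->sk_pages[] */
struct request   { void *pages[NPAGES]; };	/* models rqstp->rq_pages[] */

/* Park the request's record pages on the socket between receives
 * (svc_tcp_save_pages): ownership moves, nothing is copied. */
static void save_pages(struct sockstate *s, struct request *r, int n)
{
	for (int i = 0; i < n; i++) {
		s->pages[i] = r->pages[i];
		r->pages[i] = NULL;
	}
}

/* Hand them back when more data arrives (svc_tcp_restore_pages). */
static void restore_pages(struct sockstate *s, struct request *r, int n)
{
	for (int i = 0; i < n; i++) {
		assert(s->pages[i] != NULL);	/* mirrors the BUG_ON */
		r->pages[i] = s->pages[i];
		s->pages[i] = NULL;
	}
}

int main(void)
{
	int backing[2];
	struct sockstate s = { { 0 } };
	struct request r = { { &backing[0], &backing[1] } };

	save_pages(&s, &r, 2);
	restore_pages(&s, &r, 2);
	assert(r.pages[0] == &backing[0] && s.pages[0] == NULL);
	return 0;
}
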
870/* 963/*
871 * Receive data. 964 * Receive data.
872 * If we haven't gotten the record length yet, get the next four bytes. 965 * If we haven't gotten the record length yet, get the next four bytes.
@@ -876,31 +969,15 @@ failed:
876static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) 969static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
877{ 970{
878 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 971 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
972 unsigned int want;
879 int len; 973 int len;
880 974
881 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
882 /* sndbuf needs to have room for one request
883 * per thread, otherwise we can stall even when the
884 * network isn't a bottleneck.
885 *
886 * We count all threads rather than threads in a
887 * particular pool, which provides an upper bound
888 * on the number of threads which will access the socket.
889 *
890 * rcvbuf just needs to be able to hold a few requests.
891 * Normally they will be removed from the queue
892 * as soon as a complete request arrives.
893 */
894 svc_sock_setbufsize(svsk->sk_sock,
895 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
896 3 * serv->sv_max_mesg);
897
898 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 975 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
899 976
900 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 977 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
901 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
902 struct kvec iov; 978 struct kvec iov;
903 979
980 want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
904 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; 981 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
905 iov.iov_len = want; 982 iov.iov_len = want;
906 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) 983 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
@@ -910,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
910 if (len < want) { 987 if (len < want) {
911 dprintk("svc: short recvfrom while reading record " 988 dprintk("svc: short recvfrom while reading record "
912 "length (%d of %d)\n", len, want); 989 "length (%d of %d)\n", len, want);
913 goto err_again; /* record header not complete */ 990 return -EAGAIN;
914 } 991 }
915 992
916 svsk->sk_reclen = ntohl(svsk->sk_reclen); 993 svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -937,81 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
937 } 1014 }
938 } 1015 }
939 1016
940 /* Check whether enough data is available */ 1017 if (svsk->sk_reclen < 8)
941 len = svc_recv_available(svsk); 1018 goto err_delete; /* client is nuts. */
942 if (len < 0)
943 goto error;
944 1019
945 if (len < svsk->sk_reclen) {
946 dprintk("svc: incomplete TCP record (%d of %d)\n",
947 len, svsk->sk_reclen);
948 goto err_again; /* record not complete */
949 }
950 len = svsk->sk_reclen; 1020 len = svsk->sk_reclen;
951 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
952 1021
953 return len; 1022 return len;
954 error: 1023error:
955 if (len == -EAGAIN) 1024 dprintk("RPC: TCP recv_record got %d\n", len);
956 dprintk("RPC: TCP recv_record got EAGAIN\n");
957 return len; 1025 return len;
958 err_delete: 1026err_delete:
959 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1027 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
960 err_again:
961 return -EAGAIN; 1028 return -EAGAIN;
962} 1029}
963 1030
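
The record parsing above follows RFC 1831 record marking: a 4-byte header whose top bit flags the final fragment and whose low 31 bits carry the fragment length, with anything under 8 bytes (XID plus call direction) treated as garbage. A standalone decoder for that marker, mirroring the checks in svc_tcp_recv_record() and the encoding svc_tcp_sendto() uses for its reply header:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define RPC_LAST_FRAG 0x80000000u	/* RFC 1831 record marking */

/* Decode a TCP record marker: top bit = "last fragment", low 31 bits =
 * fragment length; anything shorter than XID + call direction (8 bytes)
 * means the peer is broken and the connection should be closed. */
static int decode_marker(uint32_t wire, uint32_t *len, int *last)
{
	uint32_t host = ntohl(wire);

	*last = (host & RPC_LAST_FRAG) != 0;
	*len = host & ~RPC_LAST_FRAG;
	if (*len < 8)
		return -1;		/* "client is nuts": close */
	return 0;
}

int main(void)
{
	/* Marker for a 100-byte final fragment, built the same way the
	 * sendto path builds 0x80000000 | (len - 4). */
	uint32_t wire = htonl(RPC_LAST_FRAG | 100u);
	uint32_t len;
	int last;

	if (decode_marker(wire, &len, &last) == 0)
		printf("len=%u last=%d\n", len, last);
	return 0;
}
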
964static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, 1031static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
965 struct rpc_rqst **reqpp, struct kvec *vec)
966{ 1032{
1033 struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
967 struct rpc_rqst *req = NULL; 1034 struct rpc_rqst *req = NULL;
968 u32 *p; 1035 struct kvec *src, *dst;
969 u32 xid; 1036 __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
970 u32 calldir; 1037 __be32 xid;
971 int len; 1038 __be32 calldir;
972 1039
973 len = svc_recvfrom(rqstp, vec, 1, 8);
974 if (len < 0)
975 goto error;
976
977 p = (u32 *)rqstp->rq_arg.head[0].iov_base;
978 xid = *p++; 1040 xid = *p++;
979 calldir = *p; 1041 calldir = *p;
980 1042
981 if (calldir == 0) { 1043 if (bc_xprt)
982 /* REQUEST is the most common case */ 1044 req = xprt_lookup_rqst(bc_xprt, xid);
983 vec[0] = rqstp->rq_arg.head[0];
984 } else {
985 /* REPLY */
986 if (svsk->sk_bc_xprt)
987 req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid);
988
989 if (!req) {
990 printk(KERN_NOTICE
991 "%s: Got unrecognized reply: "
992 "calldir 0x%x sk_bc_xprt %p xid %08x\n",
993 __func__, ntohl(calldir),
994 svsk->sk_bc_xprt, xid);
995 vec[0] = rqstp->rq_arg.head[0];
996 goto out;
997 }
998 1045
999 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 1046 if (!req) {
1000 sizeof(struct xdr_buf)); 1047 printk(KERN_NOTICE
1001 /* copy the xid and call direction */ 1048 "%s: Got unrecognized reply: "
1002 memcpy(req->rq_private_buf.head[0].iov_base, 1049 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1003 rqstp->rq_arg.head[0].iov_base, 8); 1050 __func__, ntohl(calldir),
1004 vec[0] = req->rq_private_buf.head[0]; 1051 bc_xprt, xid);
1052 return -EAGAIN;
1005 } 1053 }
1006 out: 1054
1007 vec[0].iov_base += 8; 1055 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
1008 vec[0].iov_len -= 8; 1056 /*
1009 len = svsk->sk_reclen - 8; 1057 * XXX!: cheating for now! Only copying HEAD.
1010 error: 1058 * But we know this is good enough for now (in fact, for any
1011 *reqpp = req; 1059 * callback reply in the foreseeable future).
1012 return len; 1060 */
1061 dst = &req->rq_private_buf.head[0];
1062 src = &rqstp->rq_arg.head[0];
1063 if (dst->iov_len < src->iov_len)
1064 return -EAGAIN; /* whatever; just giving up. */
1065 memcpy(dst->iov_base, src->iov_base, src->iov_len);
1066 xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
1067 rqstp->rq_arg.len = 0;
1068 return 0;
1069}
1070
1071static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
1072{
1073 int i = 0;
1074 int t = 0;
1075
1076 while (t < len) {
1077 vec[i].iov_base = page_address(pages[i]);
1078 vec[i].iov_len = PAGE_SIZE;
1079 i++;
1080 t += PAGE_SIZE;
1081 }
1082 return i;
1013} 1083}
1014 1084
1085
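
copy_pages_to_kvecs() tiles a byte count onto page-sized kvec entries; the final entry is left at PAGE_SIZE even for an unaligned length, which is harmless because the caller only ever receives 'want' bytes into it. The same tiling as a runnable sketch:

#include <assert.h>
#include <stdlib.h>
#include <sys/uio.h>

#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

/* Same tiling as copy_pages_to_kvecs: one PAGE_SIZE iovec per page until
 * at least 'len' bytes are covered; returns the number of entries used. */
static int pages_to_iovecs(struct iovec *vec, void **pages, int len)
{
	int i = 0, t = 0;

	while (t < len) {
		vec[i].iov_base = pages[i];
		vec[i].iov_len = PAGE_SIZE;
		i++;
		t += PAGE_SIZE;
	}
	return i;
}

int main(void)
{
	void *pages[2] = { malloc(PAGE_SIZE), malloc(PAGE_SIZE) };
	struct iovec vec[2];

	assert(pages_to_iovecs(vec, pages, 5000) == 2);	/* 5000 B -> 2 pages */
	free(pages[0]);
	free(pages[1]);
	return 0;
}
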
1015/* 1086/*
1016 * Receive data from a TCP socket. 1087 * Receive data from a TCP socket.
1017 */ 1088 */
@@ -1022,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1022 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 1093 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1023 int len; 1094 int len;
1024 struct kvec *vec; 1095 struct kvec *vec;
1025 int pnum, vlen; 1096 unsigned int want, base;
1026 struct rpc_rqst *req = NULL; 1097 __be32 *p;
1098 __be32 calldir;
1099 int pnum;
1027 1100
1028 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 1101 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1029 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), 1102 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
@@ -1034,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1034 if (len < 0) 1107 if (len < 0)
1035 goto error; 1108 goto error;
1036 1109
1110 base = svc_tcp_restore_pages(svsk, rqstp);
1111 want = svsk->sk_reclen - base;
1112
1037 vec = rqstp->rq_vec; 1113 vec = rqstp->rq_vec;
1038 vec[0] = rqstp->rq_arg.head[0];
1039 vlen = PAGE_SIZE;
1040 1114
1041 /* 1115 pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
1042 * We have enough data for the whole tcp record. Let's try and read the 1116 svsk->sk_reclen);
1043 * first 8 bytes to get the xid and the call direction. We can use this
1044 * to figure out if this is a call or a reply to a callback. If
1045 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
1046 * In that case, don't bother with the calldir and just read the data.
1047 * It will be rejected in svc_process.
1048 */
1049 if (len >= 8) {
1050 len = svc_process_calldir(svsk, rqstp, &req, vec);
1051 if (len < 0)
1052 goto err_again;
1053 vlen -= 8;
1054 }
1055 1117
1056 pnum = 1;
1057 while (vlen < len) {
1058 vec[pnum].iov_base = (req) ?
1059 page_address(req->rq_private_buf.pages[pnum - 1]) :
1060 page_address(rqstp->rq_pages[pnum]);
1061 vec[pnum].iov_len = PAGE_SIZE;
1062 pnum++;
1063 vlen += PAGE_SIZE;
1064 }
1065 rqstp->rq_respages = &rqstp->rq_pages[pnum]; 1118 rqstp->rq_respages = &rqstp->rq_pages[pnum];
1066 1119
1067 /* Now receive data */ 1120 /* Now receive data */
1068 len = svc_recvfrom(rqstp, vec, pnum, len); 1121 len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
1069 if (len < 0) 1122 if (len >= 0)
1070 goto err_again; 1123 svsk->sk_tcplen += len;
1071 1124 if (len != want) {
1072 /* 1125 if (len < 0 && len != -EAGAIN)
1073 * Account for the 8 bytes we read earlier 1126 goto err_other;
1074 */ 1127 svc_tcp_save_pages(svsk, rqstp);
1075 len += 8; 1128 dprintk("svc: incomplete TCP record (%d of %d)\n",
1076 1129 svsk->sk_tcplen, svsk->sk_reclen);
1077 if (req) { 1130 goto err_noclose;
1078 xprt_complete_rqst(req->rq_task, len);
1079 len = 0;
1080 goto out;
1081 } 1131 }
1082 dprintk("svc: TCP complete record (%d bytes)\n", len); 1132
1083 rqstp->rq_arg.len = len; 1133 rqstp->rq_arg.len = svsk->sk_reclen;
1084 rqstp->rq_arg.page_base = 0; 1134 rqstp->rq_arg.page_base = 0;
1085 if (len <= rqstp->rq_arg.head[0].iov_len) { 1135 if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
1086 rqstp->rq_arg.head[0].iov_len = len; 1136 rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
1087 rqstp->rq_arg.page_len = 0; 1137 rqstp->rq_arg.page_len = 0;
1088 } else { 1138 } else
1089 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 1139 rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
1090 }
1091 1140
1092 rqstp->rq_xprt_ctxt = NULL; 1141 rqstp->rq_xprt_ctxt = NULL;
1093 rqstp->rq_prot = IPPROTO_TCP; 1142 rqstp->rq_prot = IPPROTO_TCP;
1094 1143
1095out: 1144 p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
1145 calldir = p[1];
1146 if (calldir)
1147 len = receive_cb_reply(svsk, rqstp);
1148
1096 /* Reset TCP read info */ 1149 /* Reset TCP read info */
1097 svsk->sk_reclen = 0; 1150 svsk->sk_reclen = 0;
1098 svsk->sk_tcplen = 0; 1151 svsk->sk_tcplen = 0;
1152 /* If we have more data, signal svc_xprt_enqueue() to try again */
1153 if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
1154 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1155
1156 if (len < 0)
1157 goto error;
1099 1158
1100 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); 1159 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
1101 if (serv->sv_stats) 1160 if (serv->sv_stats)
1102 serv->sv_stats->nettcpcnt++; 1161 serv->sv_stats->nettcpcnt++;
1103 1162
1104 return len; 1163 dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
1164 return rqstp->rq_arg.len;
1105 1165
1106err_again:
1107 if (len == -EAGAIN) {
1108 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1109 return len;
1110 }
1111error: 1166error:
1112 if (len != -EAGAIN) { 1167 if (len != -EAGAIN)
1113 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1168 goto err_other;
1114 svsk->sk_xprt.xpt_server->sv_name, -len); 1169 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1115 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1116 }
1117 return -EAGAIN; 1170 return -EAGAIN;
1171err_other:
1172 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1173 svsk->sk_xprt.xpt_server->sv_name, -len);
1174 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1175err_noclose:
1176 return -EAGAIN; /* record not complete */
1118} 1177}
1119 1178
1120/* 1179/*
@@ -1133,9 +1192,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
1133 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 1192 reclen = htonl(0x80000000|((xbufp->len ) - 4));
1134 memcpy(xbufp->head[0].iov_base, &reclen, 4); 1193 memcpy(xbufp->head[0].iov_base, &reclen, 4);
1135 1194
1136 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
1137 return -ENOTCONN;
1138
1139 sent = svc_sendto(rqstp, &rqstp->rq_res); 1195 sent = svc_sendto(rqstp, &rqstp->rq_res);
1140 if (sent != xbufp->len) { 1196 if (sent != xbufp->len) {
1141 printk(KERN_NOTICE 1197 printk(KERN_NOTICE
@@ -1178,11 +1234,63 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
1178} 1234}
1179 1235
1180static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, 1236static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1237 struct net *net,
1181 struct sockaddr *sa, int salen, 1238 struct sockaddr *sa, int salen,
1182 int flags) 1239 int flags)
1183{ 1240{
1184 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); 1241 return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
1242}
1243
1244#if defined(CONFIG_NFS_V4_1)
1245static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
1246 struct net *, struct sockaddr *,
1247 int, int);
1248static void svc_bc_sock_free(struct svc_xprt *xprt);
1249
1250static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv,
1251 struct net *net,
1252 struct sockaddr *sa, int salen,
1253 int flags)
1254{
1255 return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
1256}
1257
1258static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
1259{
1260}
1261
1262static struct svc_xprt_ops svc_tcp_bc_ops = {
1263 .xpo_create = svc_bc_tcp_create,
1264 .xpo_detach = svc_bc_tcp_sock_detach,
1265 .xpo_free = svc_bc_sock_free,
1266 .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
1267};
1268
1269static struct svc_xprt_class svc_tcp_bc_class = {
1270 .xcl_name = "tcp-bc",
1271 .xcl_owner = THIS_MODULE,
1272 .xcl_ops = &svc_tcp_bc_ops,
1273 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
1274};
1275
1276static void svc_init_bc_xprt_sock(void)
1277{
1278 svc_reg_xprt_class(&svc_tcp_bc_class);
1279}
1280
1281static void svc_cleanup_bc_xprt_sock(void)
1282{
1283 svc_unreg_xprt_class(&svc_tcp_bc_class);
1284}
1285#else /* CONFIG_NFS_V4_1 */
1286static void svc_init_bc_xprt_sock(void)
1287{
1288}
1289
1290static void svc_cleanup_bc_xprt_sock(void)
1291{
1185} 1292}
1293#endif /* CONFIG_NFS_V4_1 */
1186 1294
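
The backchannel shares the fore-channel TCP socket, so svc_tcp_bc_ops only needs create, a no-op detach, free, and reply-header prep, and registration is a symmetric class add/remove at module init/cleanup. A loose model of that class-plus-vtable shape (the struct layout here is illustrative, not the kernel's):

#include <stdio.h>

/* Loose model of an svc_xprt_class: a name plus an ops vtable; the
 * real structs have more fields (owner, max payload, a list head). */
struct xprt_ops {
	void (*detach)(void *xprt);
	void (*free)(void *xprt);
};

struct xprt_class {
	const char *name;
	const struct xprt_ops *ops;
};

static void bc_detach(void *xprt)
{
	(void)xprt;	/* nothing to do: the socket belongs to the fore channel */
}

static void bc_free(void *xprt)
{
	(void)xprt;	/* free only the wrapper, never the shared socket */
}

static const struct xprt_ops bc_ops = { .detach = bc_detach, .free = bc_free };
static const struct xprt_class bc_class = { .name = "tcp-bc", .ops = &bc_ops };

int main(void)
{
	printf("register %s\n", bc_class.name);		/* svc_reg_xprt_class() */
	printf("unregister %s\n", bc_class.name);	/* svc_unreg_xprt_class() */
	return 0;
}
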
1187static struct svc_xprt_ops svc_tcp_ops = { 1295static struct svc_xprt_ops svc_tcp_ops = {
1188 .xpo_create = svc_tcp_create, 1296 .xpo_create = svc_tcp_create,
@@ -1207,12 +1315,14 @@ void svc_init_xprt_sock(void)
1207{ 1315{
1208 svc_reg_xprt_class(&svc_tcp_class); 1316 svc_reg_xprt_class(&svc_tcp_class);
1209 svc_reg_xprt_class(&svc_udp_class); 1317 svc_reg_xprt_class(&svc_udp_class);
1318 svc_init_bc_xprt_sock();
1210} 1319}
1211 1320
1212void svc_cleanup_xprt_sock(void) 1321void svc_cleanup_xprt_sock(void)
1213{ 1322{
1214 svc_unreg_xprt_class(&svc_tcp_class); 1323 svc_unreg_xprt_class(&svc_tcp_class);
1215 svc_unreg_xprt_class(&svc_udp_class); 1324 svc_unreg_xprt_class(&svc_udp_class);
1325 svc_cleanup_bc_xprt_sock();
1216} 1326}
1217 1327
1218static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) 1328static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -1234,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1234 1344
1235 svsk->sk_reclen = 0; 1345 svsk->sk_reclen = 0;
1236 svsk->sk_tcplen = 0; 1346 svsk->sk_tcplen = 0;
1347 memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
1237 1348
1238 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1349 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1239 1350
1240 /* initialise setting must have enough space to
1241 * receive and respond to one request.
1242 * svc_tcp_recvfrom will re-adjust if necessary
1243 */
1244 svc_sock_setbufsize(svsk->sk_sock,
1245 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1246 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1247
1248 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1249 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1351 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1250 if (sk->sk_state != TCP_ESTABLISHED) 1352 if (sk->sk_state != TCP_ESTABLISHED)
1251 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1353 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1258,19 +1360,13 @@ void svc_sock_update_bufs(struct svc_serv *serv)
1258 * The number of server threads has changed. Update 1360 * The number of server threads has changed. Update
1259 * rcvbuf and sndbuf accordingly on all sockets 1361 * rcvbuf and sndbuf accordingly on all sockets
1260 */ 1362 */
1261 struct list_head *le; 1363 struct svc_sock *svsk;
1262 1364
1263 spin_lock_bh(&serv->sv_lock); 1365 spin_lock_bh(&serv->sv_lock);
1264 list_for_each(le, &serv->sv_permsocks) { 1366 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
1265 struct svc_sock *svsk =
1266 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1367 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1268 } 1368 list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list)
1269 list_for_each(le, &serv->sv_tempsocks) {
1270 struct svc_sock *svsk =
1271 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1272 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1369 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1273 }
1274 spin_unlock_bh(&serv->sv_lock); 1370 spin_unlock_bh(&serv->sv_lock);
1275} 1371}
1276EXPORT_SYMBOL_GPL(svc_sock_update_bufs); 1372EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
@@ -1315,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1315 /* Initialize the socket */ 1411 /* Initialize the socket */
1316 if (sock->type == SOCK_DGRAM) 1412 if (sock->type == SOCK_DGRAM)
1317 svc_udp_init(svsk, serv); 1413 svc_udp_init(svsk, serv);
1318 else 1414 else {
1415 /* initialise setting must have enough space to
1416 * receive and respond to one request.
1417 */
1418 svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
1419 4 * serv->sv_max_mesg);
1319 svc_tcp_init(svsk, serv); 1420 svc_tcp_init(svsk, serv);
1421 }
1320 1422
1321 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1423 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1322 svsk, svsk->sk_sk); 1424 svsk, svsk->sk_sk);
@@ -1385,6 +1487,7 @@ EXPORT_SYMBOL_GPL(svc_addsock);
1385 */ 1487 */
1386static struct svc_xprt *svc_create_socket(struct svc_serv *serv, 1488static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1387 int protocol, 1489 int protocol,
1490 struct net *net,
1388 struct sockaddr *sin, int len, 1491 struct sockaddr *sin, int len,
1389 int flags) 1492 int flags)
1390{ 1493{
@@ -1421,7 +1524,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1421 return ERR_PTR(-EINVAL); 1524 return ERR_PTR(-EINVAL);
1422 } 1525 }
1423 1526
1424 error = sock_create_kern(family, type, protocol, &sock); 1527 error = __sock_create(net, family, type, protocol, &sock, 1);
1425 if (error < 0) 1528 if (error < 0)
1426 return ERR_PTR(error); 1529 return ERR_PTR(error);
1427 1530
@@ -1472,6 +1575,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)
1472{ 1575{
1473 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1576 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1474 struct sock *sk = svsk->sk_sk; 1577 struct sock *sk = svsk->sk_sk;
1578 wait_queue_head_t *wq;
1475 1579
1476 dprintk("svc: svc_sock_detach(%p)\n", svsk); 1580 dprintk("svc: svc_sock_detach(%p)\n", svsk);
1477 1581
@@ -1480,8 +1584,9 @@ static void svc_sock_detach(struct svc_xprt *xprt)
1480 sk->sk_data_ready = svsk->sk_odata; 1584 sk->sk_data_ready = svsk->sk_odata;
1481 sk->sk_write_space = svsk->sk_owspace; 1585 sk->sk_write_space = svsk->sk_owspace;
1482 1586
1483 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 1587 wq = sk_sleep(sk);
1484 wake_up_interruptible(sk_sleep(sk)); 1588 if (wq && waitqueue_active(wq))
1589 wake_up_interruptible(wq);
1485} 1590}
1486 1591
1487/* 1592/*
@@ -1495,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
1495 1600
1496 svc_sock_detach(xprt); 1601 svc_sock_detach(xprt);
1497 1602
1498 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1603 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
1604 svc_tcp_clear_pages(svsk);
1499 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); 1605 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
1606 }
1500} 1607}
1501 1608
1502/* 1609/*
@@ -1514,41 +1621,43 @@ static void svc_sock_free(struct svc_xprt *xprt)
1514 kfree(svsk); 1621 kfree(svsk);
1515} 1622}
1516 1623
1624#if defined(CONFIG_NFS_V4_1)
1517/* 1625/*
1518 * Create a svc_xprt. 1626 * Create a back channel svc_xprt which shares the fore channel socket.
1519 *
1520 * For internal use only (e.g. nfsv4.1 backchannel).
1521 * Callers should typically use the xpo_create() method.
1522 */ 1627 */
1523struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) 1628static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
1629 int protocol,
1630 struct net *net,
1631 struct sockaddr *sin, int len,
1632 int flags)
1524{ 1633{
1525 struct svc_sock *svsk; 1634 struct svc_sock *svsk;
1526 struct svc_xprt *xprt = NULL; 1635 struct svc_xprt *xprt;
1636
1637 if (protocol != IPPROTO_TCP) {
1638 printk(KERN_WARNING "svc: only TCP sockets"
1639 " supported on shared back channel\n");
1640 return ERR_PTR(-EINVAL);
1641 }
1527 1642
1528 dprintk("svc: %s\n", __func__);
1529 svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); 1643 svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
1530 if (!svsk) 1644 if (!svsk)
1531 goto out; 1645 return ERR_PTR(-ENOMEM);
1532 1646
1533 xprt = &svsk->sk_xprt; 1647 xprt = &svsk->sk_xprt;
1534 if (prot == IPPROTO_TCP) 1648 svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
1535 svc_xprt_init(&svc_tcp_class, xprt, serv); 1649
1536 else if (prot == IPPROTO_UDP) 1650 serv->sv_bc_xprt = xprt;
1537 svc_xprt_init(&svc_udp_class, xprt, serv); 1651
1538 else
1539 BUG();
1540out:
1541 dprintk("svc: %s return %p\n", __func__, xprt);
1542 return xprt; 1652 return xprt;
1543} 1653}
1544EXPORT_SYMBOL_GPL(svc_sock_create);
1545 1654
1546/* 1655/*
1547 * Destroy a svc_sock. 1656 * Free a back channel svc_sock.
1548 */ 1657 */
1549void svc_sock_destroy(struct svc_xprt *xprt) 1658static void svc_bc_sock_free(struct svc_xprt *xprt)
1550{ 1659{
1551 if (xprt) 1660 if (xprt)
1552 kfree(container_of(xprt, struct svc_sock, sk_xprt)); 1661 kfree(container_of(xprt, struct svc_sock, sk_xprt));
1553} 1662}
1554EXPORT_SYMBOL_GPL(svc_sock_destroy); 1663#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index a1f82a87d34d..f008c14ad34c 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -111,6 +111,23 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
111} 111}
112EXPORT_SYMBOL_GPL(xdr_decode_string_inplace); 112EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
113 113
114/**
115 * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf
116 * @buf: XDR buffer where string resides
117 * @len: length of string, in bytes
118 *
119 */
120void
121xdr_terminate_string(struct xdr_buf *buf, const u32 len)
122{
123 char *kaddr;
124
125 kaddr = kmap_atomic(buf->pages[0], KM_USER0);
126 kaddr[buf->page_base + len] = '\0';
127 kunmap_atomic(kaddr, KM_USER0);
128}
129EXPORT_SYMBOL(xdr_terminate_string);
130
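
xdr_terminate_string() writes a NUL just past the decoded string so page-resident names can be used as C strings without copying; the byte it clobbers is XDR padding or data that has already been decoded, so it is expendable. The same trick in plain C:

#include <stdio.h>

/* XDR strings are length-prefixed, not NUL-terminated. The kernel
 * terminates in place after the surrounding fields are decoded, so
 * the overwritten byte (pad or already-consumed data) is expendable. */
static char *terminate_in_place(char *buf, unsigned int len)
{
	buf[len] = '\0';	/* one store instead of a copy */
	return buf;
}

int main(void)
{
	char wire[] = { 'f', 'i', 'l', 'e', 'X' };	/* 'X' models padding */

	printf("%s\n", terminate_in_place(wire, 4));	/* prints "file" */
	return 0;
}
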
114void 131void
115xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, 132xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
116 unsigned int len) 133 unsigned int len)
@@ -395,24 +412,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
395{ 412{
396 struct kvec *tail; 413 struct kvec *tail;
397 size_t copy; 414 size_t copy;
398 char *p;
399 unsigned int pglen = buf->page_len; 415 unsigned int pglen = buf->page_len;
416 unsigned int tailbuf_len;
400 417
401 tail = buf->tail; 418 tail = buf->tail;
402 BUG_ON (len > pglen); 419 BUG_ON (len > pglen);
403 420
421 tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
422
404 /* Shift the tail first */ 423 /* Shift the tail first */
405 if (tail->iov_len != 0) { 424 if (tailbuf_len != 0) {
406 p = (char *)tail->iov_base + len; 425 unsigned int free_space = tailbuf_len - tail->iov_len;
426
427 if (len < free_space)
428 free_space = len;
429 tail->iov_len += free_space;
430
431 copy = len;
407 if (tail->iov_len > len) { 432 if (tail->iov_len > len) {
408 copy = tail->iov_len - len; 433 char *p = (char *)tail->iov_base + len;
409 memmove(p, tail->iov_base, copy); 434 memmove(p, tail->iov_base, tail->iov_len - len);
410 } else 435 } else
411 buf->buflen -= len;
412 /* Copy from the inlined pages into the tail */
413 copy = len;
414 if (copy > tail->iov_len)
415 copy = tail->iov_len; 436 copy = tail->iov_len;
437 /* Copy from the inlined pages into the tail */
416 _copy_from_pages((char *)tail->iov_base, 438 _copy_from_pages((char *)tail->iov_base,
417 buf->pages, buf->page_base + pglen - len, 439 buf->pages, buf->page_base + pglen - len,
418 copy); 440 copy);
@@ -530,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
530} 552}
531EXPORT_SYMBOL_GPL(xdr_write_pages); 553EXPORT_SYMBOL_GPL(xdr_write_pages);
532 554
555static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
556 __be32 *p, unsigned int len)
557{
558 if (len > iov->iov_len)
559 len = iov->iov_len;
560 if (p == NULL)
561 p = (__be32*)iov->iov_base;
562 xdr->p = p;
563 xdr->end = (__be32*)(iov->iov_base + len);
564 xdr->iov = iov;
565 xdr->page_ptr = NULL;
566}
567
568static int xdr_set_page_base(struct xdr_stream *xdr,
569 unsigned int base, unsigned int len)
570{
571 unsigned int pgnr;
572 unsigned int maxlen;
573 unsigned int pgoff;
574 unsigned int pgend;
575 void *kaddr;
576
577 maxlen = xdr->buf->page_len;
578 if (base >= maxlen)
579 return -EINVAL;
580 maxlen -= base;
581 if (len > maxlen)
582 len = maxlen;
583
584 base += xdr->buf->page_base;
585
586 pgnr = base >> PAGE_SHIFT;
587 xdr->page_ptr = &xdr->buf->pages[pgnr];
588 kaddr = page_address(*xdr->page_ptr);
589
590 pgoff = base & ~PAGE_MASK;
591 xdr->p = (__be32*)(kaddr + pgoff);
592
593 pgend = pgoff + len;
594 if (pgend > PAGE_SIZE)
595 pgend = PAGE_SIZE;
596 xdr->end = (__be32*)(kaddr + pgend);
597 xdr->iov = NULL;
598 return 0;
599}
600
601static void xdr_set_next_page(struct xdr_stream *xdr)
602{
603 unsigned int newbase;
604
605 newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
606 newbase -= xdr->buf->page_base;
607
608 if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
609 xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
610}
611
612static bool xdr_set_next_buffer(struct xdr_stream *xdr)
613{
614 if (xdr->page_ptr != NULL)
615 xdr_set_next_page(xdr);
616 else if (xdr->iov == xdr->buf->head) {
617 if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
618 xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
619 }
620 return xdr->p != xdr->end;
621}
622
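
xdr_set_next_buffer() advances the decode cursor through the xdr_buf's regions in a fixed order: the head kvec, then each page in turn, then the tail kvec. That progression as a tiny state machine (region names are stand-ins):

#include <stdio.h>

enum region { RG_HEAD, RG_PAGES, RG_TAIL, RG_DONE };

/* Progression of xdr_set_next_buffer: head kvec, each page in turn,
 * then the tail kvec; 'pg' tracks the current page index. */
static enum region next_region(enum region cur, int *pg, int npages)
{
	switch (cur) {
	case RG_HEAD:
		if (npages > 0) {
			*pg = 0;
			return RG_PAGES;	/* xdr_set_page_base(xdr, 0, ...) */
		}
		return RG_TAIL;
	case RG_PAGES:
		if (++*pg < npages)
			return RG_PAGES;	/* xdr_set_next_page() */
		return RG_TAIL;			/* fall back to the tail kvec */
	default:
		return RG_DONE;
	}
}

int main(void)
{
	enum region r = RG_HEAD;
	int pg = -1;

	while (r != RG_DONE) {
		printf("region %d page %d\n", r, pg);
		r = next_region(r, &pg, 2);
	}
	return 0;
}
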
533/** 623/**
534 * xdr_init_decode - Initialize an xdr_stream for decoding data. 624 * xdr_init_decode - Initialize an xdr_stream for decoding data.
535 * @xdr: pointer to xdr_stream struct 625 * @xdr: pointer to xdr_stream struct
@@ -538,20 +628,86 @@ EXPORT_SYMBOL_GPL(xdr_write_pages);
538 */ 628 */
539void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) 629void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
540{ 630{
541 struct kvec *iov = buf->head;
542 unsigned int len = iov->iov_len;
543
544 if (len > buf->len)
545 len = buf->len;
546 xdr->buf = buf; 631 xdr->buf = buf;
547 xdr->iov = iov; 632 xdr->scratch.iov_base = NULL;
548 xdr->p = p; 633 xdr->scratch.iov_len = 0;
549 xdr->end = (__be32 *)((char *)iov->iov_base + len); 634 if (buf->head[0].iov_len != 0)
635 xdr_set_iov(xdr, buf->head, p, buf->len);
636 else if (buf->page_len != 0)
637 xdr_set_page_base(xdr, 0, buf->len);
550} 638}
551EXPORT_SYMBOL_GPL(xdr_init_decode); 639EXPORT_SYMBOL_GPL(xdr_init_decode);
552 640
553/** 641/**
554 * xdr_inline_decode - Retrieve non-page XDR data to decode 642 * xdr_init_decode - Initialize an xdr_stream for decoding data.
643 * @xdr: pointer to xdr_stream struct
644 * @buf: pointer to XDR buffer from which to decode data
645 * @pages: list of pages to decode into
646 * @len: length in bytes of buffer in pages
647 */
648void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
649 struct page **pages, unsigned int len)
650{
651 memset(buf, 0, sizeof(*buf));
652 buf->pages = pages;
653 buf->page_len = len;
654 buf->buflen = len;
655 buf->len = len;
656 xdr_init_decode(xdr, buf, NULL);
657}
658EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
659
660static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
661{
662 __be32 *p = xdr->p;
663 __be32 *q = p + XDR_QUADLEN(nbytes);
664
665 if (unlikely(q > xdr->end || q < p))
666 return NULL;
667 xdr->p = q;
668 return p;
669}
670
671/**
672 * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
673 * @xdr: pointer to xdr_stream struct
674 * @buf: pointer to an empty buffer
675 * @buflen: size of 'buf'
676 *
677 * The scratch buffer is used when decoding from an array of pages.
678 * If an xdr_inline_decode() call spans across page boundaries, then
679 * we copy the data into the scratch buffer in order to allow linear
680 * access.
681 */
682void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
683{
684 xdr->scratch.iov_base = buf;
685 xdr->scratch.iov_len = buflen;
686}
687EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
688
689static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
690{
691 __be32 *p;
692 void *cpdest = xdr->scratch.iov_base;
693 size_t cplen = (char *)xdr->end - (char *)xdr->p;
694
695 if (nbytes > xdr->scratch.iov_len)
696 return NULL;
697 memcpy(cpdest, xdr->p, cplen);
698 cpdest += cplen;
699 nbytes -= cplen;
700 if (!xdr_set_next_buffer(xdr))
701 return NULL;
702 p = __xdr_inline_decode(xdr, nbytes);
703 if (p == NULL)
704 return NULL;
705 memcpy(cpdest, p, nbytes);
706 return xdr->scratch.iov_base;
707}
708
709/**
710 * xdr_inline_decode - Retrieve XDR data to decode
555 * @xdr: pointer to xdr_stream struct 711 * @xdr: pointer to xdr_stream struct
556 * @nbytes: number of bytes of data to decode 712 * @nbytes: number of bytes of data to decode
557 * 713 *
@@ -562,13 +718,16 @@ EXPORT_SYMBOL_GPL(xdr_init_decode);
562 */ 718 */
563__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) 719__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
564{ 720{
565 __be32 *p = xdr->p; 721 __be32 *p;
566 __be32 *q = p + XDR_QUADLEN(nbytes);
567 722
568 if (unlikely(q > xdr->end || q < p)) 723 if (nbytes == 0)
724 return xdr->p;
725 if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
569 return NULL; 726 return NULL;
570 xdr->p = q; 727 p = __xdr_inline_decode(xdr, nbytes);
571 return p; 728 if (p != NULL)
729 return p;
730 return xdr_copy_to_scratch(xdr, nbytes);
572} 731}
573EXPORT_SYMBOL_GPL(xdr_inline_decode); 732EXPORT_SYMBOL_GPL(xdr_inline_decode);
574 733
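
When xdr_inline_decode() is asked for an object that straddles two decode buffers, xdr_copy_to_scratch() glues the remainder of the current buffer and the start of the next into the caller-registered scratch area and returns one linear pointer. The stitch itself is just two bounded memcpys:

#include <assert.h>
#include <string.h>

/* Model of xdr_copy_to_scratch: 'first' bytes of the object sit at the
 * end of the current buffer, the remaining 'rest' at the start of the
 * next; glue both halves into scratch and return one linear pointer. */
static void *stitch(void *scratch, size_t scratch_len,
		    const void *cur, size_t first,
		    const void *next, size_t rest)
{
	if (first + rest > scratch_len)
		return NULL;			/* scratch too small: fail */
	memcpy(scratch, cur, first);
	memcpy((char *)scratch + first, next, rest);
	return scratch;
}

int main(void)
{
	char left[] = "..NF";		/* object split as "NF" + "S4" */
	char right[] = "S4..";
	char scratch[8];
	char *p = stitch(scratch, sizeof(scratch), left + 2, 2, right, 2);

	assert(p && memcmp(p, "NFS4", 4) == 0);
	return 0;
}
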
@@ -628,16 +787,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
628 */ 787 */
629void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) 788void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
630{ 789{
631 char * kaddr = page_address(xdr->buf->pages[0]);
632 xdr_read_pages(xdr, len); 790 xdr_read_pages(xdr, len);
633 /* 791 /*
634 * Position current pointer at beginning of tail, and 792 * Position current pointer at beginning of tail, and
635 * set remaining message length. 793 * set remaining message length.
636 */ 794 */
637 if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) 795 xdr_set_page_base(xdr, 0, len);
638 len = PAGE_CACHE_SIZE - xdr->buf->page_base;
639 xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
640 xdr->end = (__be32 *)((char *)xdr->p + len);
641} 796}
642EXPORT_SYMBOL_GPL(xdr_enter_page); 797EXPORT_SYMBOL_GPL(xdr_enter_page);
643 798
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 970fb00f388c..ce5eb68a9664 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -199,15 +199,12 @@ int xprt_reserve_xprt(struct rpc_task *task)
199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
200 if (task == xprt->snd_task) 200 if (task == xprt->snd_task)
201 return 1; 201 return 1;
202 if (task == NULL)
203 return 0;
204 goto out_sleep; 202 goto out_sleep;
205 } 203 }
206 xprt->snd_task = task; 204 xprt->snd_task = task;
207 if (req) { 205 req->rq_bytes_sent = 0;
208 req->rq_bytes_sent = 0; 206 req->rq_ntrans++;
209 req->rq_ntrans++; 207
210 }
211 return 1; 208 return 1;
212 209
213out_sleep: 210out_sleep:
@@ -215,7 +212,7 @@ out_sleep:
215 task->tk_pid, xprt); 212 task->tk_pid, xprt);
216 task->tk_timeout = 0; 213 task->tk_timeout = 0;
217 task->tk_status = -EAGAIN; 214 task->tk_status = -EAGAIN;
218 if (req && req->rq_ntrans) 215 if (req->rq_ntrans)
219 rpc_sleep_on(&xprt->resend, task, NULL); 216 rpc_sleep_on(&xprt->resend, task, NULL);
220 else 217 else
221 rpc_sleep_on(&xprt->sending, task, NULL); 218 rpc_sleep_on(&xprt->sending, task, NULL);
@@ -757,13 +754,11 @@ static void xprt_connect_status(struct rpc_task *task)
757 */ 754 */
758struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) 755struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
759{ 756{
760 struct list_head *pos; 757 struct rpc_rqst *entry;
761 758
762 list_for_each(pos, &xprt->recv) { 759 list_for_each_entry(entry, &xprt->recv, rq_list)
763 struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
764 if (entry->rq_xid == xid) 760 if (entry->rq_xid == xid)
765 return entry; 761 return entry;
766 }
767 762
768 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", 763 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
769 ntohl(xid)); 764 ntohl(xid));
@@ -911,6 +906,7 @@ void xprt_transmit(struct rpc_task *task)
911 } 906 }
912 907
913 dprintk("RPC: %5u xmit complete\n", task->tk_pid); 908 dprintk("RPC: %5u xmit complete\n", task->tk_pid);
909 task->tk_flags |= RPC_TASK_SENT;
914 spin_lock_bh(&xprt->transport_lock); 910 spin_lock_bh(&xprt->transport_lock);
915 911
916 xprt->ops->set_retrans_timeout(task); 912 xprt->ops->set_retrans_timeout(task);
@@ -962,6 +958,38 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
962 spin_unlock(&xprt->reserve_lock); 958 spin_unlock(&xprt->reserve_lock);
963} 959}
964 960
961struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
962{
963 struct rpc_xprt *xprt;
964
965 xprt = kzalloc(size, GFP_KERNEL);
966 if (xprt == NULL)
967 goto out;
968 atomic_set(&xprt->count, 1);
969
970 xprt->max_reqs = max_req;
971 xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
972 if (xprt->slot == NULL)
973 goto out_free;
974
975 xprt->xprt_net = get_net(net);
976 return xprt;
977
978out_free:
979 kfree(xprt);
980out:
981 return NULL;
982}
983EXPORT_SYMBOL_GPL(xprt_alloc);
984
985void xprt_free(struct rpc_xprt *xprt)
986{
987 put_net(xprt->xprt_net);
988 kfree(xprt->slot);
989 kfree(xprt);
990}
991EXPORT_SYMBOL_GPL(xprt_free);
992
965/** 993/**
966 * xprt_reserve - allocate an RPC request slot 994 * xprt_reserve - allocate an RPC request slot
967 * @task: RPC task requesting a slot allocation 995 * @task: RPC task requesting a slot allocation
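
xprt_alloc() and xprt_free() centralize what every transport previously open-coded: allocating the rpc_xprt, sizing its request-slot table, and taking a reference on the network namespace. The calling convention, exactly as the xprt_setup_rdma() and xs_destroy() conversions later in this diff use it:

	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
			  xprt_rdma_slot_table_entries);
	if (xprt == NULL)
		return ERR_PTR(-ENOMEM);
	/* ... transport-specific setup ... */

	/* teardown mirrors setup: puts the netns ref, frees slots and xprt */
	xprt_free(xprt);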
@@ -1074,8 +1102,10 @@ found:
1074 -PTR_ERR(xprt)); 1102 -PTR_ERR(xprt));
1075 return xprt; 1103 return xprt;
1076 } 1104 }
1105 if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state))
1106 /* ->setup returned a pre-initialized xprt: */
1107 return xprt;
1077 1108
1078 kref_init(&xprt->kref);
1079 spin_lock_init(&xprt->transport_lock); 1109 spin_lock_init(&xprt->transport_lock);
1080 spin_lock_init(&xprt->reserve_lock); 1110 spin_lock_init(&xprt->reserve_lock);
1081 1111
@@ -1115,13 +1145,11 @@ found:
1115 1145
1116/** 1146/**
1117 * xprt_destroy - destroy an RPC transport, killing off all requests. 1147 * xprt_destroy - destroy an RPC transport, killing off all requests.
1118 * @kref: kref for the transport to destroy 1148 * @xprt: transport to destroy
1119 * 1149 *
1120 */ 1150 */
1121static void xprt_destroy(struct kref *kref) 1151static void xprt_destroy(struct rpc_xprt *xprt)
1122{ 1152{
1123 struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref);
1124
1125 dprintk("RPC: destroying transport %p\n", xprt); 1153 dprintk("RPC: destroying transport %p\n", xprt);
1126 xprt->shutdown = 1; 1154 xprt->shutdown = 1;
1127 del_timer_sync(&xprt->timer); 1155 del_timer_sync(&xprt->timer);
@@ -1145,7 +1173,8 @@ static void xprt_destroy(struct kref *kref)
1145 */ 1173 */
1146void xprt_put(struct rpc_xprt *xprt) 1174void xprt_put(struct rpc_xprt *xprt)
1147{ 1175{
1148 kref_put(&xprt->kref, xprt_destroy); 1176 if (atomic_dec_and_test(&xprt->count))
1177 xprt_destroy(xprt);
1149} 1178}
1150 1179
1151/** 1180/**
@@ -1155,6 +1184,7 @@ void xprt_put(struct rpc_xprt *xprt)
1155 */ 1184 */
1156struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) 1185struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
1157{ 1186{
1158 kref_get(&xprt->kref); 1187 if (atomic_inc_not_zero(&xprt->count))
1159 return xprt; 1188 return xprt;
1189 return NULL;
1160} 1190}
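
Replacing the kref with a bare atomic count lets xprt_get() fail: atomic_inc_not_zero() refuses to take a reference once the count has reached zero, so a racing get cannot resurrect a transport that xprt_put() is already destroying. The generic shape of the pattern, as a sketch:

	/* get side: may return NULL if the final put already ran */
	if (atomic_inc_not_zero(&obj->count))
		return obj;
	return NULL;

	/* put side: the caller dropping the last reference destroys */
	if (atomic_dec_and_test(&obj->count))
		destroy(obj);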
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2ac3f6e8adff..554d0814c875 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -87,6 +87,8 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
87 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) 87 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
88{ 88{
89 int len, n = 0, p; 89 int len, n = 0, p;
90 int page_base;
91 struct page **ppages;
90 92
91 if (pos == 0 && xdrbuf->head[0].iov_len) { 93 if (pos == 0 && xdrbuf->head[0].iov_len) {
92 seg[n].mr_page = NULL; 94 seg[n].mr_page = NULL;
@@ -95,34 +97,32 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
95 ++n; 97 ++n;
96 } 98 }
97 99
98 if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { 100 len = xdrbuf->page_len;
99 if (n == nsegs) 101 ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
100 return 0; 102 page_base = xdrbuf->page_base & ~PAGE_MASK;
101 seg[n].mr_page = xdrbuf->pages[0]; 103 p = 0;
102 seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; 104 while (len && n < nsegs) {
103 seg[n].mr_len = min_t(u32, 105 seg[n].mr_page = ppages[p];
104 PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); 106 seg[n].mr_offset = (void *)(unsigned long) page_base;
105 len = xdrbuf->page_len - seg[n].mr_len; 107 seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
108 BUG_ON(seg[n].mr_len > PAGE_SIZE);
109 len -= seg[n].mr_len;
106 ++n; 110 ++n;
107 p = 1; 111 ++p;
108 while (len > 0) { 112 page_base = 0; /* page offset only applies to first page */
109 if (n == nsegs)
110 return 0;
111 seg[n].mr_page = xdrbuf->pages[p];
112 seg[n].mr_offset = NULL;
113 seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
114 len -= seg[n].mr_len;
115 ++n;
116 ++p;
117 }
118 } 113 }
119 114
115 /* Message overflows the seg array */
116 if (len && n == nsegs)
117 return 0;
118
120 if (xdrbuf->tail[0].iov_len) { 119 if (xdrbuf->tail[0].iov_len) {
121 /* the rpcrdma protocol allows us to omit any trailing 120 /* the rpcrdma protocol allows us to omit any trailing
122 * xdr pad bytes, saving the server an RDMA operation. */ 121 * xdr pad bytes, saving the server an RDMA operation. */
123 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) 122 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
124 return n; 123 return n;
125 if (n == nsegs) 124 if (n == nsegs)
125 /* Tail remains, but we're out of segments */
126 return 0; 126 return 0;
127 seg[n].mr_page = NULL; 127 seg[n].mr_page = NULL;
128 seg[n].mr_offset = xdrbuf->tail[0].iov_base; 128 seg[n].mr_offset = xdrbuf->tail[0].iov_base;
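
All three rpcrdma conversions in this file apply the same recipe for a page_base that may point past the first page of the array: split the byte offset into a page index and an in-page offset, then zero the offset after the first iteration. Schematically:

	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = xdrbuf->page_base & ~PAGE_MASK;
	while (len) {
		chunk = min_t(u32, PAGE_SIZE - page_base, len);
		/* consume chunk bytes of ppages[p] at offset page_base */
		len -= chunk;
		++p;
		page_base = 0;	/* offset only applies to the first page */
	}

The old code special-cased i == 0 inside each loop instead; folding the offset into page_base removes that branch.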
@@ -296,6 +296,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
296 int copy_len; 296 int copy_len;
297 unsigned char *srcp, *destp; 297 unsigned char *srcp, *destp;
298 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 298 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
299 int page_base;
300 struct page **ppages;
299 301
300 destp = rqst->rq_svec[0].iov_base; 302 destp = rqst->rq_svec[0].iov_base;
301 curlen = rqst->rq_svec[0].iov_len; 303 curlen = rqst->rq_svec[0].iov_len;
@@ -324,28 +326,25 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
324 __func__, destp + copy_len, curlen); 326 __func__, destp + copy_len, curlen);
325 rqst->rq_svec[0].iov_len += curlen; 327 rqst->rq_svec[0].iov_len += curlen;
326 } 328 }
327
328 r_xprt->rx_stats.pullup_copy_count += copy_len; 329 r_xprt->rx_stats.pullup_copy_count += copy_len;
329 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; 330
331 page_base = rqst->rq_snd_buf.page_base;
332 ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
333 page_base &= ~PAGE_MASK;
334 npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
330 for (i = 0; copy_len && i < npages; i++) { 335 for (i = 0; copy_len && i < npages; i++) {
331 if (i == 0) 336 curlen = PAGE_SIZE - page_base;
332 curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
333 else
334 curlen = PAGE_SIZE;
335 if (curlen > copy_len) 337 if (curlen > copy_len)
336 curlen = copy_len; 338 curlen = copy_len;
337 dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", 339 dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
338 __func__, i, destp, copy_len, curlen); 340 __func__, i, destp, copy_len, curlen);
339 srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], 341 srcp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
340 KM_SKB_SUNRPC_DATA); 342 memcpy(destp, srcp+page_base, curlen);
341 if (i == 0)
342 memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
343 else
344 memcpy(destp, srcp, curlen);
345 kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); 343 kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
346 rqst->rq_svec[0].iov_len += curlen; 344 rqst->rq_svec[0].iov_len += curlen;
347 destp += curlen; 345 destp += curlen;
348 copy_len -= curlen; 346 copy_len -= curlen;
347 page_base = 0;
349 } 348 }
350 /* header now contains entire send message */ 349 /* header now contains entire send message */
351 return pad; 350 return pad;
@@ -606,6 +605,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
606{ 605{
607 int i, npages, curlen, olen; 606 int i, npages, curlen, olen;
608 char *destp; 607 char *destp;
608 struct page **ppages;
609 int page_base;
609 610
610 curlen = rqst->rq_rcv_buf.head[0].iov_len; 611 curlen = rqst->rq_rcv_buf.head[0].iov_len;
611 if (curlen > copy_len) { /* write chunk header fixup */ 612 if (curlen > copy_len) { /* write chunk header fixup */
@@ -624,32 +625,29 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
624 olen = copy_len; 625 olen = copy_len;
625 i = 0; 626 i = 0;
626 rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; 627 rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
628 page_base = rqst->rq_rcv_buf.page_base;
629 ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
630 page_base &= ~PAGE_MASK;
631
627 if (copy_len && rqst->rq_rcv_buf.page_len) { 632 if (copy_len && rqst->rq_rcv_buf.page_len) {
628 npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + 633 npages = PAGE_ALIGN(page_base +
629 rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; 634 rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
630 for (; i < npages; i++) { 635 for (; i < npages; i++) {
631 if (i == 0) 636 curlen = PAGE_SIZE - page_base;
632 curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
633 else
634 curlen = PAGE_SIZE;
635 if (curlen > copy_len) 637 if (curlen > copy_len)
636 curlen = copy_len; 638 curlen = copy_len;
637 dprintk("RPC: %s: page %d" 639 dprintk("RPC: %s: page %d"
638 " srcp 0x%p len %d curlen %d\n", 640 " srcp 0x%p len %d curlen %d\n",
639 __func__, i, srcp, copy_len, curlen); 641 __func__, i, srcp, copy_len, curlen);
640 destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], 642 destp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
641 KM_SKB_SUNRPC_DATA); 643 memcpy(destp + page_base, srcp, curlen);
642 if (i == 0) 644 flush_dcache_page(ppages[i]);
643 memcpy(destp + rqst->rq_rcv_buf.page_base,
644 srcp, curlen);
645 else
646 memcpy(destp, srcp, curlen);
647 flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
648 kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); 645 kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
649 srcp += curlen; 646 srcp += curlen;
650 copy_len -= curlen; 647 copy_len -= curlen;
651 if (copy_len == 0) 648 if (copy_len == 0)
652 break; 649 break;
650 page_base = 0;
653 } 651 }
654 rqst->rq_rcv_buf.page_len = olen - copy_len; 652 rqst->rq_rcv_buf.page_len = olen - copy_len;
655 } else 653 } else
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index d718b8fa9525..09af4fab1a45 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -43,6 +43,7 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/fs.h> 44#include <linux/fs.h>
45#include <linux/sysctl.h> 45#include <linux/sysctl.h>
46#include <linux/workqueue.h>
46#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
47#include <linux/sunrpc/sched.h> 48#include <linux/sunrpc/sched.h>
48#include <linux/sunrpc/svc_rdma.h> 49#include <linux/sunrpc/svc_rdma.h>
@@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod;
74struct kmem_cache *svc_rdma_map_cachep; 75struct kmem_cache *svc_rdma_map_cachep;
75struct kmem_cache *svc_rdma_ctxt_cachep; 76struct kmem_cache *svc_rdma_ctxt_cachep;
76 77
78struct workqueue_struct *svc_rdma_wq;
79
77/* 80/*
78 * This function implements reading and resetting an atomic_t stat 81 * This function implements reading and resetting an atomic_t stat
79 * variable through read/write to a proc file. Any write to the file 82 * variable through read/write to a proc file. Any write to the file
@@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = {
231void svc_rdma_cleanup(void) 234void svc_rdma_cleanup(void)
232{ 235{
233 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 236 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
234 flush_scheduled_work(); 237 destroy_workqueue(svc_rdma_wq);
235 if (svcrdma_table_header) { 238 if (svcrdma_table_header) {
236 unregister_sysctl_table(svcrdma_table_header); 239 unregister_sysctl_table(svcrdma_table_header);
237 svcrdma_table_header = NULL; 240 svcrdma_table_header = NULL;
@@ -249,6 +252,11 @@ int svc_rdma_init(void)
249 dprintk("\tsq_depth : %d\n", 252 dprintk("\tsq_depth : %d\n",
250 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); 253 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
251 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); 254 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
255
256 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
257 if (!svc_rdma_wq)
258 return -ENOMEM;
259
252 if (!svcrdma_table_header) 260 if (!svcrdma_table_header)
253 svcrdma_table_header = 261 svcrdma_table_header =
254 register_sysctl_table(svcrdma_root_table); 262 register_sysctl_table(svcrdma_root_table);
@@ -283,6 +291,7 @@ int svc_rdma_init(void)
283 kmem_cache_destroy(svc_rdma_map_cachep); 291 kmem_cache_destroy(svc_rdma_map_cachep);
284 err0: 292 err0:
285 unregister_sysctl_table(svcrdma_table_header); 293 unregister_sysctl_table(svcrdma_table_header);
294 destroy_workqueue(svc_rdma_wq);
286 return -ENOMEM; 295 return -ENOMEM;
287} 296}
288MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 297MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
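
svc_rdma previously put its deferred transport-free work on the shared kernel workqueue and drained it at module unload with flush_scheduled_work(), which also waits on every other user of that queue. With a private queue the lifecycle is self-contained; using the names from the hunks above:

	/* module init */
	svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
	if (!svc_rdma_wq)
		return -ENOMEM;

	/* submission, e.g. in svc_rdma_free(): */
	queue_work(svc_rdma_wq, &rdma->sc_work);

	/* module exit: drains pending work, then frees the queue */
	destroy_workqueue(svc_rdma_wq);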
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0194de814933..df67211c4baf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; 263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) { 264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265 frmr->page_list->page_list[page_no] = 265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device, 266 ib_dma_map_page(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]), 267 rqstp->rq_arg.pages[page_no], 0,
268 PAGE_SIZE, DMA_FROM_DEVICE); 268 PAGE_SIZE, DMA_FROM_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no])) 270 frmr->page_list->page_list[page_no]))
271 goto fatal_err; 271 goto fatal_err;
@@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
309 int count) 309 int count)
310{ 310{
311 int i; 311 int i;
312 unsigned long off;
312 313
313 ctxt->count = count; 314 ctxt->count = count;
314 ctxt->direction = DMA_FROM_DEVICE; 315 ctxt->direction = DMA_FROM_DEVICE;
315 for (i = 0; i < count; i++) { 316 for (i = 0; i < count; i++) {
316 ctxt->sge[i].length = 0; /* in case map fails */ 317 ctxt->sge[i].length = 0; /* in case map fails */
317 if (!frmr) { 318 if (!frmr) {
319 BUG_ON(0 == virt_to_page(vec[i].iov_base));
320 off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
318 ctxt->sge[i].addr = 321 ctxt->sge[i].addr =
319 ib_dma_map_single(xprt->sc_cm_id->device, 322 ib_dma_map_page(xprt->sc_cm_id->device,
320 vec[i].iov_base, 323 virt_to_page(vec[i].iov_base),
321 vec[i].iov_len, 324 off,
322 DMA_FROM_DEVICE); 325 vec[i].iov_len,
326 DMA_FROM_DEVICE);
323 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 327 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324 ctxt->sge[i].addr)) 328 ctxt->sge[i].addr))
325 return -EINVAL; 329 return -EINVAL;
@@ -491,6 +495,7 @@ next_sge:
491 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", 495 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
492 err); 496 err);
493 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 497 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
498 svc_rdma_unmap_dma(ctxt);
494 svc_rdma_put_context(ctxt, 0); 499 svc_rdma_put_context(ctxt, 0);
495 goto out; 500 goto out;
496 } 501 }
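
The ib_dma_map_single() to ib_dma_map_page() conversion recurs through the svcrdma hunks: map_single wants a kernel virtual address, which a highmem page does not have, while map_page takes the struct page plus an in-page offset and so can map anything an XDR buffer hands over. The change in miniature, with dev standing for xprt->sc_cm_id->device as in the surrounding code:

	/* before: valid only for pages with a permanent kernel mapping */
	pa = ib_dma_map_single(dev, page_address(page), PAGE_SIZE, dir);

	/* after: valid for any struct page */
	pa = ib_dma_map_page(dev, page, 0, PAGE_SIZE, dir);
	if (ib_dma_mapping_error(dev, pa))
		goto err;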
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b15e1ebb2bfa..249a835b703f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -70,8 +70,8 @@
70 * one extra page for the RPCRDMA header. 70 * one extra page for the RPCRDMA header.
71 */ 71 */
71 */ 71 */
72static int fast_reg_xdr(struct svcxprt_rdma *xprt, 72static int fast_reg_xdr(struct svcxprt_rdma *xprt,
73 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
74 struct svc_rdma_req_map *vec) 74 struct svc_rdma_req_map *vec)
75{ 75{
76 int sge_no; 76 int sge_no;
77 u32 sge_bytes; 77 u32 sge_bytes;
@@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
96 vec->count = 2; 96 vec->count = 2;
97 sge_no++; 97 sge_no++;
98 98
99 /* Build the FRMR */ 99 /* Map the XDR head */
100 frmr->kva = frva; 100 frmr->kva = frva;
101 frmr->direction = DMA_TO_DEVICE; 101 frmr->direction = DMA_TO_DEVICE;
102 frmr->access_flags = 0; 102 frmr->access_flags = 0;
103 frmr->map_len = PAGE_SIZE; 103 frmr->map_len = PAGE_SIZE;
104 frmr->page_list_len = 1; 104 frmr->page_list_len = 1;
105 page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
105 frmr->page_list->page_list[page_no] = 106 frmr->page_list->page_list[page_no] =
106 ib_dma_map_single(xprt->sc_cm_id->device, 107 ib_dma_map_page(xprt->sc_cm_id->device,
107 (void *)xdr->head[0].iov_base, 108 virt_to_page(xdr->head[0].iov_base),
108 PAGE_SIZE, DMA_TO_DEVICE); 109 page_off,
110 PAGE_SIZE - page_off,
111 DMA_TO_DEVICE);
109 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 112 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110 frmr->page_list->page_list[page_no])) 113 frmr->page_list->page_list[page_no]))
111 goto fatal_err; 114 goto fatal_err;
112 atomic_inc(&xprt->sc_dma_used); 115 atomic_inc(&xprt->sc_dma_used);
113 116
117 /* Map the XDR page list */
114 page_off = xdr->page_base; 118 page_off = xdr->page_base;
115 page_bytes = xdr->page_len + page_off; 119 page_bytes = xdr->page_len + page_off;
116 if (!page_bytes) 120 if (!page_bytes)
@@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
128 page_bytes -= sge_bytes; 132 page_bytes -= sge_bytes;
129 133
130 frmr->page_list->page_list[page_no] = 134 frmr->page_list->page_list[page_no] =
131 ib_dma_map_single(xprt->sc_cm_id->device, 135 ib_dma_map_page(xprt->sc_cm_id->device,
132 page_address(page), 136 page, page_off,
133 PAGE_SIZE, DMA_TO_DEVICE); 137 sge_bytes, DMA_TO_DEVICE);
134 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 138 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
135 frmr->page_list->page_list[page_no])) 139 frmr->page_list->page_list[page_no]))
136 goto fatal_err; 140 goto fatal_err;
@@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
166 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; 170 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
167 171
168 frmr->page_list->page_list[page_no] = 172 frmr->page_list->page_list[page_no] =
169 ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, 173 ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
170 DMA_TO_DEVICE); 174 page_off,
175 PAGE_SIZE,
176 DMA_TO_DEVICE);
171 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 177 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
172 frmr->page_list->page_list[page_no])) 178 frmr->page_list->page_list[page_no]))
173 goto fatal_err; 179 goto fatal_err;
@@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt,
245 return 0; 251 return 0;
246} 252}
247 253
254static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
255 struct xdr_buf *xdr,
256 u32 xdr_off, size_t len, int dir)
257{
258 struct page *page;
259 dma_addr_t dma_addr;
260 if (xdr_off < xdr->head[0].iov_len) {
261 /* This offset is in the head */
262 xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
263 page = virt_to_page(xdr->head[0].iov_base);
264 } else {
265 xdr_off -= xdr->head[0].iov_len;
266 if (xdr_off < xdr->page_len) {
267 /* This offset is in the page list */
268 page = xdr->pages[xdr_off >> PAGE_SHIFT];
269 xdr_off &= ~PAGE_MASK;
270 } else {
271 /* This offset is in the tail */
272 xdr_off -= xdr->page_len;
273 xdr_off += (unsigned long)
274 xdr->tail[0].iov_base & ~PAGE_MASK;
275 page = virt_to_page(xdr->tail[0].iov_base);
276 }
277 }
278 dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
279 min_t(size_t, PAGE_SIZE, len), dir);
280 return dma_addr;
281}
282
248/* Assumptions: 283/* Assumptions:
249 * - We are using FRMR 284 * - We are using FRMR
250 * - or - 285 * - or -
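
The new dma_map_xdr() helper converts a flat byte offset into the xdr_buf into the right head, page-list, or tail mapping, never crossing a page boundary. Its callers, send_write() and send_reply() below, simply keep a running offset; condensed from the send_reply() hunk:

	int xdr_off = 0;

	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
		byte_count -= sge_bytes;
		ctxt->sge[sge_no].addr =
			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
				    sge_bytes, DMA_TO_DEVICE);
		xdr_off += sge_bytes;
	}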
@@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
293 sge[sge_no].length = sge_bytes; 328 sge[sge_no].length = sge_bytes;
294 if (!vec->frmr) { 329 if (!vec->frmr) {
295 sge[sge_no].addr = 330 sge[sge_no].addr =
296 ib_dma_map_single(xprt->sc_cm_id->device, 331 dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
297 (void *) 332 sge_bytes, DMA_TO_DEVICE);
298 vec->sge[xdr_sge_no].iov_base + sge_off, 333 xdr_off += sge_bytes;
299 sge_bytes, DMA_TO_DEVICE);
300 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 334 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
301 sge[sge_no].addr)) 335 sge[sge_no].addr))
302 goto err; 336 goto err;
@@ -333,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
333 goto err; 367 goto err;
334 return 0; 368 return 0;
335 err: 369 err:
370 svc_rdma_unmap_dma(ctxt);
371 svc_rdma_put_frmr(xprt, vec->frmr);
336 svc_rdma_put_context(ctxt, 0); 372 svc_rdma_put_context(ctxt, 0);
337 /* Fatal error, close transport */ 373 /* Fatal error, close transport */
338 return -EIO; 374 return -EIO;
@@ -494,7 +530,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
494 * In all three cases, this function prepares the RPCRDMA header in 530 * In all three cases, this function prepares the RPCRDMA header in
495 * sge[0], the 'type' parameter indicates the type to place in the 531 * sge[0], the 'type' parameter indicates the type to place in the
496 * RPCRDMA header, and the 'byte_count' field indicates how much of 532 * RPCRDMA header, and the 'byte_count' field indicates how much of
497 * the XDR to include in this RDMA_SEND. 533 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
534 * to send is zero in the XDR.
498 */ 535 */
499static int send_reply(struct svcxprt_rdma *rdma, 536static int send_reply(struct svcxprt_rdma *rdma,
500 struct svc_rqst *rqstp, 537 struct svc_rqst *rqstp,
@@ -536,23 +573,24 @@ static int send_reply(struct svcxprt_rdma *rdma,
536 ctxt->sge[0].lkey = rdma->sc_dma_lkey; 573 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
537 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 574 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
538 ctxt->sge[0].addr = 575 ctxt->sge[0].addr =
539 ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), 576 ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
540 ctxt->sge[0].length, DMA_TO_DEVICE); 577 ctxt->sge[0].length, DMA_TO_DEVICE);
541 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 578 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
542 goto err; 579 goto err;
543 atomic_inc(&rdma->sc_dma_used); 580 atomic_inc(&rdma->sc_dma_used);
544 581
545 ctxt->direction = DMA_TO_DEVICE; 582 ctxt->direction = DMA_TO_DEVICE;
546 583
547 /* Determine how many of our SGE are to be transmitted */ 584 /* Map the payload indicated by 'byte_count' */
548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 585 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
586 int xdr_off = 0;
549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 587 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
550 byte_count -= sge_bytes; 588 byte_count -= sge_bytes;
551 if (!vec->frmr) { 589 if (!vec->frmr) {
552 ctxt->sge[sge_no].addr = 590 ctxt->sge[sge_no].addr =
553 ib_dma_map_single(rdma->sc_cm_id->device, 591 dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
554 vec->sge[sge_no].iov_base, 592 sge_bytes, DMA_TO_DEVICE);
555 sge_bytes, DMA_TO_DEVICE); 593 xdr_off += sge_bytes;
556 if (ib_dma_mapping_error(rdma->sc_cm_id->device, 594 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
557 ctxt->sge[sge_no].addr)) 595 ctxt->sge[sge_no].addr))
558 goto err; 596 goto err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index edea15a54e51..c3c232a88d94 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -45,6 +45,7 @@
45#include <linux/sched.h> 45#include <linux/sched.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/spinlock.h> 47#include <linux/spinlock.h>
48#include <linux/workqueue.h>
48#include <rdma/ib_verbs.h> 49#include <rdma/ib_verbs.h>
49#include <rdma/rdma_cm.h> 50#include <rdma/rdma_cm.h>
50#include <linux/sunrpc/svc_rdma.h> 51#include <linux/sunrpc/svc_rdma.h>
@@ -52,6 +53,7 @@
52#define RPCDBG_FACILITY RPCDBG_SVCXPRT 53#define RPCDBG_FACILITY RPCDBG_SVCXPRT
53 54
54static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 55static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
56 struct net *net,
55 struct sockaddr *sa, int salen, 57 struct sockaddr *sa, int salen,
56 int flags); 58 int flags);
57static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 59static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
@@ -89,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = {
89/* WR context cache. Created in svc_rdma.c */ 91/* WR context cache. Created in svc_rdma.c */
90extern struct kmem_cache *svc_rdma_ctxt_cachep; 92extern struct kmem_cache *svc_rdma_ctxt_cachep;
91 93
94/* Workqueue created in svc_rdma.c */
95extern struct workqueue_struct *svc_rdma_wq;
96
92struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 97struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
93{ 98{
94 struct svc_rdma_op_ctxt *ctxt; 99 struct svc_rdma_op_ctxt *ctxt;
@@ -120,7 +125,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
120 */ 125 */
121 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { 126 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
122 atomic_dec(&xprt->sc_dma_used); 127 atomic_dec(&xprt->sc_dma_used);
123 ib_dma_unmap_single(xprt->sc_cm_id->device, 128 ib_dma_unmap_page(xprt->sc_cm_id->device,
124 ctxt->sge[i].addr, 129 ctxt->sge[i].addr,
125 ctxt->sge[i].length, 130 ctxt->sge[i].length,
126 ctxt->direction); 131 ctxt->direction);
@@ -328,7 +333,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
328} 333}
329 334
330/* 335/*
331 * Processs a completion context 336 * Process a completion context
332 */ 337 */
333static void process_context(struct svcxprt_rdma *xprt, 338static void process_context(struct svcxprt_rdma *xprt,
334 struct svc_rdma_op_ctxt *ctxt) 339 struct svc_rdma_op_ctxt *ctxt)
@@ -502,8 +507,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
502 BUG_ON(sge_no >= xprt->sc_max_sge); 507 BUG_ON(sge_no >= xprt->sc_max_sge);
503 page = svc_rdma_get_page(); 508 page = svc_rdma_get_page();
504 ctxt->pages[sge_no] = page; 509 ctxt->pages[sge_no] = page;
505 pa = ib_dma_map_single(xprt->sc_cm_id->device, 510 pa = ib_dma_map_page(xprt->sc_cm_id->device,
506 page_address(page), PAGE_SIZE, 511 page, 0, PAGE_SIZE,
507 DMA_FROM_DEVICE); 512 DMA_FROM_DEVICE);
508 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 513 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
509 goto err_put_ctxt; 514 goto err_put_ctxt;
@@ -511,9 +516,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
511 ctxt->sge[sge_no].addr = pa; 516 ctxt->sge[sge_no].addr = pa;
512 ctxt->sge[sge_no].length = PAGE_SIZE; 517 ctxt->sge[sge_no].length = PAGE_SIZE;
513 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; 518 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
519 ctxt->count = sge_no + 1;
514 buflen += PAGE_SIZE; 520 buflen += PAGE_SIZE;
515 } 521 }
516 ctxt->count = sge_no;
517 recv_wr.next = NULL; 522 recv_wr.next = NULL;
518 recv_wr.sg_list = &ctxt->sge[0]; 523 recv_wr.sg_list = &ctxt->sge[0];
519 recv_wr.num_sge = ctxt->count; 524 recv_wr.num_sge = ctxt->count;
@@ -529,6 +534,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
529 return ret; 534 return ret;
530 535
531 err_put_ctxt: 536 err_put_ctxt:
537 svc_rdma_unmap_dma(ctxt);
532 svc_rdma_put_context(ctxt, 1); 538 svc_rdma_put_context(ctxt, 1);
533 return -ENOMEM; 539 return -ENOMEM;
534} 540}
@@ -670,6 +676,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
670 * Create a listening RDMA service endpoint. 676 * Create a listening RDMA service endpoint.
671 */ 677 */
672static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 678static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
679 struct net *net,
673 struct sockaddr *sa, int salen, 680 struct sockaddr *sa, int salen,
674 int flags) 681 int flags)
675{ 682{
@@ -688,7 +695,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
688 return ERR_PTR(-ENOMEM); 695 return ERR_PTR(-ENOMEM);
689 xprt = &cma_xprt->sc_xprt; 696 xprt = &cma_xprt->sc_xprt;
690 697
691 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); 698 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
699 IB_QPT_RC);
692 if (IS_ERR(listen_id)) { 700 if (IS_ERR(listen_id)) {
693 ret = PTR_ERR(listen_id); 701 ret = PTR_ERR(listen_id);
694 dprintk("svcrdma: rdma_create_id failed = %d\n", ret); 702 dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -798,8 +806,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
798 if (ib_dma_mapping_error(frmr->mr->device, addr)) 806 if (ib_dma_mapping_error(frmr->mr->device, addr))
799 continue; 807 continue;
800 atomic_dec(&xprt->sc_dma_used); 808 atomic_dec(&xprt->sc_dma_used);
801 ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, 809 ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
802 frmr->direction); 810 frmr->direction);
803 } 811 }
804} 812}
805 813
@@ -1184,7 +1192,7 @@ static void svc_rdma_free(struct svc_xprt *xprt)
1184 struct svcxprt_rdma *rdma = 1192 struct svcxprt_rdma *rdma =
1185 container_of(xprt, struct svcxprt_rdma, sc_xprt); 1193 container_of(xprt, struct svcxprt_rdma, sc_xprt);
1186 INIT_WORK(&rdma->sc_work, __svc_rdma_free); 1194 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
1187 schedule_work(&rdma->sc_work); 1195 queue_work(svc_rdma_wq, &rdma->sc_work);
1188} 1196}
1189 1197
1190static int svc_rdma_has_wspace(struct svc_xprt *xprt) 1198static int svc_rdma_has_wspace(struct svc_xprt *xprt)
@@ -1274,7 +1282,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1274 atomic_read(&xprt->sc_sq_count) < 1282 atomic_read(&xprt->sc_sq_count) <
1275 xprt->sc_sq_depth); 1283 xprt->sc_sq_depth);
1276 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1284 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1277 return 0; 1285 return -ENOTCONN;
1278 continue; 1286 continue;
1279 } 1287 }
1280 /* Take a transport ref for each WR posted */ 1288 /* Take a transport ref for each WR posted */
@@ -1306,7 +1314,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1306 enum rpcrdma_errcode err) 1314 enum rpcrdma_errcode err)
1307{ 1315{
1308 struct ib_send_wr err_wr; 1316 struct ib_send_wr err_wr;
1309 struct ib_sge sge;
1310 struct page *p; 1317 struct page *p;
1311 struct svc_rdma_op_ctxt *ctxt; 1318 struct svc_rdma_op_ctxt *ctxt;
1312 u32 *va; 1319 u32 *va;
@@ -1319,26 +1326,28 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1319 /* XDR encode error */ 1326 /* XDR encode error */
1320 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1327 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1321 1328
1329 ctxt = svc_rdma_get_context(xprt);
1330 ctxt->direction = DMA_FROM_DEVICE;
1331 ctxt->count = 1;
1332 ctxt->pages[0] = p;
1333
1322 /* Prepare SGE for local address */ 1334 /* Prepare SGE for local address */
1323 sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, 1335 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
1324 page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); 1336 p, 0, length, DMA_FROM_DEVICE);
1325 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { 1337 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1326 put_page(p); 1338 put_page(p);
1339 svc_rdma_put_context(ctxt, 1);
1327 return; 1340 return;
1328 } 1341 }
1329 atomic_inc(&xprt->sc_dma_used); 1342 atomic_inc(&xprt->sc_dma_used);
1330 sge.lkey = xprt->sc_dma_lkey; 1343 ctxt->sge[0].lkey = xprt->sc_dma_lkey;
1331 sge.length = length; 1344 ctxt->sge[0].length = length;
1332
1333 ctxt = svc_rdma_get_context(xprt);
1334 ctxt->count = 1;
1335 ctxt->pages[0] = p;
1336 1345
1337 /* Prepare SEND WR */ 1346 /* Prepare SEND WR */
1338 memset(&err_wr, 0, sizeof err_wr); 1347 memset(&err_wr, 0, sizeof err_wr);
1339 ctxt->wr_op = IB_WR_SEND; 1348 ctxt->wr_op = IB_WR_SEND;
1340 err_wr.wr_id = (unsigned long)ctxt; 1349 err_wr.wr_id = (unsigned long)ctxt;
1341 err_wr.sg_list = &sge; 1350 err_wr.sg_list = ctxt->sge;
1342 err_wr.num_sge = 1; 1351 err_wr.num_sge = 1;
1343 err_wr.opcode = IB_WR_SEND; 1352 err_wr.opcode = IB_WR_SEND;
1344 err_wr.send_flags = IB_SEND_SIGNALED; 1353 err_wr.send_flags = IB_SEND_SIGNALED;
@@ -1348,9 +1357,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1348 if (ret) { 1357 if (ret) {
1349 dprintk("svcrdma: Error %d posting send for protocol error\n", 1358 dprintk("svcrdma: Error %d posting send for protocol error\n",
1350 ret); 1359 ret);
1351 ib_dma_unmap_single(xprt->sc_cm_id->device, 1360 svc_rdma_unmap_dma(ctxt);
1352 sge.addr, PAGE_SIZE,
1353 DMA_FROM_DEVICE);
1354 svc_rdma_put_context(ctxt, 1); 1361 svc_rdma_put_context(ctxt, 1);
1355 } 1362 }
1356} 1363}
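
svc_rdma_send_error() now acquires its context before mapping, so every failure leg can clean up through the common helpers rather than the ad-hoc ib_dma_unmap_single() that was removed. The resulting ordering, condensed:

	ctxt = svc_rdma_get_context(xprt);
	ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
					    p, 0, length, DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
		put_page(p);
		svc_rdma_put_context(ctxt, 1);	/* nothing mapped yet */
		return;
	}
	/* ... build and post the SEND WR ... */
	if (ret) {
		svc_rdma_unmap_dma(ctxt);	/* undoes the mapping above */
		svc_rdma_put_context(ctxt, 1);
	}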
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a85e866a77f7..0867070bb5ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
237 237
238 dprintk("RPC: %s: called\n", __func__); 238 dprintk("RPC: %s: called\n", __func__);
239 239
240 cancel_delayed_work(&r_xprt->rdma_connect); 240 cancel_delayed_work_sync(&r_xprt->rdma_connect);
241 flush_scheduled_work();
242 241
243 xprt_clear_connected(xprt); 242 xprt_clear_connected(xprt);
244 243
@@ -251,9 +250,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
251 250
252 xprt_rdma_free_addresses(xprt); 251 xprt_rdma_free_addresses(xprt);
253 252
254 kfree(xprt->slot); 253 xprt_free(xprt);
255 xprt->slot = NULL;
256 kfree(xprt);
257 254
258 dprintk("RPC: %s: returning\n", __func__); 255 dprintk("RPC: %s: returning\n", __func__);
259 256
@@ -285,23 +282,14 @@ xprt_setup_rdma(struct xprt_create *args)
285 return ERR_PTR(-EBADF); 282 return ERR_PTR(-EBADF);
286 } 283 }
287 284
288 xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); 285 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
286 xprt_rdma_slot_table_entries);
289 if (xprt == NULL) { 287 if (xprt == NULL) {
290 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", 288 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
291 __func__); 289 __func__);
292 return ERR_PTR(-ENOMEM); 290 return ERR_PTR(-ENOMEM);
293 } 291 }
294 292
295 xprt->max_reqs = xprt_rdma_slot_table_entries;
296 xprt->slot = kcalloc(xprt->max_reqs,
297 sizeof(struct rpc_rqst), GFP_KERNEL);
298 if (xprt->slot == NULL) {
299 dprintk("RPC: %s: couldn't allocate %d slots\n",
300 __func__, xprt->max_reqs);
301 kfree(xprt);
302 return ERR_PTR(-ENOMEM);
303 }
304
305 /* 60 second timeout, no retries */ 293 /* 60 second timeout, no retries */
306 xprt->timeout = &xprt_rdma_default_timeout; 294 xprt->timeout = &xprt_rdma_default_timeout;
307 xprt->bind_timeout = (60U * HZ); 295 xprt->bind_timeout = (60U * HZ);
@@ -410,8 +398,7 @@ out3:
410out2: 398out2:
411 rpcrdma_ia_close(&new_xprt->rx_ia); 399 rpcrdma_ia_close(&new_xprt->rx_ia);
412out1: 400out1:
413 kfree(xprt->slot); 401 xprt_free(xprt);
414 kfree(xprt);
415 return ERR_PTR(rc); 402 return ERR_PTR(rc);
416} 403}
417 404
@@ -460,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task)
460 } else { 447 } else {
461 schedule_delayed_work(&r_xprt->rdma_connect, 0); 448 schedule_delayed_work(&r_xprt->rdma_connect, 0);
462 if (!RPC_IS_ASYNC(task)) 449 if (!RPC_IS_ASYNC(task))
463 flush_scheduled_work(); 450 flush_delayed_work(&r_xprt->rdma_connect);
464 } 451 }
465} 452}
466 453
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5f4c7b3bc711..80f8da344df5 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -144,6 +144,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
144static inline 144static inline
145void rpcrdma_event_process(struct ib_wc *wc) 145void rpcrdma_event_process(struct ib_wc *wc)
146{ 146{
147 struct rpcrdma_mw *frmr;
147 struct rpcrdma_rep *rep = 148 struct rpcrdma_rep *rep =
148 (struct rpcrdma_rep *)(unsigned long) wc->wr_id; 149 (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
149 150
@@ -154,15 +155,23 @@ void rpcrdma_event_process(struct ib_wc *wc)
154 return; 155 return;
155 156
156 if (IB_WC_SUCCESS != wc->status) { 157 if (IB_WC_SUCCESS != wc->status) {
157 dprintk("RPC: %s: %s WC status %X, connection lost\n", 158 dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
158 __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send", 159 __func__, wc->opcode, wc->status);
159 wc->status);
160 rep->rr_len = ~0U; 160 rep->rr_len = ~0U;
161 rpcrdma_schedule_tasklet(rep); 161 if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
162 rpcrdma_schedule_tasklet(rep);
162 return; 163 return;
163 } 164 }
164 165
165 switch (wc->opcode) { 166 switch (wc->opcode) {
167 case IB_WC_FAST_REG_MR:
168 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
169 frmr->r.frmr.state = FRMR_IS_VALID;
170 break;
171 case IB_WC_LOCAL_INV:
172 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
173 frmr->r.frmr.state = FRMR_IS_INVALID;
174 break;
166 case IB_WC_RECV: 175 case IB_WC_RECV:
167 rep->rr_len = wc->byte_len; 176 rep->rr_len = wc->byte_len;
168 ib_dma_sync_single_for_cpu( 177 ib_dma_sync_single_for_cpu(
@@ -378,7 +387,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
378 387
379 init_completion(&ia->ri_done); 388 init_completion(&ia->ri_done);
380 389
381 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); 390 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
382 if (IS_ERR(id)) { 391 if (IS_ERR(id)) {
383 rc = PTR_ERR(id); 392 rc = PTR_ERR(id);
384 dprintk("RPC: %s: rdma_create_id() failed %i\n", 393 dprintk("RPC: %s: rdma_create_id() failed %i\n",
@@ -1450,6 +1459,12 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1450 seg->mr_dma = ib_dma_map_single(ia->ri_id->device, 1459 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1451 seg->mr_offset, 1460 seg->mr_offset,
1452 seg->mr_dmalen, seg->mr_dir); 1461 seg->mr_dmalen, seg->mr_dir);
1462 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1463 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1464 __func__,
1465 (unsigned long long)seg->mr_dma,
1466 seg->mr_offset, seg->mr_dmalen);
1467 }
1453} 1468}
1454 1469
1455static void 1470static void
@@ -1469,7 +1484,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1469 struct rpcrdma_xprt *r_xprt) 1484 struct rpcrdma_xprt *r_xprt)
1470{ 1485{
1471 struct rpcrdma_mr_seg *seg1 = seg; 1486 struct rpcrdma_mr_seg *seg1 = seg;
1472 struct ib_send_wr frmr_wr, *bad_wr; 1487 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1488
1473 u8 key; 1489 u8 key;
1474 int len, pageoff; 1490 int len, pageoff;
1475 int i, rc; 1491 int i, rc;
@@ -1484,6 +1500,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1484 rpcrdma_map_one(ia, seg, writing); 1500 rpcrdma_map_one(ia, seg, writing);
1485 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; 1501 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
1486 len += seg->mr_len; 1502 len += seg->mr_len;
1503 BUG_ON(seg->mr_len > PAGE_SIZE);
1487 ++seg; 1504 ++seg;
1488 ++i; 1505 ++i;
1489 /* Check for holes */ 1506 /* Check for holes */
@@ -1494,26 +1511,45 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1494 dprintk("RPC: %s: Using frmr %p to map %d segments\n", 1511 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1495 __func__, seg1->mr_chunk.rl_mw, i); 1512 __func__, seg1->mr_chunk.rl_mw, i);
1496 1513
1514 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1515 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1516 __func__,
1517 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1518 /* Invalidate before using. */
1519 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1520 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1521 invalidate_wr.next = &frmr_wr;
1522 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1523 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1524 invalidate_wr.ex.invalidate_rkey =
1525 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1526 DECR_CQCOUNT(&r_xprt->rx_ep);
1527 post_wr = &invalidate_wr;
1528 } else
1529 post_wr = &frmr_wr;
1530
1497 /* Bump the key */ 1531 /* Bump the key */
1498 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); 1532 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1499 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); 1533 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1500 1534
1501 /* Prepare FRMR WR */ 1535 /* Prepare FRMR WR */
1502 memset(&frmr_wr, 0, sizeof frmr_wr); 1536 memset(&frmr_wr, 0, sizeof frmr_wr);
1537 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1503 frmr_wr.opcode = IB_WR_FAST_REG_MR; 1538 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1504 frmr_wr.send_flags = 0; /* unsignaled */ 1539 frmr_wr.send_flags = IB_SEND_SIGNALED;
1505 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; 1540 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1506 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; 1541 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1507 frmr_wr.wr.fast_reg.page_list_len = i; 1542 frmr_wr.wr.fast_reg.page_list_len = i;
1508 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 1543 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1509 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; 1544 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1545 BUG_ON(frmr_wr.wr.fast_reg.length < len);
1510 frmr_wr.wr.fast_reg.access_flags = (writing ? 1546 frmr_wr.wr.fast_reg.access_flags = (writing ?
1511 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 1547 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1512 IB_ACCESS_REMOTE_READ); 1548 IB_ACCESS_REMOTE_READ);
1513 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1549 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1514 DECR_CQCOUNT(&r_xprt->rx_ep); 1550 DECR_CQCOUNT(&r_xprt->rx_ep);
1515 1551
1516 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); 1552 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
1517 1553
1518 if (rc) { 1554 if (rc) {
1519 dprintk("RPC: %s: failed ib_post_send for register," 1555 dprintk("RPC: %s: failed ib_post_send for register,"
@@ -1542,8 +1578,9 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1542 rpcrdma_unmap_one(ia, seg++); 1578 rpcrdma_unmap_one(ia, seg++);
1543 1579
1544 memset(&invalidate_wr, 0, sizeof invalidate_wr); 1580 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1581 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1545 invalidate_wr.opcode = IB_WR_LOCAL_INV; 1582 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1546 invalidate_wr.send_flags = 0; /* unsignaled */ 1583 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1547 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1584 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1548 DECR_CQCOUNT(&r_xprt->rx_ep); 1585 DECR_CQCOUNT(&r_xprt->rx_ep);
1549 1586
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c7a7eba991bc..cae761a8536c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -164,6 +164,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
164 struct { 164 struct {
165 struct ib_fast_reg_page_list *fr_pgl; 165 struct ib_fast_reg_page_list *fr_pgl;
166 struct ib_mr *fr_mr; 166 struct ib_mr *fr_mr;
167 enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
167 } frmr; 168 } frmr;
168 } r; 169 } r;
169 struct list_head mw_list; 170 struct list_head mw_list;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fe9306bf10cc..72abb7358933 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/types.h> 21#include <linux/types.h>
22#include <linux/string.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/capability.h> 25#include <linux/capability.h>
@@ -28,6 +29,7 @@
28#include <linux/in.h> 29#include <linux/in.h>
29#include <linux/net.h> 30#include <linux/net.h>
30#include <linux/mm.h> 31#include <linux/mm.h>
32#include <linux/un.h>
31#include <linux/udp.h> 33#include <linux/udp.h>
32#include <linux/tcp.h> 34#include <linux/tcp.h>
33#include <linux/sunrpc/clnt.h> 35#include <linux/sunrpc/clnt.h>
@@ -45,6 +47,9 @@
45#include <net/tcp.h> 47#include <net/tcp.h>
46 48
47#include "sunrpc.h" 49#include "sunrpc.h"
50
51static void xs_close(struct rpc_xprt *xprt);
52
48/* 53/*
49 * xprtsock tunables 54 * xprtsock tunables
50 */ 55 */
@@ -261,6 +266,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
261 return (struct sockaddr *) &xprt->addr; 266 return (struct sockaddr *) &xprt->addr;
262} 267}
263 268
269static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt)
270{
271 return (struct sockaddr_un *) &xprt->addr;
272}
273
264static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) 274static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
265{ 275{
266 return (struct sockaddr_in *) &xprt->addr; 276 return (struct sockaddr_in *) &xprt->addr;
@@ -276,23 +286,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
276 struct sockaddr *sap = xs_addr(xprt); 286 struct sockaddr *sap = xs_addr(xprt);
277 struct sockaddr_in6 *sin6; 287 struct sockaddr_in6 *sin6;
278 struct sockaddr_in *sin; 288 struct sockaddr_in *sin;
289 struct sockaddr_un *sun;
279 char buf[128]; 290 char buf[128];
280 291
281 (void)rpc_ntop(sap, buf, sizeof(buf));
282 xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
283
284 switch (sap->sa_family) { 292 switch (sap->sa_family) {
293 case AF_LOCAL:
294 sun = xs_addr_un(xprt);
295 strlcpy(buf, sun->sun_path, sizeof(buf));
296 xprt->address_strings[RPC_DISPLAY_ADDR] =
297 kstrdup(buf, GFP_KERNEL);
298 break;
285 case AF_INET: 299 case AF_INET:
300 (void)rpc_ntop(sap, buf, sizeof(buf));
301 xprt->address_strings[RPC_DISPLAY_ADDR] =
302 kstrdup(buf, GFP_KERNEL);
286 sin = xs_addr_in(xprt); 303 sin = xs_addr_in(xprt);
287 snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); 304 snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
288 break; 305 break;
289 case AF_INET6: 306 case AF_INET6:
307 (void)rpc_ntop(sap, buf, sizeof(buf));
308 xprt->address_strings[RPC_DISPLAY_ADDR] =
309 kstrdup(buf, GFP_KERNEL);
290 sin6 = xs_addr_in6(xprt); 310 sin6 = xs_addr_in6(xprt);
291 snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); 311 snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
292 break; 312 break;
293 default: 313 default:
294 BUG(); 314 BUG();
295 } 315 }
316
296 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); 317 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
297} 318}
298 319
@@ -495,6 +516,70 @@ static int xs_nospace(struct rpc_task *task)
495 return ret; 516 return ret;
496} 517}
497 518
519/*
520 * Construct a stream transport record marker in @buf.
521 */
522static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
523{
524 u32 reclen = buf->len - sizeof(rpc_fraghdr);
525 rpc_fraghdr *base = buf->head[0].iov_base;
526 *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
527}
528
529/**
530 * xs_local_send_request - write an RPC request to an AF_LOCAL socket
531 * @task: RPC task that manages the state of an RPC request
532 *
533 * Return values:
534 * 0: The request has been sent
535 * EAGAIN: The socket was blocked, please call again later to
536 * complete the request
537 * ENOTCONN: Caller needs to invoke connect logic then call again
538 * other: Some other error occurred, the request was not sent
539 */
540static int xs_local_send_request(struct rpc_task *task)
541{
542 struct rpc_rqst *req = task->tk_rqstp;
543 struct rpc_xprt *xprt = req->rq_xprt;
544 struct sock_xprt *transport =
545 container_of(xprt, struct sock_xprt, xprt);
546 struct xdr_buf *xdr = &req->rq_snd_buf;
547 int status;
548
549 xs_encode_stream_record_marker(&req->rq_snd_buf);
550
551 xs_pktdump("packet data:",
552 req->rq_svec->iov_base, req->rq_svec->iov_len);
553
554 status = xs_sendpages(transport->sock, NULL, 0,
555 xdr, req->rq_bytes_sent);
556 dprintk("RPC: %s(%u) = %d\n",
557 __func__, xdr->len - req->rq_bytes_sent, status);
558 if (likely(status >= 0)) {
559 req->rq_bytes_sent += status;
560 req->rq_xmit_bytes_sent += status;
561 if (likely(req->rq_bytes_sent >= req->rq_slen)) {
562 req->rq_bytes_sent = 0;
563 return 0;
564 }
565 status = -EAGAIN;
566 }
567
568 switch (status) {
569 case -EAGAIN:
570 status = xs_nospace(task);
571 break;
572 default:
573 dprintk("RPC: sendmsg returned unrecognized error %d\n",
574 -status);
575 case -EPIPE:
576 xs_close(xprt);
577 status = -ENOTCONN;
578 }
579
580 return status;
581}
582
498/** 583/**
499 * xs_udp_send_request - write an RPC request to a UDP socket 584 * xs_udp_send_request - write an RPC request to a UDP socket
500 * @task: address of RPC task that manages the state of an RPC request 585 * @task: address of RPC task that manages the state of an RPC request
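
The xs_encode_stream_record_marker() helper added above replaces the TCP-only xs_encode_tcp_record_marker() removed below; RFC 1831 record marking applies to any stream transport, AF_LOCAL included. Each fragment is preceded by a four-byte big-endian word: the top bit (RPC_LAST_STREAM_FRAGMENT, 0x80000000) flags the final fragment, the low 31 bits carry the fragment length. Worked through for a 100-byte RPC message occupying a 104-byte send buffer:

	u32 reclen = buf->len - sizeof(rpc_fraghdr);	/* 104 - 4 = 100 */
	*base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
	/* on the wire: 80 00 00 64 */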
@@ -504,7 +589,7 @@ static int xs_nospace(struct rpc_task *task)
504 * EAGAIN: The socket was blocked, please call again later to 589 * EAGAIN: The socket was blocked, please call again later to
505 * complete the request 590 * complete the request
506 * ENOTCONN: Caller needs to invoke connect logic then call again 591 * ENOTCONN: Caller needs to invoke connect logic then call again
507 * other: Some other error occured, the request was not sent 592 * other: Some other error occurred, the request was not sent
508 */ 593 */
509static int xs_udp_send_request(struct rpc_task *task) 594static int xs_udp_send_request(struct rpc_task *task)
510{ 595{
@@ -574,13 +659,6 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
574 kernel_sock_shutdown(sock, SHUT_WR); 659 kernel_sock_shutdown(sock, SHUT_WR);
575} 660}
576 661
577static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
578{
579 u32 reclen = buf->len - sizeof(rpc_fraghdr);
580 rpc_fraghdr *base = buf->head[0].iov_base;
581 *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen);
582}
583
584/** 662/**
585 * xs_tcp_send_request - write an RPC request to a TCP socket 663 * xs_tcp_send_request - write an RPC request to a TCP socket
586 * @task: address of RPC task that manages the state of an RPC request 664 * @task: address of RPC task that manages the state of an RPC request
@@ -590,7 +668,7 @@ static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
590 * EAGAIN: The socket was blocked, please call again later to 668 * EAGAIN: The socket was blocked, please call again later to
591 * complete the request 669 * complete the request
592 * ENOTCONN: Caller needs to invoke connect logic then call again 670 * ENOTCONN: Caller needs to invoke connect logic then call again
593 * other: Some other error occured, the request was not sent 671 * other: Some other error occurred, the request was not sent
594 * 672 *
595 * XXX: In the case of soft timeouts, should we eventually give up 673 * XXX: In the case of soft timeouts, should we eventually give up
596 * if sendmsg is not able to make progress? 674 * if sendmsg is not able to make progress?
@@ -603,7 +681,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
603 struct xdr_buf *xdr = &req->rq_snd_buf; 681 struct xdr_buf *xdr = &req->rq_snd_buf;
604 int status; 682 int status;
605 683
606 xs_encode_tcp_record_marker(&req->rq_snd_buf); 684 xs_encode_stream_record_marker(&req->rq_snd_buf);
607 685
608 xs_pktdump("packet data:", 686 xs_pktdump("packet data:",
609 req->rq_svec->iov_base, 687 req->rq_svec->iov_base,
@@ -710,6 +788,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
710 if (sk == NULL) 788 if (sk == NULL)
711 return; 789 return;
712 790
791 transport->srcport = 0;
792
713 write_lock_bh(&sk->sk_callback_lock); 793 write_lock_bh(&sk->sk_callback_lock);
714 transport->inet = NULL; 794 transport->inet = NULL;
715 transport->sock = NULL; 795 transport->sock = NULL;
@@ -770,12 +850,11 @@ static void xs_destroy(struct rpc_xprt *xprt)
770 850
771 dprintk("RPC: xs_destroy xprt %p\n", xprt); 851 dprintk("RPC: xs_destroy xprt %p\n", xprt);
772 852
773 cancel_rearming_delayed_work(&transport->connect_worker); 853 cancel_delayed_work_sync(&transport->connect_worker);
774 854
775 xs_close(xprt); 855 xs_close(xprt);
776 xs_free_peer_addresses(xprt); 856 xs_free_peer_addresses(xprt);
777 kfree(xprt->slot); 857 xprt_free(xprt);
778 kfree(xprt);
779 module_put(THIS_MODULE); 858 module_put(THIS_MODULE);
780} 859}
781 860
@@ -784,6 +863,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
784 return (struct rpc_xprt *) sk->sk_user_data; 863 return (struct rpc_xprt *) sk->sk_user_data;
785} 864}
786 865
866static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
867{
868 struct xdr_skb_reader desc = {
869 .skb = skb,
870 .offset = sizeof(rpc_fraghdr),
871 .count = skb->len - sizeof(rpc_fraghdr),
872 };
873
874 if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
875 return -1;
876 if (desc.count)
877 return -1;
878 return 0;
879}
880
881/**
882 * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
883 * @sk: socket with data to read
884 * @len: how much data to read
885 *
886 * Currently this assumes we can read the whole reply in a single gulp.
887 */
888static void xs_local_data_ready(struct sock *sk, int len)
889{
890 struct rpc_task *task;
891 struct rpc_xprt *xprt;
892 struct rpc_rqst *rovr;
893 struct sk_buff *skb;
894 int err, repsize, copied;
895 u32 _xid;
896 __be32 *xp;
897
898 read_lock_bh(&sk->sk_callback_lock);
899 dprintk("RPC: %s...\n", __func__);
900 xprt = xprt_from_sock(sk);
901 if (xprt == NULL)
902 goto out;
903
904 skb = skb_recv_datagram(sk, 0, 1, &err);
905 if (skb == NULL)
906 goto out;
907
908 if (xprt->shutdown)
909 goto dropit;
910
911 repsize = skb->len - sizeof(rpc_fraghdr);
912 if (repsize < 4) {
913 dprintk("RPC: impossible RPC reply size %d\n", repsize);
914 goto dropit;
915 }
916
917 /* Copy the XID from the skb... */
918 xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
919 if (xp == NULL)
920 goto dropit;
921
922 /* Look up and lock the request corresponding to the given XID */
923 spin_lock(&xprt->transport_lock);
924 rovr = xprt_lookup_rqst(xprt, *xp);
925 if (!rovr)
926 goto out_unlock;
927 task = rovr->rq_task;
928
929 copied = rovr->rq_private_buf.buflen;
930 if (copied > repsize)
931 copied = repsize;
932
933 if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
934 dprintk("RPC: sk_buff copy failed\n");
935 goto out_unlock;
936 }
937
938 xprt_complete_rqst(task, copied);
939
940 out_unlock:
941 spin_unlock(&xprt->transport_lock);
942 dropit:
943 skb_free_datagram(sk, skb);
944 out:
945 read_unlock_bh(&sk->sk_callback_lock);
946}
947
787/** 948/**
788 * xs_udp_data_ready - "data ready" callback for UDP sockets 949 * xs_udp_data_ready - "data ready" callback for UDP sockets
789 * @sk: socket with data to read 950 * @sk: socket with data to read
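
xs_local_data_ready() follows the same reply-dispatch shape as the UDP callback documented here: pull the XID from just past the record marker, look the request up under transport_lock, copy the payload into the request's buffer, and complete it. The distilled sequence, as a sketch:

	xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
	spin_lock(&xprt->transport_lock);
	rovr = xprt_lookup_rqst(xprt, *xp);	/* match reply to request */
	if (rovr && !xs_local_copy_to_xdr(&rovr->rq_private_buf, skb))
		xprt_complete_rqst(rovr->rq_task, copied);
	spin_unlock(&xprt->transport_lock);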
@@ -1343,7 +1504,6 @@ static void xs_tcp_state_change(struct sock *sk)
1343 case TCP_CLOSE_WAIT: 1504 case TCP_CLOSE_WAIT:
1344 /* The server initiated a shutdown of the socket */ 1505 /* The server initiated a shutdown of the socket */
1345 xprt_force_disconnect(xprt); 1506 xprt_force_disconnect(xprt);
1346 case TCP_SYN_SENT:
1347 xprt->connect_cookie++; 1507 xprt->connect_cookie++;
1348 case TCP_CLOSING: 1508 case TCP_CLOSING:
1349 /* 1509 /*
@@ -1516,7 +1676,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
1516 xs_update_peer_port(xprt); 1676 xs_update_peer_port(xprt);
1517} 1677}
1518 1678
1519static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) 1679static unsigned short xs_get_srcport(struct sock_xprt *transport)
1520{ 1680{
1521 unsigned short port = transport->srcport; 1681 unsigned short port = transport->srcport;
1522 1682
@@ -1525,7 +1685,7 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket
1525 return port; 1685 return port;
1526} 1686}
1527 1687
1528static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) 1688static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
1529{ 1689{
1530 if (transport->srcport != 0) 1690 if (transport->srcport != 0)
1531 transport->srcport = 0; 1691 transport->srcport = 0;
@@ -1535,23 +1695,18 @@ static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket
1535 return xprt_max_resvport; 1695 return xprt_max_resvport;
1536 return --port; 1696 return --port;
1537} 1697}
1538 1698static int xs_bind(struct sock_xprt *transport, struct socket *sock)
1539static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1540{ 1699{
1541 struct sockaddr_in myaddr = { 1700 struct sockaddr_storage myaddr;
1542 .sin_family = AF_INET,
1543 };
1544 struct sockaddr_in *sa;
1545 int err, nloop = 0; 1701 int err, nloop = 0;
1546 unsigned short port = xs_get_srcport(transport, sock); 1702 unsigned short port = xs_get_srcport(transport);
1547 unsigned short last; 1703 unsigned short last;
1548 1704
1549 sa = (struct sockaddr_in *)&transport->srcaddr; 1705 memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
1550 myaddr.sin_addr = sa->sin_addr;
1551 do { 1706 do {
1552 myaddr.sin_port = htons(port); 1707 rpc_set_port((struct sockaddr *)&myaddr, port);
1553 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1708 err = kernel_bind(sock, (struct sockaddr *)&myaddr,
1554 sizeof(myaddr)); 1709 transport->xprt.addrlen);
1555 if (port == 0) 1710 if (port == 0)
1556 break; 1711 break;
1557 if (err == 0) { 1712 if (err == 0) {
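The retry policy in this loop is unchanged by the rewrite: start from the cached source port, count downward, wrap to the top of the reserved range, and give up once the search has wrapped twice. A stand-alone sketch with the range shrunk to a handful of ports (the real bounds are the xprt_min_resvport/xprt_max_resvport sysctls, 665..1023 by default):

#include <stdio.h>

#define MIN_PORT 1018   /* deliberately tiny range for the demo */
#define MAX_PORT 1023

static unsigned short next_srcport(unsigned short port)
{
        if (port <= MIN_PORT)
                return MAX_PORT;        /* wrap to the top of the range */
        return --port;
}

int main(void)
{
        unsigned short port = 1020, last;
        int nloop = 0;

        do {
                printf("trying port %u\n", port);       /* bind would go here */
                last = port;
                port = next_srcport(port);
                if (port > last)        /* wrapped around: one pass done */
                        nloop++;
        } while (nloop != 2);           /* xs_bind also stops on success */
        return 0;
}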
@@ -1559,52 +1714,47 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1559 break; 1714 break;
1560 } 1715 }
1561 last = port; 1716 last = port;
1562 port = xs_next_srcport(transport, sock, port); 1717 port = xs_next_srcport(transport, port);
1563 if (port > last) 1718 if (port > last)
1564 nloop++; 1719 nloop++;
1565 } while (err == -EADDRINUSE && nloop != 2); 1720 } while (err == -EADDRINUSE && nloop != 2);
1566 dprintk("RPC: %s %pI4:%u: %s (%d)\n", 1721
1567 __func__, &myaddr.sin_addr, 1722 if (myaddr.ss_family == AF_INET)
1568 port, err ? "failed" : "ok", err); 1723 dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__,
1724 &((struct sockaddr_in *)&myaddr)->sin_addr,
1725 port, err ? "failed" : "ok", err);
1726 else
1727 dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__,
1728 &((struct sockaddr_in6 *)&myaddr)->sin6_addr,
1729 port, err ? "failed" : "ok", err);
1569 return err; 1730 return err;
1570} 1731}
1571 1732
1572static int xs_bind6(struct sock_xprt *transport, struct socket *sock) 1733/*
1734 * We don't support autobind on AF_LOCAL sockets
1735 */
1736static void xs_local_rpcbind(struct rpc_task *task)
1573{ 1737{
1574 struct sockaddr_in6 myaddr = { 1738 xprt_set_bound(task->tk_xprt);
1575 .sin6_family = AF_INET6, 1739}
1576 };
1577 struct sockaddr_in6 *sa;
1578 int err, nloop = 0;
1579 unsigned short port = xs_get_srcport(transport, sock);
1580 unsigned short last;
1581 1740
1582 sa = (struct sockaddr_in6 *)&transport->srcaddr; 1741static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
1583 myaddr.sin6_addr = sa->sin6_addr; 1742{
1584 do {
1585 myaddr.sin6_port = htons(port);
1586 err = kernel_bind(sock, (struct sockaddr *) &myaddr,
1587 sizeof(myaddr));
1588 if (port == 0)
1589 break;
1590 if (err == 0) {
1591 transport->srcport = port;
1592 break;
1593 }
1594 last = port;
1595 port = xs_next_srcport(transport, sock, port);
1596 if (port > last)
1597 nloop++;
1598 } while (err == -EADDRINUSE && nloop != 2);
1599 dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n",
1600 &myaddr.sin6_addr, port, err ? "failed" : "ok", err);
1601 return err;
1602} 1743}
1603 1744
1604#ifdef CONFIG_DEBUG_LOCK_ALLOC 1745#ifdef CONFIG_DEBUG_LOCK_ALLOC
1605static struct lock_class_key xs_key[2]; 1746static struct lock_class_key xs_key[2];
1606static struct lock_class_key xs_slock_key[2]; 1747static struct lock_class_key xs_slock_key[2];
1607 1748
1749static inline void xs_reclassify_socketu(struct socket *sock)
1750{
1751 struct sock *sk = sock->sk;
1752
1753 BUG_ON(sock_owned_by_user(sk));
1754 sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC",
1755 &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]);
1756}
1757
1608static inline void xs_reclassify_socket4(struct socket *sock) 1758static inline void xs_reclassify_socket4(struct socket *sock)
1609{ 1759{
1610 struct sock *sk = sock->sk; 1760 struct sock *sk = sock->sk;
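Folding xs_bind4 and xs_bind6 into one xs_bind works because struct sockaddr_storage is large and aligned enough for either family, so only the port needs family-specific handling. A userspace sketch of the same move (set_port is a hand-rolled stand-in for the kernel's rpc_set_port):

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

/* Patch a port into a sockaddr of either family. */
static void set_port(struct sockaddr *sap, unsigned short port)
{
        if (sap->sa_family == AF_INET)
                ((struct sockaddr_in *)sap)->sin_port = htons(port);
        else if (sap->sa_family == AF_INET6)
                ((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
}

static int bind_src(int fd, const struct sockaddr *src, socklen_t len,
                    unsigned short port)
{
        struct sockaddr_storage myaddr;

        memcpy(&myaddr, src, len);      /* one copy serves v4 and v6 */
        set_port((struct sockaddr *)&myaddr, port);
        return bind(fd, (struct sockaddr *)&myaddr, len);
}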
@@ -1622,7 +1772,26 @@ static inline void xs_reclassify_socket6(struct socket *sock)
1622 sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", 1772 sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
1623 &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); 1773 &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
1624} 1774}
1775
1776static inline void xs_reclassify_socket(int family, struct socket *sock)
1777{
1778 switch (family) {
1779 case AF_LOCAL:
1780 xs_reclassify_socketu(sock);
1781 break;
1782 case AF_INET:
1783 xs_reclassify_socket4(sock);
1784 break;
1785 case AF_INET6:
1786 xs_reclassify_socket6(sock);
1787 break;
1788 }
1789}
1625#else 1790#else
1791static inline void xs_reclassify_socketu(struct socket *sock)
1792{
1793}
1794
1626static inline void xs_reclassify_socket4(struct socket *sock) 1795static inline void xs_reclassify_socket4(struct socket *sock)
1627{ 1796{
1628} 1797}
@@ -1630,11 +1799,42 @@ static inline void xs_reclassify_socket4(struct socket *sock)
1630static inline void xs_reclassify_socket6(struct socket *sock) 1799static inline void xs_reclassify_socket6(struct socket *sock)
1631{ 1800{
1632} 1801}
1802
1803static inline void xs_reclassify_socket(int family, struct socket *sock)
1804{
1805}
1633#endif 1806#endif
1634 1807
1635static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1808static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1809 struct sock_xprt *transport, int family, int type, int protocol)
1636{ 1810{
1637 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1811 struct socket *sock;
1812 int err;
1813
1814 err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1);
1815 if (err < 0) {
1816 dprintk("RPC: can't create %d transport socket (%d).\n",
1817 protocol, -err);
1818 goto out;
1819 }
1820 xs_reclassify_socket(family, sock);
1821
1822 err = xs_bind(transport, sock);
1823 if (err) {
1824 sock_release(sock);
1825 goto out;
1826 }
1827
1828 return sock;
1829out:
1830 return ERR_PTR(err);
1831}
1832
1833static int xs_local_finish_connecting(struct rpc_xprt *xprt,
1834 struct socket *sock)
1835{
1836 struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
1837 xprt);
1638 1838
1639 if (!transport->inet) { 1839 if (!transport->inet) {
1640 struct sock *sk = sock->sk; 1840 struct sock *sk = sock->sk;
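xs_create_sock replaces four per-family socket factories with a single create/reclassify/bind sequence. A rough userspace analogue returning a descriptor on success or a negative errno on failure (the lockdep reclassification has no userspace counterpart and is omitted):

#include <errno.h>
#include <sys/socket.h>
#include <unistd.h>

static int create_sock(int family, int type, int protocol,
                       const struct sockaddr *src, socklen_t srclen)
{
        int fd = socket(family, type, protocol);

        if (fd < 0)
                return -errno;  /* caller checks for < 0, like IS_ERR() */
        if (src != NULL && bind(fd, src, srclen) < 0) {
                int err = -errno;

                close(fd);      /* mirrors sock_release() on bind failure */
                return err;
        }
        return fd;
}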
@@ -1644,13 +1844,12 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1644 xs_save_old_callbacks(transport, sk); 1844 xs_save_old_callbacks(transport, sk);
1645 1845
1646 sk->sk_user_data = xprt; 1846 sk->sk_user_data = xprt;
1647 sk->sk_data_ready = xs_udp_data_ready; 1847 sk->sk_data_ready = xs_local_data_ready;
1648 sk->sk_write_space = xs_udp_write_space; 1848 sk->sk_write_space = xs_udp_write_space;
1649 sk->sk_error_report = xs_error_report; 1849 sk->sk_error_report = xs_error_report;
1650 sk->sk_no_check = UDP_CSUM_NORCV;
1651 sk->sk_allocation = GFP_ATOMIC; 1850 sk->sk_allocation = GFP_ATOMIC;
1652 1851
1653 xprt_set_connected(xprt); 1852 xprt_clear_connected(xprt);
1654 1853
1655 /* Reset to new socket */ 1854 /* Reset to new socket */
1656 transport->sock = sock; 1855 transport->sock = sock;
@@ -1658,85 +1857,113 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1658 1857
1659 write_unlock_bh(&sk->sk_callback_lock); 1858 write_unlock_bh(&sk->sk_callback_lock);
1660 } 1859 }
1661 xs_udp_do_set_buffer_size(xprt); 1860
1861 /* Tell the socket layer to start connecting... */
1862 xprt->stat.connect_count++;
1863 xprt->stat.connect_start = jiffies;
1864 return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
1662} 1865}
1663 1866
1664/** 1867/**
1665 * xs_udp_connect_worker4 - set up a UDP socket 1868 * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
 1666 * @work: RPC transport to connect 1869 * @work: queued work item for the transport to connect
 1870 * (creates an AF_LOCAL socket and connects it to the
 1871 * remote endpoint named in the transport's address)
1667 * 1872 *
1668 * Invoked by a work queue tasklet. 1873 * Invoked by a work queue tasklet.
1669 */ 1874 */
1670static void xs_udp_connect_worker4(struct work_struct *work) 1875static void xs_local_setup_socket(struct work_struct *work)
1671{ 1876{
1672 struct sock_xprt *transport = 1877 struct sock_xprt *transport =
1673 container_of(work, struct sock_xprt, connect_worker.work); 1878 container_of(work, struct sock_xprt, connect_worker.work);
1674 struct rpc_xprt *xprt = &transport->xprt; 1879 struct rpc_xprt *xprt = &transport->xprt;
1675 struct socket *sock = transport->sock; 1880 struct socket *sock;
1676 int err, status = -EIO; 1881 int status = -EIO;
1677 1882
1678 if (xprt->shutdown) 1883 if (xprt->shutdown)
1679 goto out; 1884 goto out;
1680 1885
1681 /* Start by resetting any existing state */ 1886 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1682 xs_reset_transport(transport); 1887 status = __sock_create(xprt->xprt_net, AF_LOCAL,
1683 1888 SOCK_STREAM, 0, &sock, 1);
1684 err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1889 if (status < 0) {
1685 if (err < 0) { 1890 dprintk("RPC: can't create AF_LOCAL "
1686 dprintk("RPC: can't create UDP transport socket (%d).\n", -err); 1891 "transport socket (%d).\n", -status);
1687 goto out; 1892 goto out;
1688 } 1893 }
1689 xs_reclassify_socket4(sock); 1894 xs_reclassify_socketu(sock);
1690 1895
1691 if (xs_bind4(transport, sock)) { 1896 dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
1692 sock_release(sock); 1897 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1693 goto out;
1694 }
1695 1898
1696 dprintk("RPC: worker connecting xprt %p via %s to " 1899 status = xs_local_finish_connecting(xprt, sock);
1697 "%s (port %s)\n", xprt, 1900 switch (status) {
1698 xprt->address_strings[RPC_DISPLAY_PROTO], 1901 case 0:
1699 xprt->address_strings[RPC_DISPLAY_ADDR], 1902 dprintk("RPC: xprt %p connected to %s\n",
1700 xprt->address_strings[RPC_DISPLAY_PORT]); 1903 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1904 xprt_set_connected(xprt);
1905 break;
1906 case -ENOENT:
1907 dprintk("RPC: xprt %p: socket %s does not exist\n",
1908 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1909 break;
1910 default:
1911 printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
1912 __func__, -status,
1913 xprt->address_strings[RPC_DISPLAY_ADDR]);
1914 }
1701 1915
1702 xs_udp_finish_connecting(xprt, sock);
1703 status = 0;
1704out: 1916out:
1705 xprt_clear_connecting(xprt); 1917 xprt_clear_connecting(xprt);
1706 xprt_wake_pending_tasks(xprt, status); 1918 xprt_wake_pending_tasks(xprt, status);
1707} 1919}
1708 1920
1709/** 1921static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1710 * xs_udp_connect_worker6 - set up a UDP socket 1922{
1711 * @work: RPC transport to connect 1923 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1712 * 1924
1713 * Invoked by a work queue tasklet. 1925 if (!transport->inet) {
1714 */ 1926 struct sock *sk = sock->sk;
1715static void xs_udp_connect_worker6(struct work_struct *work) 1927
1928 write_lock_bh(&sk->sk_callback_lock);
1929
1930 xs_save_old_callbacks(transport, sk);
1931
1932 sk->sk_user_data = xprt;
1933 sk->sk_data_ready = xs_udp_data_ready;
1934 sk->sk_write_space = xs_udp_write_space;
1935 sk->sk_error_report = xs_error_report;
1936 sk->sk_no_check = UDP_CSUM_NORCV;
1937 sk->sk_allocation = GFP_ATOMIC;
1938
1939 xprt_set_connected(xprt);
1940
1941 /* Reset to new socket */
1942 transport->sock = sock;
1943 transport->inet = sk;
1944
1945 write_unlock_bh(&sk->sk_callback_lock);
1946 }
1947 xs_udp_do_set_buffer_size(xprt);
1948}
1949
1950static void xs_udp_setup_socket(struct work_struct *work)
1716{ 1951{
1717 struct sock_xprt *transport = 1952 struct sock_xprt *transport =
1718 container_of(work, struct sock_xprt, connect_worker.work); 1953 container_of(work, struct sock_xprt, connect_worker.work);
1719 struct rpc_xprt *xprt = &transport->xprt; 1954 struct rpc_xprt *xprt = &transport->xprt;
1720 struct socket *sock = transport->sock; 1955 struct socket *sock = transport->sock;
1721 int err, status = -EIO; 1956 int status = -EIO;
1722 1957
1723 if (xprt->shutdown) 1958 if (xprt->shutdown)
1724 goto out; 1959 goto out;
1725 1960
1726 /* Start by resetting any existing state */ 1961 /* Start by resetting any existing state */
1727 xs_reset_transport(transport); 1962 xs_reset_transport(transport);
1728 1963 sock = xs_create_sock(xprt, transport,
1729 err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock); 1964 xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
1730 if (err < 0) { 1965 if (IS_ERR(sock))
1731 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1732 goto out;
1733 }
1734 xs_reclassify_socket6(sock);
1735
1736 if (xs_bind6(transport, sock) < 0) {
1737 sock_release(sock);
1738 goto out; 1966 goto out;
1739 }
1740 1967
1741 dprintk("RPC: worker connecting xprt %p via %s to " 1968 dprintk("RPC: worker connecting xprt %p via %s to "
1742 "%s (port %s)\n", xprt, 1969 "%s (port %s)\n", xprt,
@@ -1755,12 +1982,12 @@ out:
1755 * We need to preserve the port number so the reply cache on the server can 1982 * We need to preserve the port number so the reply cache on the server can
1756 * find our cached RPC replies when we get around to reconnecting. 1983 * find our cached RPC replies when we get around to reconnecting.
1757 */ 1984 */
1758static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) 1985static void xs_abort_connection(struct sock_xprt *transport)
1759{ 1986{
1760 int result; 1987 int result;
1761 struct sockaddr any; 1988 struct sockaddr any;
1762 1989
1763 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); 1990 dprintk("RPC: disconnecting xprt %p to reuse port\n", transport);
1764 1991
1765 /* 1992 /*
1766 * Disconnect the transport socket by doing a connect operation 1993 * Disconnect the transport socket by doing a connect operation
@@ -1770,13 +1997,13 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo
1770 any.sa_family = AF_UNSPEC; 1997 any.sa_family = AF_UNSPEC;
1771 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 1998 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1772 if (!result) 1999 if (!result)
1773 xs_sock_mark_closed(xprt); 2000 xs_sock_mark_closed(&transport->xprt);
1774 else 2001 else
1775 dprintk("RPC: AF_UNSPEC connect return code %d\n", 2002 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1776 result); 2003 result);
1777} 2004}
1778 2005
1779static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) 2006static void xs_tcp_reuse_connection(struct sock_xprt *transport)
1780{ 2007{
1781 unsigned int state = transport->inet->sk_state; 2008 unsigned int state = transport->inet->sk_state;
1782 2009
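The trick xs_abort_connection depends on is worth spelling out: a connect() with sa_family set to AF_UNSPEC dissolves the association while leaving the socket, and crucially its bound local port, intact, which is what lets the server's duplicate-reply cache recognize the client after a reconnect. Sketched in userspace:

#include <string.h>
#include <sys/socket.h>

/* Drop a socket's peer association without closing it,
 * preserving the local (bound) port for the next connect. */
static int abort_connection(int fd)
{
        struct sockaddr any;

        memset(&any, 0, sizeof(any));
        any.sa_family = AF_UNSPEC;
        return connect(fd, &any, sizeof(any));
}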
@@ -1799,12 +2026,13 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra
1799 "sk_shutdown set to %d\n", 2026 "sk_shutdown set to %d\n",
1800 __func__, transport->inet->sk_shutdown); 2027 __func__, transport->inet->sk_shutdown);
1801 } 2028 }
1802 xs_abort_connection(xprt, transport); 2029 xs_abort_connection(transport);
1803} 2030}
1804 2031
1805static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 2032static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1806{ 2033{
1807 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2034 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2035 int ret = -ENOTCONN;
1808 2036
1809 if (!transport->inet) { 2037 if (!transport->inet) {
1810 struct sock *sk = sock->sk; 2038 struct sock *sk = sock->sk;
@@ -1836,12 +2064,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1836 } 2064 }
1837 2065
1838 if (!xprt_bound(xprt)) 2066 if (!xprt_bound(xprt))
1839 return -ENOTCONN; 2067 goto out;
1840 2068
1841 /* Tell the socket layer to start connecting... */ 2069 /* Tell the socket layer to start connecting... */
1842 xprt->stat.connect_count++; 2070 xprt->stat.connect_count++;
1843 xprt->stat.connect_start = jiffies; 2071 xprt->stat.connect_start = jiffies;
1844 return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); 2072 ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
2073 switch (ret) {
2074 case 0:
2075 case -EINPROGRESS:
2076 /* SYN_SENT! */
2077 xprt->connect_cookie++;
2078 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
2079 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2080 }
2081out:
2082 return ret;
1845} 2083}
1846 2084
1847/** 2085/**
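Returning both 0 and -EINPROGRESS through the new switch is deliberate: either way a SYN is on its way, so this is the one safe place to bump connect_cookie (which is why the TCP_SYN_SENT case disappeared from xs_tcp_state_change earlier in this diff). For comparison, the canonical userspace shape of a nonblocking connect: poll for writability, then read SO_ERROR (the 5-second timeout here is illustrative):

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/socket.h>

static int connect_nonblock(int fd, const struct sockaddr *sa, socklen_t len)
{
        struct pollfd pfd = { .fd = fd, .events = POLLOUT };
        int err = 0;
        socklen_t elen = sizeof(err);

        fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);
        if (connect(fd, sa, len) == 0)
                return 0;               /* connected immediately */
        if (errno != EINPROGRESS)
                return -errno;          /* hard failure */
        if (poll(&pfd, 1, 5000) <= 0)
                return -ETIMEDOUT;
        getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &elen);
        return err ? -err : 0;
}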
@@ -1852,12 +2090,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1852 * 2090 *
1853 * Invoked by a work queue tasklet. 2091 * Invoked by a work queue tasklet.
1854 */ 2092 */
1855static void xs_tcp_setup_socket(struct rpc_xprt *xprt, 2093static void xs_tcp_setup_socket(struct work_struct *work)
1856 struct sock_xprt *transport,
1857 struct socket *(*create_sock)(struct rpc_xprt *,
1858 struct sock_xprt *))
1859{ 2094{
2095 struct sock_xprt *transport =
2096 container_of(work, struct sock_xprt, connect_worker.work);
1860 struct socket *sock = transport->sock; 2097 struct socket *sock = transport->sock;
2098 struct rpc_xprt *xprt = &transport->xprt;
1861 int status = -EIO; 2099 int status = -EIO;
1862 2100
1863 if (xprt->shutdown) 2101 if (xprt->shutdown)
@@ -1865,7 +2103,8 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1865 2103
1866 if (!sock) { 2104 if (!sock) {
1867 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 2105 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1868 sock = create_sock(xprt, transport); 2106 sock = xs_create_sock(xprt, transport,
2107 xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
1869 if (IS_ERR(sock)) { 2108 if (IS_ERR(sock)) {
1870 status = PTR_ERR(sock); 2109 status = PTR_ERR(sock);
1871 goto out; 2110 goto out;
@@ -1876,7 +2115,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1876 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, 2115 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
1877 &xprt->state); 2116 &xprt->state);
1878 /* "close" the socket, preserving the local port */ 2117 /* "close" the socket, preserving the local port */
1879 xs_tcp_reuse_connection(xprt, transport); 2118 xs_tcp_reuse_connection(transport);
1880 2119
1881 if (abort_and_exit) 2120 if (abort_and_exit)
1882 goto out_eagain; 2121 goto out_eagain;
@@ -1925,84 +2164,6 @@ out:
1925 xprt_wake_pending_tasks(xprt, status); 2164 xprt_wake_pending_tasks(xprt, status);
1926} 2165}
1927 2166
1928static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
1929 struct sock_xprt *transport)
1930{
1931 struct socket *sock;
1932 int err;
1933
1934 /* start from scratch */
1935 err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
1936 if (err < 0) {
1937 dprintk("RPC: can't create TCP transport socket (%d).\n",
1938 -err);
1939 goto out_err;
1940 }
1941 xs_reclassify_socket4(sock);
1942
1943 if (xs_bind4(transport, sock) < 0) {
1944 sock_release(sock);
1945 goto out_err;
1946 }
1947 return sock;
1948out_err:
1949 return ERR_PTR(-EIO);
1950}
1951
1952/**
1953 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
1954 * @work: RPC transport to connect
1955 *
1956 * Invoked by a work queue tasklet.
1957 */
1958static void xs_tcp_connect_worker4(struct work_struct *work)
1959{
1960 struct sock_xprt *transport =
1961 container_of(work, struct sock_xprt, connect_worker.work);
1962 struct rpc_xprt *xprt = &transport->xprt;
1963
1964 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
1965}
1966
1967static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
1968 struct sock_xprt *transport)
1969{
1970 struct socket *sock;
1971 int err;
1972
1973 /* start from scratch */
1974 err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
1975 if (err < 0) {
1976 dprintk("RPC: can't create TCP transport socket (%d).\n",
1977 -err);
1978 goto out_err;
1979 }
1980 xs_reclassify_socket6(sock);
1981
1982 if (xs_bind6(transport, sock) < 0) {
1983 sock_release(sock);
1984 goto out_err;
1985 }
1986 return sock;
1987out_err:
1988 return ERR_PTR(-EIO);
1989}
1990
1991/**
1992 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
1993 * @work: RPC transport to connect
1994 *
1995 * Invoked by a work queue tasklet.
1996 */
1997static void xs_tcp_connect_worker6(struct work_struct *work)
1998{
1999 struct sock_xprt *transport =
2000 container_of(work, struct sock_xprt, connect_worker.work);
2001 struct rpc_xprt *xprt = &transport->xprt;
2002
2003 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
2004}
2005
2006/** 2167/**
2007 * xs_connect - connect a socket to a remote endpoint 2168 * xs_connect - connect a socket to a remote endpoint
2008 * @task: address of RPC task that manages state of connect request 2169 * @task: address of RPC task that manages state of connect request
@@ -2041,6 +2202,32 @@ static void xs_connect(struct rpc_task *task)
2041} 2202}
2042 2203
2043/** 2204/**
 2205 * xs_local_print_stats - display AF_LOCAL socket-specific stats
2206 * @xprt: rpc_xprt struct containing statistics
2207 * @seq: output file
2208 *
2209 */
2210static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
2211{
2212 long idle_time = 0;
2213
2214 if (xprt_connected(xprt))
2215 idle_time = (long)(jiffies - xprt->last_used) / HZ;
2216
2217 seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu "
2218 "%llu %llu\n",
2219 xprt->stat.bind_count,
2220 xprt->stat.connect_count,
2221 xprt->stat.connect_time,
2222 idle_time,
2223 xprt->stat.sends,
2224 xprt->stat.recvs,
2225 xprt->stat.bad_xids,
2226 xprt->stat.req_u,
2227 xprt->stat.bklog_u);
2228}
2229
2230/**
 2044 * xs_udp_print_stats - display UDP socket-specific stats 2231
2045 * @xprt: rpc_xprt struct containing statistics 2232 * @xprt: rpc_xprt struct containing statistics
2046 * @seq: output file 2233 * @seq: output file
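The "local" stats line carries, in order: bind and connect counts, cumulative connect time, idle seconds, sends, receives, bad XIDs, and the two per-op utilization sums (req_u, bklog_u); unlike UDP/TCP there are no port fields. A small sketch of consuming such a line, assuming exactly the field order printed above:

#include <stdio.h>

int main(void)
{
        const char *line = "\txprt:\tlocal 0 1 1 0 16 16 0 16 0\n";
        unsigned long bind_cnt, conn_cnt, conn_time, sends, recvs, bad_xids;
        unsigned long long req_u, bklog_u;
        long idle;

        if (sscanf(line, " xprt: local %lu %lu %lu %ld %lu %lu %lu %llu %llu",
                   &bind_cnt, &conn_cnt, &conn_time, &idle,
                   &sends, &recvs, &bad_xids, &req_u, &bklog_u) == 9)
                printf("%lu sends, %lu recvs, idle %lds\n",
                       sends, recvs, idle);
        return 0;
}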
@@ -2138,10 +2325,7 @@ static int bc_sendto(struct rpc_rqst *req)
2138 unsigned long headoff; 2325 unsigned long headoff;
2139 unsigned long tailoff; 2326 unsigned long tailoff;
2140 2327
2141 /* 2328 xs_encode_stream_record_marker(xbufp);
2142 * Set up the rpc header and record marker stuff
2143 */
2144 xs_encode_tcp_record_marker(xbufp);
2145 2329
2146 tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; 2330 tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
2147 headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; 2331 headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
@@ -2213,6 +2397,21 @@ static void bc_destroy(struct rpc_xprt *xprt)
2213{ 2397{
2214} 2398}
2215 2399
2400static struct rpc_xprt_ops xs_local_ops = {
2401 .reserve_xprt = xprt_reserve_xprt,
2402 .release_xprt = xs_tcp_release_xprt,
2403 .rpcbind = xs_local_rpcbind,
2404 .set_port = xs_local_set_port,
2405 .connect = xs_connect,
2406 .buf_alloc = rpc_malloc,
2407 .buf_free = rpc_free,
2408 .send_request = xs_local_send_request,
2409 .set_retrans_timeout = xprt_set_retrans_timeout_def,
2410 .close = xs_close,
2411 .destroy = xs_destroy,
2412 .print_stats = xs_local_print_stats,
2413};
2414
2216static struct rpc_xprt_ops xs_udp_ops = { 2415static struct rpc_xprt_ops xs_udp_ops = {
2217 .set_buffer_size = xs_udp_set_buffer_size, 2416 .set_buffer_size = xs_udp_set_buffer_size,
2218 .reserve_xprt = xprt_reserve_xprt_cong, 2417 .reserve_xprt = xprt_reserve_xprt_cong,
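xs_local_ops wires the AF_LOCAL transport into the generic RPC client purely through this function-pointer table; the core never tests the address family. The idiom, reduced to a toy (all names illustrative):

#include <stdio.h>

struct transport_ops {
        int  (*send)(const char *msg);
        void (*close)(void);
};

static int local_send(const char *msg)
{
        return printf("local: %s\n", msg) < 0 ? -1 : 0;
}

static void local_close(void)
{
        puts("local: closed");
}

static const struct transport_ops local_ops = {
        .send  = local_send,
        .close = local_close,
};

int main(void)
{
        const struct transport_ops *ops = &local_ops; /* picked at setup */

        ops->send("hello");     /* generic code dispatches blindly */
        ops->close();
        return 0;
}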
@@ -2262,6 +2461,33 @@ static struct rpc_xprt_ops bc_tcp_ops = {
2262 .print_stats = xs_tcp_print_stats, 2461 .print_stats = xs_tcp_print_stats,
2263}; 2462};
2264 2463
2464static int xs_init_anyaddr(const int family, struct sockaddr *sap)
2465{
2466 static const struct sockaddr_in sin = {
2467 .sin_family = AF_INET,
2468 .sin_addr.s_addr = htonl(INADDR_ANY),
2469 };
2470 static const struct sockaddr_in6 sin6 = {
2471 .sin6_family = AF_INET6,
2472 .sin6_addr = IN6ADDR_ANY_INIT,
2473 };
2474
2475 switch (family) {
2476 case AF_LOCAL:
2477 break;
2478 case AF_INET:
2479 memcpy(sap, &sin, sizeof(sin));
2480 break;
2481 case AF_INET6:
2482 memcpy(sap, &sin6, sizeof(sin6));
2483 break;
2484 default:
2485 dprintk("RPC: %s: Bad address family\n", __func__);
2486 return -EAFNOSUPPORT;
2487 }
2488 return 0;
2489}
2490
2265static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, 2491static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2266 unsigned int slot_table_size) 2492 unsigned int slot_table_size)
2267{ 2493{
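xs_init_anyaddr exists so that transports created without an explicit source address still have something sensible for xs_bind to copy: the family's wildcard. A userspace rendering, where in6addr_any is the userspace spelling of the kernel's IN6ADDR_ANY_INIT:

#include <errno.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int init_anyaddr(int family, struct sockaddr_storage *ss)
{
        struct sockaddr_in sin = {
                .sin_family = AF_INET,
                .sin_addr.s_addr = htonl(INADDR_ANY),
        };
        struct sockaddr_in6 sin6 = {
                .sin6_family = AF_INET6,
                .sin6_addr = in6addr_any,
        };

        switch (family) {
        case AF_LOCAL:
                break;          /* AF_LOCAL needs no source address */
        case AF_INET:
                memcpy(ss, &sin, sizeof(sin));
                break;
        case AF_INET6:
                memcpy(ss, &sin6, sizeof(sin6));
                break;
        default:
                return -EAFNOSUPPORT;
        }
        return 0;
}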
@@ -2273,31 +2499,93 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2273 return ERR_PTR(-EBADF); 2499 return ERR_PTR(-EBADF);
2274 } 2500 }
2275 2501
2276 new = kzalloc(sizeof(*new), GFP_KERNEL); 2502 xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
2277 if (new == NULL) { 2503 if (xprt == NULL) {
2278 dprintk("RPC: xs_setup_xprt: couldn't allocate " 2504 dprintk("RPC: xs_setup_xprt: couldn't allocate "
2279 "rpc_xprt\n"); 2505 "rpc_xprt\n");
2280 return ERR_PTR(-ENOMEM); 2506 return ERR_PTR(-ENOMEM);
2281 } 2507 }
2282 xprt = &new->xprt;
2283
2284 xprt->max_reqs = slot_table_size;
2285 xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
2286 if (xprt->slot == NULL) {
2287 kfree(xprt);
2288 dprintk("RPC: xs_setup_xprt: couldn't allocate slot "
2289 "table\n");
2290 return ERR_PTR(-ENOMEM);
2291 }
2292 2508
2509 new = container_of(xprt, struct sock_xprt, xprt);
2293 memcpy(&xprt->addr, args->dstaddr, args->addrlen); 2510 memcpy(&xprt->addr, args->dstaddr, args->addrlen);
2294 xprt->addrlen = args->addrlen; 2511 xprt->addrlen = args->addrlen;
2295 if (args->srcaddr) 2512 if (args->srcaddr)
2296 memcpy(&new->srcaddr, args->srcaddr, args->addrlen); 2513 memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
2514 else {
2515 int err;
2516 err = xs_init_anyaddr(args->dstaddr->sa_family,
2517 (struct sockaddr *)&new->srcaddr);
2518 if (err != 0)
2519 return ERR_PTR(err);
2520 }
2297 2521
2298 return xprt; 2522 return xprt;
2299} 2523}
2300 2524
2525static const struct rpc_timeout xs_local_default_timeout = {
2526 .to_initval = 10 * HZ,
2527 .to_maxval = 10 * HZ,
2528 .to_retries = 2,
2529};
2530
2531/**
2532 * xs_setup_local - Set up transport to use an AF_LOCAL socket
2533 * @args: rpc transport creation arguments
2534 *
2535 * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP
2536 */
2537static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
2538{
2539 struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr;
2540 struct sock_xprt *transport;
2541 struct rpc_xprt *xprt;
2542 struct rpc_xprt *ret;
2543
2544 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2545 if (IS_ERR(xprt))
2546 return xprt;
2547 transport = container_of(xprt, struct sock_xprt, xprt);
2548
2549 xprt->prot = 0;
2550 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
2551 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
2552
2553 xprt->bind_timeout = XS_BIND_TO;
2554 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2555 xprt->idle_timeout = XS_IDLE_DISC_TO;
2556
2557 xprt->ops = &xs_local_ops;
2558 xprt->timeout = &xs_local_default_timeout;
2559
2560 switch (sun->sun_family) {
2561 case AF_LOCAL:
2562 if (sun->sun_path[0] != '/') {
2563 dprintk("RPC: bad AF_LOCAL address: %s\n",
2564 sun->sun_path);
2565 ret = ERR_PTR(-EINVAL);
2566 goto out_err;
2567 }
2568 xprt_set_bound(xprt);
2569 INIT_DELAYED_WORK(&transport->connect_worker,
2570 xs_local_setup_socket);
2571 xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
2572 break;
2573 default:
2574 ret = ERR_PTR(-EAFNOSUPPORT);
2575 goto out_err;
2576 }
2577
2578 dprintk("RPC: set up xprt to %s via AF_LOCAL\n",
2579 xprt->address_strings[RPC_DISPLAY_ADDR]);
2580
2581 if (try_module_get(THIS_MODULE))
2582 return xprt;
2583 ret = ERR_PTR(-EINVAL);
2584out_err:
2585 xprt_free(xprt);
2586 return ret;
2587}
2588
2301static const struct rpc_timeout xs_udp_default_timeout = { 2589static const struct rpc_timeout xs_udp_default_timeout = {
2302 .to_initval = 5 * HZ, 2590 .to_initval = 5 * HZ,
2303 .to_maxval = 30 * HZ, 2591 .to_maxval = 30 * HZ,
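xs_setup_local accepts only absolute sun_path values, since a relative path would resolve against whatever the kernel's notion of a working directory happens to be. The same check for a userspace sockaddr_un, plus the length guard the kernel gets for free from addrlen:

#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

/* Returns 0 if path is usable as an AF_LOCAL endpoint, -1 otherwise. */
static int check_local_path(struct sockaddr_un *sun, const char *path)
{
        if (path[0] != '/')                             /* must be absolute */
                return -1;
        if (strlen(path) >= sizeof(sun->sun_path))      /* must fit, NUL included */
                return -1;
        sun->sun_family = AF_LOCAL;
        strcpy(sun->sun_path, path);
        return 0;
}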
@@ -2341,7 +2629,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2341 xprt_set_bound(xprt); 2629 xprt_set_bound(xprt);
2342 2630
2343 INIT_DELAYED_WORK(&transport->connect_worker, 2631 INIT_DELAYED_WORK(&transport->connect_worker,
2344 xs_udp_connect_worker4); 2632 xs_udp_setup_socket);
2345 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); 2633 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
2346 break; 2634 break;
2347 case AF_INET6: 2635 case AF_INET6:
@@ -2349,7 +2637,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2349 xprt_set_bound(xprt); 2637 xprt_set_bound(xprt);
2350 2638
2351 INIT_DELAYED_WORK(&transport->connect_worker, 2639 INIT_DELAYED_WORK(&transport->connect_worker,
2352 xs_udp_connect_worker6); 2640 xs_udp_setup_socket);
2353 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); 2641 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
2354 break; 2642 break;
2355 default: 2643 default:
@@ -2371,8 +2659,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2371 return xprt; 2659 return xprt;
2372 ret = ERR_PTR(-EINVAL); 2660 ret = ERR_PTR(-EINVAL);
2373out_err: 2661out_err:
2374 kfree(xprt->slot); 2662 xprt_free(xprt);
2375 kfree(xprt);
2376 return ret; 2663 return ret;
2377} 2664}
2378 2665
@@ -2416,7 +2703,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2416 xprt_set_bound(xprt); 2703 xprt_set_bound(xprt);
2417 2704
2418 INIT_DELAYED_WORK(&transport->connect_worker, 2705 INIT_DELAYED_WORK(&transport->connect_worker,
2419 xs_tcp_connect_worker4); 2706 xs_tcp_setup_socket);
2420 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); 2707 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
2421 break; 2708 break;
2422 case AF_INET6: 2709 case AF_INET6:
@@ -2424,7 +2711,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2424 xprt_set_bound(xprt); 2711 xprt_set_bound(xprt);
2425 2712
2426 INIT_DELAYED_WORK(&transport->connect_worker, 2713 INIT_DELAYED_WORK(&transport->connect_worker,
2427 xs_tcp_connect_worker6); 2714 xs_tcp_setup_socket);
2428 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 2715 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
2429 break; 2716 break;
2430 default: 2717 default:
@@ -2447,8 +2734,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2447 return xprt; 2734 return xprt;
2448 ret = ERR_PTR(-EINVAL); 2735 ret = ERR_PTR(-EINVAL);
2449out_err: 2736out_err:
2450 kfree(xprt->slot); 2737 xprt_free(xprt);
2451 kfree(xprt);
2452 return ret; 2738 return ret;
2453} 2739}
2454 2740
@@ -2465,6 +2751,15 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2465 struct svc_sock *bc_sock; 2751 struct svc_sock *bc_sock;
2466 struct rpc_xprt *ret; 2752 struct rpc_xprt *ret;
2467 2753
2754 if (args->bc_xprt->xpt_bc_xprt) {
2755 /*
2756 * This server connection already has a backchannel
2757 * export; we can't create a new one, as we wouldn't be
2758 * able to match replies based on xid any more. So,
2759 * reuse the already-existing one:
2760 */
2761 return args->bc_xprt->xpt_bc_xprt;
2762 }
2468 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2763 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2469 if (IS_ERR(xprt)) 2764 if (IS_ERR(xprt))
2470 return xprt; 2765 return xprt;
@@ -2481,16 +2776,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2481 xprt->reestablish_timeout = 0; 2776 xprt->reestablish_timeout = 0;
2482 xprt->idle_timeout = 0; 2777 xprt->idle_timeout = 0;
2483 2778
2484 /*
2485 * The backchannel uses the same socket connection as the
2486 * forechannel
2487 */
2488 xprt->bc_xprt = args->bc_xprt;
2489 bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
2490 bc_sock->sk_bc_xprt = xprt;
2491 transport->sock = bc_sock->sk_sock;
2492 transport->inet = bc_sock->sk_sk;
2493
2494 xprt->ops = &bc_tcp_ops; 2779 xprt->ops = &bc_tcp_ops;
2495 2780
2496 switch (addr->sa_family) { 2781 switch (addr->sa_family) {
@@ -2507,15 +2792,24 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2507 goto out_err; 2792 goto out_err;
2508 } 2793 }
2509 2794
2510 if (xprt_bound(xprt)) 2795 dprintk("RPC: set up xprt to %s (port %s) via %s\n",
2511 dprintk("RPC: set up xprt to %s (port %s) via %s\n", 2796 xprt->address_strings[RPC_DISPLAY_ADDR],
2512 xprt->address_strings[RPC_DISPLAY_ADDR], 2797 xprt->address_strings[RPC_DISPLAY_PORT],
2513 xprt->address_strings[RPC_DISPLAY_PORT], 2798 xprt->address_strings[RPC_DISPLAY_PROTO]);
2514 xprt->address_strings[RPC_DISPLAY_PROTO]); 2799
2515 else 2800 /*
2516 dprintk("RPC: set up xprt to %s (autobind) via %s\n", 2801 * Once we've associated a backchannel xprt with a connection,
2517 xprt->address_strings[RPC_DISPLAY_ADDR], 2802 * we want to keep it around as long as long as the connection
2518 xprt->address_strings[RPC_DISPLAY_PROTO]); 2803 * lasts, in case we need to start using it for a backchannel
2804 * again; this reference won't be dropped until bc_xprt is
2805 * destroyed.
2806 */
2807 xprt_get(xprt);
2808 args->bc_xprt->xpt_bc_xprt = xprt;
2809 xprt->bc_xprt = args->bc_xprt;
2810 bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
2811 transport->sock = bc_sock->sk_sock;
2812 transport->inet = bc_sock->sk_sk;
2519 2813
2520 /* 2814 /*
2521 * Since we don't want connections for the backchannel, we set 2815 * Since we don't want connections for the backchannel, we set
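The backchannel change above is a publish-with-reference pattern: take a reference when the xprt pointer is stored in xpt_bc_xprt, return the cached xprt on later setup calls, and drop the reference only when the owner goes away. The skeleton of that pattern with C11 atomics (obj, obj_get, obj_put are illustrative names, not kernel APIs):

#include <stdatomic.h>
#include <stdlib.h>

struct obj {
        atomic_int refs;        /* assume this starts at 1 on creation */
};

static struct obj *obj_get(struct obj *o)
{
        atomic_fetch_add(&o->refs, 1);
        return o;
}

static void obj_put(struct obj *o)
{
        if (atomic_fetch_sub(&o->refs, 1) == 1)
                free(o);        /* that was the last reference */
}

/* Publish: cache a referenced pointer in the owner.  Later setup
 * calls get the cached object back instead of building a new one,
 * which is the shape of the xpt_bc_xprt handling above. */
static struct obj *setup(struct obj **cache, struct obj *fresh)
{
        if (*cache)
                return *cache;
        *cache = obj_get(fresh);
        return fresh;
}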
@@ -2526,13 +2820,21 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2526 2820
2527 if (try_module_get(THIS_MODULE)) 2821 if (try_module_get(THIS_MODULE))
2528 return xprt; 2822 return xprt;
2823 xprt_put(xprt);
2529 ret = ERR_PTR(-EINVAL); 2824 ret = ERR_PTR(-EINVAL);
2530out_err: 2825out_err:
2531 kfree(xprt->slot); 2826 xprt_free(xprt);
2532 kfree(xprt);
2533 return ret; 2827 return ret;
2534} 2828}
2535 2829
2830static struct xprt_class xs_local_transport = {
2831 .list = LIST_HEAD_INIT(xs_local_transport.list),
2832 .name = "named UNIX socket",
2833 .owner = THIS_MODULE,
2834 .ident = XPRT_TRANSPORT_LOCAL,
2835 .setup = xs_setup_local,
2836};
2837
2536static struct xprt_class xs_udp_transport = { 2838static struct xprt_class xs_udp_transport = {
2537 .list = LIST_HEAD_INIT(xs_udp_transport.list), 2839 .list = LIST_HEAD_INIT(xs_udp_transport.list),
2538 .name = "udp", 2840 .name = "udp",
@@ -2568,6 +2870,7 @@ int init_socket_xprt(void)
2568 sunrpc_table_header = register_sysctl_table(sunrpc_table); 2870 sunrpc_table_header = register_sysctl_table(sunrpc_table);
2569#endif 2871#endif
2570 2872
2873 xprt_register_transport(&xs_local_transport);
2571 xprt_register_transport(&xs_udp_transport); 2874 xprt_register_transport(&xs_udp_transport);
2572 xprt_register_transport(&xs_tcp_transport); 2875 xprt_register_transport(&xs_tcp_transport);
2573 xprt_register_transport(&xs_bc_tcp_transport); 2876 xprt_register_transport(&xs_bc_tcp_transport);
@@ -2588,6 +2891,7 @@ void cleanup_socket_xprt(void)
2588 } 2891 }
2589#endif 2892#endif
2590 2893
2894 xprt_unregister_transport(&xs_local_transport);
2591 xprt_unregister_transport(&xs_udp_transport); 2895 xprt_unregister_transport(&xs_udp_transport);
2592 xprt_unregister_transport(&xs_tcp_transport); 2896 xprt_unregister_transport(&xs_tcp_transport);
2593 xprt_unregister_transport(&xs_bc_tcp_transport); 2897 xprt_unregister_transport(&xs_bc_tcp_transport);
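Wiring the new AF_LOCAL transport in is the usual symmetric pair: register on module init, unregister on cleanup. The registry idiom itself, reduced to a singly linked list in plain C (all names here are illustrative, not the kernel's xprt_class machinery):

#include <stddef.h>

struct xprt_class_demo {
        struct xprt_class_demo *next;
        const char *name;
};

static struct xprt_class_demo *transports;      /* registry head */

static void register_transport(struct xprt_class_demo *t)
{
        t->next = transports;
        transports = t;
}

static void unregister_transport(struct xprt_class_demo *t)
{
        struct xprt_class_demo **p = &transports;

        while (*p && *p != t)
                p = &(*p)->next;
        if (*p)
                *p = t->next;   /* unlink */
}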