aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-22 10:54:33 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-22 10:54:33 -0400
commita87e84b5cdfacf11af4e8a85c4bca9793658536f (patch)
treef8e3cb2d339d8ed0e987d55f725e501730cdc81d
parent342ff1a1b558ebbdb8cbd55ab6a63eca8b2473ca (diff)
parent3c394ddaa7ea4205f933fd9b481166b2669368a9 (diff)
Merge branch 'for-2.6.32' of git://linux-nfs.org/~bfields/linux
* 'for-2.6.32' of git://linux-nfs.org/~bfields/linux: (68 commits) nfsd4: nfsv4 clients should cross mountpoints nfsd: revise 4.1 status documentation sunrpc/cache: avoid variable over-loading in cache_defer_req sunrpc/cache: use list_del_init for the list_head entries in cache_deferred_req nfsd: return success for non-NFS4 nfs4_state_start nfsd41: Refactor create_client() nfsd41: modify nfsd4.1 backchannel to use new xprt class nfsd41: Backchannel: Implement cb_recall over NFSv4.1 nfsd41: Backchannel: cb_sequence callback nfsd41: Backchannel: Setup sequence information nfsd41: Backchannel: Server backchannel RPC wait queue nfsd41: Backchannel: Add sequence arguments to callback RPC arguments nfsd41: Backchannel: callback infrastructure nfsd4: use common rpc_cred for all callbacks nfsd4: allow nfs4 state startup to fail SUNRPC: Defer the auth_gss upcall when the RPC call is asynchronous nfsd4: fix null dereference creating nfsv4 callback client nfsd4: fix whitespace in NFSPROC4_CLNT_CB_NULL definition nfsd41: sunrpc: add new xprt class for nfsv4.1 backchannel sunrpc/cache: simplify cache_fresh_locked and cache_fresh_unlocked. ...
-rw-r--r--Documentation/filesystems/nfs41-server.txt54
-rw-r--r--fs/lockd/clntlock.c2
-rw-r--r--fs/lockd/host.c4
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/nfsd/export.c2
-rw-r--r--fs/nfsd/nfs3xdr.c75
-rw-r--r--fs/nfsd/nfs4acl.c4
-rw-r--r--fs/nfsd/nfs4callback.c263
-rw-r--r--fs/nfsd/nfs4proc.c89
-rw-r--r--fs/nfsd/nfs4state.c681
-rw-r--r--fs/nfsd/nfs4xdr.c42
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nfsd/nfsfh.c158
-rw-r--r--fs/nfsd/nfssvc.c54
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--include/linux/lockd/lockd.h43
-rw-r--r--include/linux/nfs4.h2
-rw-r--r--include/linux/nfsd/nfsd.h7
-rw-r--r--include/linux/nfsd/state.h77
-rw-r--r--include/linux/nfsd/xdr4.h19
-rw-r--r--include/linux/sunrpc/auth.h4
-rw-r--r--include/linux/sunrpc/clnt.h114
-rw-r--r--include/linux/sunrpc/svc.h2
-rw-r--r--include/linux/sunrpc/svc_xprt.h1
-rw-r--r--include/linux/sunrpc/svcsock.h1
-rw-r--r--include/linux/sunrpc/xprt.h19
-rw-r--r--include/linux/sunrpc/xprtrdma.h5
-rw-r--r--include/linux/sunrpc/xprtsock.h11
-rw-r--r--net/sunrpc/auth.c20
-rw-r--r--net/sunrpc/auth_generic.c4
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c6
-rw-r--r--net/sunrpc/cache.c109
-rw-r--r--net/sunrpc/clnt.c1
-rw-r--r--net/sunrpc/sched.c7
-rw-r--r--net/sunrpc/sunrpc.h14
-rw-r--r--net/sunrpc/svc_xprt.c25
-rw-r--r--net/sunrpc/svcauth_unix.c1
-rw-r--r--net/sunrpc/svcsock.c335
-rw-r--r--net/sunrpc/xprt.c15
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c4
-rw-r--r--net/sunrpc/xprtsock.c242
42 files changed, 1640 insertions, 897 deletions
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
index 05d81cbcb2e1..5920fe26e6ff 100644
--- a/Documentation/filesystems/nfs41-server.txt
+++ b/Documentation/filesystems/nfs41-server.txt
@@ -11,6 +11,11 @@ the /proc/fs/nfsd/versions control file. Note that to write this
11control file, the nfsd service must be taken down. Use your user-mode 11control file, the nfsd service must be taken down. Use your user-mode
12nfs-utils to set this up; see rpc.nfsd(8) 12nfs-utils to set this up; see rpc.nfsd(8)
13 13
14(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
15"-4", respectively. Therefore, code meant to work on both new and old
16kernels must turn 4.1 on or off *before* turning support for version 4
17on or off; rpc.nfsd does this correctly.)
18
14The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based 19The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
15on the latest NFSv4.1 Internet Draft: 20on the latest NFSv4.1 Internet Draft:
16http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29 21http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
@@ -25,6 +30,49 @@ are still under development out of tree.
25See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design 30See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
26for more information. 31for more information.
27 32
33The current implementation is intended for developers only: while it
34does support ordinary file operations on clients we have tested against
35(including the linux client), it is incomplete in ways which may limit
36features unexpectedly, cause known bugs in rare cases, or cause
37interoperability problems with future clients. Known issues:
38
39 - gss support is questionable: currently mounts with kerberos
40 from a linux client are possible, but we aren't really
41 conformant with the spec (for example, we don't use kerberos
42 on the backchannel correctly).
43 - no trunking support: no clients currently take advantage of
44 trunking, but this is a mandatory feature, and its use is
45 recommended to clients in a number of places. (E.g. to ensure
46 timely renewal in case an existing connection's retry timeouts
47 have gotten too long; see section 8.3 of the draft.)
48 Therefore, lack of this feature may cause future clients to
49 fail.
50 - Incomplete backchannel support: incomplete backchannel gss
51 support and no support for BACKCHANNEL_CTL mean that
52 callbacks (hence delegations and layouts) may not be
53 available and clients confused by the incomplete
54 implementation may fail.
55 - Server reboot recovery is unsupported; if the server reboots,
56 clients may fail.
57 - We do not support SSV, which provides security for shared
58 client-server state (thus preventing unauthorized tampering
59 with locks and opens, for example). It is mandatory for
60 servers to support this, though no clients use it yet.
61 - Mandatory operations which we do not support, such as
62 DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
63 TEST_STATEID, are not currently used by clients, but will be
64 (and the spec recommends their uses in common cases), and
65 clients should not be expected to know how to recover from the
66 case where they are not supported. This will eventually cause
67 interoperability failures.
68
69In addition, some limitations are inherited from the current NFSv4
70implementation:
71
72 - Incomplete delegation enforcement: if a file is renamed or
73 unlinked, a client holding a delegation may continue to
74 indefinitely allow opens of the file under the old name.
75
28The table below, taken from the NFSv4.1 document, lists 76The table below, taken from the NFSv4.1 document, lists
29the operations that are mandatory to implement (REQ), optional 77the operations that are mandatory to implement (REQ), optional
30(OPT), and NFSv4.0 operations that are required not to implement (MNI) 78(OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -142,6 +190,12 @@ NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
142 190
143Implementation notes: 191Implementation notes:
144 192
193DELEGPURGE:
194* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
195 CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
196 persist across client reboots). Thus we need not implement this for
197 now.
198
145EXCHANGE_ID: 199EXCHANGE_ID:
146* only SP4_NONE state protection supported 200* only SP4_NONE state protection supported
147* implementation ids are ignored 201* implementation ids are ignored
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 1f3b0fc0d351..fc9032dc8862 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
166 */ 166 */
167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) 167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
168 continue; 168 continue;
169 if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) 169 if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
170 continue; 170 continue;
171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
172 continue; 172 continue;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 7cb076ac6b45..4600c2037b8b 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
111 */ 111 */
112 chain = &nlm_hosts[nlm_hash_address(ni->sap)]; 112 chain = &nlm_hosts[nlm_hash_address(ni->sap)];
113 hlist_for_each_entry(host, pos, chain, h_hash) { 113 hlist_for_each_entry(host, pos, chain, h_hash) {
114 if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) 114 if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
115 continue; 115 continue;
116 116
117 /* See if we have an NSM handle for this client */ 117 /* See if we have an NSM handle for this client */
@@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
125 if (host->h_server != ni->server) 125 if (host->h_server != ni->server)
126 continue; 126 continue;
127 if (ni->server && 127 if (ni->server &&
128 !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) 128 !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
129 continue; 129 continue;
130 130
131 /* Move to head of hash chain. */ 131 /* Move to head of hash chain. */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 30c933188dd7..f956651d0f65 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap)
209 struct nsm_handle *nsm; 209 struct nsm_handle *nsm;
210 210
211 list_for_each_entry(nsm, &nsm_handles, sm_link) 211 list_for_each_entry(nsm, &nsm_handles, sm_link)
212 if (nlm_cmp_addr(nsm_addr(nsm), sap)) 212 if (rpc_cmp_addr(nsm_addr(nsm), sap))
213 return nsm; 213 return nsm;
214 return NULL; 214 return NULL;
215} 215}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 9e4d6aab611b..ad478da7ca63 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
417static int 417static int
418nlmsvc_match_ip(void *datap, struct nlm_host *host) 418nlmsvc_match_ip(void *datap, struct nlm_host *host)
419{ 419{
420 return nlm_cmp_addr(nlm_srcaddr(host), datap); 420 return rpc_cmp_addr(nlm_srcaddr(host), datap);
421} 421}
422 422
423/** 423/**
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index d9462643155c..984a5ebcc1d6 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1341,6 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1341 if (rv) 1341 if (rv)
1342 goto out; 1342 goto out;
1343 rv = check_nfsd_access(exp, rqstp); 1343 rv = check_nfsd_access(exp, rqstp);
1344 if (rv)
1345 fh_put(fhp);
1344out: 1346out:
1345 exp_put(exp); 1347 exp_put(exp);
1346 return rv; 1348 return rv;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 01d4ec1c88e0..edf926e1062f 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -814,17 +814,6 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
814 return p; 814 return p;
815} 815}
816 816
817static __be32 *
818encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
819 struct svc_fh *fhp)
820{
821 p = encode_post_op_attr(cd->rqstp, p, fhp);
822 *p++ = xdr_one; /* yes, a file handle follows */
823 p = encode_fh(p, fhp);
824 fh_put(fhp);
825 return p;
826}
827
828static int 817static int
829compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, 818compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
830 const char *name, int namlen) 819 const char *name, int namlen)
@@ -836,29 +825,54 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
836 dparent = cd->fh.fh_dentry; 825 dparent = cd->fh.fh_dentry;
837 exp = cd->fh.fh_export; 826 exp = cd->fh.fh_export;
838 827
839 fh_init(fhp, NFS3_FHSIZE);
840 if (isdotent(name, namlen)) { 828 if (isdotent(name, namlen)) {
841 if (namlen == 2) { 829 if (namlen == 2) {
842 dchild = dget_parent(dparent); 830 dchild = dget_parent(dparent);
843 if (dchild == dparent) { 831 if (dchild == dparent) {
844 /* filesystem root - cannot return filehandle for ".." */ 832 /* filesystem root - cannot return filehandle for ".." */
845 dput(dchild); 833 dput(dchild);
846 return 1; 834 return -ENOENT;
847 } 835 }
848 } else 836 } else
849 dchild = dget(dparent); 837 dchild = dget(dparent);
850 } else 838 } else
851 dchild = lookup_one_len(name, dparent, namlen); 839 dchild = lookup_one_len(name, dparent, namlen);
852 if (IS_ERR(dchild)) 840 if (IS_ERR(dchild))
853 return 1; 841 return -ENOENT;
854 if (d_mountpoint(dchild) || 842 rv = -ENOENT;
855 fh_compose(fhp, exp, dchild, &cd->fh) != 0 || 843 if (d_mountpoint(dchild))
856 !dchild->d_inode) 844 goto out;
857 rv = 1; 845 rv = fh_compose(fhp, exp, dchild, &cd->fh);
846 if (rv)
847 goto out;
848 if (!dchild->d_inode)
849 goto out;
850 rv = 0;
851out:
858 dput(dchild); 852 dput(dchild);
859 return rv; 853 return rv;
860} 854}
861 855
856__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
857{
858 struct svc_fh fh;
859 int err;
860
861 fh_init(&fh, NFS3_FHSIZE);
862 err = compose_entry_fh(cd, &fh, name, namlen);
863 if (err) {
864 *p++ = 0;
865 *p++ = 0;
866 goto out;
867 }
868 p = encode_post_op_attr(cd->rqstp, p, &fh);
869 *p++ = xdr_one; /* yes, a file handle follows */
870 p = encode_fh(p, &fh);
871out:
872 fh_put(&fh);
873 return p;
874}
875
862/* 876/*
863 * Encode a directory entry. This one works for both normal readdir 877 * Encode a directory entry. This one works for both normal readdir
864 * and readdirplus. 878 * and readdirplus.
@@ -929,16 +943,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
929 943
930 p = encode_entry_baggage(cd, p, name, namlen, ino); 944 p = encode_entry_baggage(cd, p, name, namlen, ino);
931 945
932 /* throw in readdirplus baggage */ 946 if (plus)
933 if (plus) { 947 p = encode_entryplus_baggage(cd, p, name, namlen);
934 struct svc_fh fh;
935
936 if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
937 *p++ = 0;
938 *p++ = 0;
939 } else
940 p = encode_entryplus_baggage(cd, p, &fh);
941 }
942 num_entry_words = p - cd->buffer; 948 num_entry_words = p - cd->buffer;
943 } else if (cd->rqstp->rq_respages[pn+1] != NULL) { 949 } else if (cd->rqstp->rq_respages[pn+1] != NULL) {
944 /* temporarily encode entry into next page, then move back to 950 /* temporarily encode entry into next page, then move back to
@@ -951,17 +957,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
951 957
952 p1 = encode_entry_baggage(cd, p1, name, namlen, ino); 958 p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
953 959
954 /* throw in readdirplus baggage */ 960 if (plus)
955 if (plus) { 961 p = encode_entryplus_baggage(cd, p1, name, namlen);
956 struct svc_fh fh;
957
958 if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
959 /* zero out the filehandle */
960 *p1++ = 0;
961 *p1++ = 0;
962 } else
963 p1 = encode_entryplus_baggage(cd, p1, &fh);
964 }
965 962
966 /* determine entry word length and lengths to go in pages */ 963 /* determine entry word length and lengths to go in pages */
967 num_entry_words = p1 - tmp; 964 num_entry_words = p1 - tmp;
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 54b8b4140c8f..725d02f210e2 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -321,7 +321,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
321 deny = ~pas.group & pas.other; 321 deny = ~pas.group & pas.other;
322 if (deny) { 322 if (deny) {
323 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; 323 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
324 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 324 ace->flag = eflag;
325 ace->access_mask = deny_mask_from_posix(deny, flags); 325 ace->access_mask = deny_mask_from_posix(deny, flags);
326 ace->whotype = NFS4_ACL_WHO_GROUP; 326 ace->whotype = NFS4_ACL_WHO_GROUP;
327 ace++; 327 ace++;
@@ -335,7 +335,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
335 if (deny) { 335 if (deny) {
336 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; 336 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
337 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 337 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
338 ace->access_mask = mask_from_posix(deny, flags); 338 ace->access_mask = deny_mask_from_posix(deny, flags);
339 ace->whotype = NFS4_ACL_WHO_NAMED; 339 ace->whotype = NFS4_ACL_WHO_NAMED;
340 ace->who = pa->e_id; 340 ace->who = pa->e_id;
341 ace++; 341 ace++;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3fd23f7aceca..24e8d78f8dde 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -43,25 +43,30 @@
43#include <linux/sunrpc/xdr.h> 43#include <linux/sunrpc/xdr.h>
44#include <linux/sunrpc/svc.h> 44#include <linux/sunrpc/svc.h>
45#include <linux/sunrpc/clnt.h> 45#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/svcsock.h>
46#include <linux/nfsd/nfsd.h> 47#include <linux/nfsd/nfsd.h>
47#include <linux/nfsd/state.h> 48#include <linux/nfsd/state.h>
48#include <linux/sunrpc/sched.h> 49#include <linux/sunrpc/sched.h>
49#include <linux/nfs4.h> 50#include <linux/nfs4.h>
51#include <linux/sunrpc/xprtsock.h>
50 52
51#define NFSDDBG_FACILITY NFSDDBG_PROC 53#define NFSDDBG_FACILITY NFSDDBG_PROC
52 54
53#define NFSPROC4_CB_NULL 0 55#define NFSPROC4_CB_NULL 0
54#define NFSPROC4_CB_COMPOUND 1 56#define NFSPROC4_CB_COMPOUND 1
57#define NFS4_STATEID_SIZE 16
55 58
56/* Index of predefined Linux callback client operations */ 59/* Index of predefined Linux callback client operations */
57 60
58enum { 61enum {
59 NFSPROC4_CLNT_CB_NULL = 0, 62 NFSPROC4_CLNT_CB_NULL = 0,
60 NFSPROC4_CLNT_CB_RECALL, 63 NFSPROC4_CLNT_CB_RECALL,
64 NFSPROC4_CLNT_CB_SEQUENCE,
61}; 65};
62 66
63enum nfs_cb_opnum4 { 67enum nfs_cb_opnum4 {
64 OP_CB_RECALL = 4, 68 OP_CB_RECALL = 4,
69 OP_CB_SEQUENCE = 11,
65}; 70};
66 71
67#define NFS4_MAXTAGLEN 20 72#define NFS4_MAXTAGLEN 20
@@ -70,17 +75,29 @@ enum nfs_cb_opnum4 {
70#define NFS4_dec_cb_null_sz 0 75#define NFS4_dec_cb_null_sz 0
71#define cb_compound_enc_hdr_sz 4 76#define cb_compound_enc_hdr_sz 4
72#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) 77#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
78#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2)
79#define cb_sequence_enc_sz (sessionid_sz + 4 + \
80 1 /* no referring calls list yet */)
81#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4)
82
73#define op_enc_sz 1 83#define op_enc_sz 1
74#define op_dec_sz 2 84#define op_dec_sz 2
75#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) 85#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2))
76#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) 86#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2)
77#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ 87#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \
88 cb_sequence_enc_sz + \
78 1 + enc_stateid_sz + \ 89 1 + enc_stateid_sz + \
79 enc_nfs4_fh_sz) 90 enc_nfs4_fh_sz)
80 91
81#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ 92#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \
93 cb_sequence_dec_sz + \
82 op_dec_sz) 94 op_dec_sz)
83 95
96struct nfs4_rpc_args {
97 void *args_op;
98 struct nfsd4_cb_sequence args_seq;
99};
100
84/* 101/*
85* Generic encode routines from fs/nfs/nfs4xdr.c 102* Generic encode routines from fs/nfs/nfs4xdr.c
86*/ 103*/
@@ -137,11 +154,13 @@ xdr_error: \
137} while (0) 154} while (0)
138 155
139struct nfs4_cb_compound_hdr { 156struct nfs4_cb_compound_hdr {
140 int status; 157 /* args */
141 u32 ident; 158 u32 ident; /* minorversion 0 only */
142 u32 nops; 159 u32 nops;
143 __be32 *nops_p; 160 __be32 *nops_p;
144 u32 minorversion; 161 u32 minorversion;
162 /* res */
163 int status;
145 u32 taglen; 164 u32 taglen;
146 char *tag; 165 char *tag;
147}; 166};
@@ -238,6 +257,27 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
238 hdr->nops++; 257 hdr->nops++;
239} 258}
240 259
260static void
261encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
262 struct nfs4_cb_compound_hdr *hdr)
263{
264 __be32 *p;
265
266 if (hdr->minorversion == 0)
267 return;
268
269 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
270
271 WRITE32(OP_CB_SEQUENCE);
272 WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN);
273 WRITE32(args->cbs_clp->cl_cb_seq_nr);
274 WRITE32(0); /* slotid, always 0 */
275 WRITE32(0); /* highest slotid always 0 */
276 WRITE32(0); /* cachethis always 0 */
277 WRITE32(0); /* FIXME: support referring_call_lists */
278 hdr->nops++;
279}
280
241static int 281static int
242nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) 282nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
243{ 283{
@@ -249,15 +289,19 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
249} 289}
250 290
251static int 291static int
252nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) 292nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
293 struct nfs4_rpc_args *rpc_args)
253{ 294{
254 struct xdr_stream xdr; 295 struct xdr_stream xdr;
296 struct nfs4_delegation *args = rpc_args->args_op;
255 struct nfs4_cb_compound_hdr hdr = { 297 struct nfs4_cb_compound_hdr hdr = {
256 .ident = args->dl_ident, 298 .ident = args->dl_ident,
299 .minorversion = rpc_args->args_seq.cbs_minorversion,
257 }; 300 };
258 301
259 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 302 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
260 encode_cb_compound_hdr(&xdr, &hdr); 303 encode_cb_compound_hdr(&xdr, &hdr);
304 encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
261 encode_cb_recall(&xdr, args, &hdr); 305 encode_cb_recall(&xdr, args, &hdr);
262 encode_cb_nops(&hdr); 306 encode_cb_nops(&hdr);
263 return 0; 307 return 0;
@@ -299,6 +343,57 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
299 return 0; 343 return 0;
300} 344}
301 345
346/*
347 * Our current back channel implementation supports a single backchannel
348 * with a single slot.
349 */
350static int
351decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
352 struct rpc_rqst *rqstp)
353{
354 struct nfs4_sessionid id;
355 int status;
356 u32 dummy;
357 __be32 *p;
358
359 if (res->cbs_minorversion == 0)
360 return 0;
361
362 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
363 if (status)
364 return status;
365
366 /*
367 * If the server returns different values for sessionID, slotID or
368 * sequence number, the server is looney tunes.
369 */
370 status = -ESERVERFAULT;
371
372 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
373 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
374 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
375 if (memcmp(id.data, res->cbs_clp->cl_sessionid.data,
376 NFS4_MAX_SESSIONID_LEN)) {
377 dprintk("%s Invalid session id\n", __func__);
378 goto out;
379 }
380 READ32(dummy);
381 if (dummy != res->cbs_clp->cl_cb_seq_nr) {
382 dprintk("%s Invalid sequence number\n", __func__);
383 goto out;
384 }
385 READ32(dummy); /* slotid must be 0 */
386 if (dummy != 0) {
387 dprintk("%s Invalid slotid\n", __func__);
388 goto out;
389 }
390 /* FIXME: process highest slotid and target highest slotid */
391 status = 0;
392out:
393 return status;
394}
395
396
302static int 397static int
303nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) 398nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
304{ 399{
@@ -306,7 +401,8 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
306} 401}
307 402
308static int 403static int
309nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) 404nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
405 struct nfsd4_cb_sequence *seq)
310{ 406{
311 struct xdr_stream xdr; 407 struct xdr_stream xdr;
312 struct nfs4_cb_compound_hdr hdr; 408 struct nfs4_cb_compound_hdr hdr;
@@ -316,6 +412,11 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
316 status = decode_cb_compound_hdr(&xdr, &hdr); 412 status = decode_cb_compound_hdr(&xdr, &hdr);
317 if (status) 413 if (status)
318 goto out; 414 goto out;
415 if (seq) {
416 status = decode_cb_sequence(&xdr, seq, rqstp);
417 if (status)
418 goto out;
419 }
319 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); 420 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
320out: 421out:
321 return status; 422 return status;
@@ -377,16 +478,15 @@ static int max_cb_time(void)
377 478
378int setup_callback_client(struct nfs4_client *clp) 479int setup_callback_client(struct nfs4_client *clp)
379{ 480{
380 struct sockaddr_in addr;
381 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 481 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
382 struct rpc_timeout timeparms = { 482 struct rpc_timeout timeparms = {
383 .to_initval = max_cb_time(), 483 .to_initval = max_cb_time(),
384 .to_retries = 0, 484 .to_retries = 0,
385 }; 485 };
386 struct rpc_create_args args = { 486 struct rpc_create_args args = {
387 .protocol = IPPROTO_TCP, 487 .protocol = XPRT_TRANSPORT_TCP,
388 .address = (struct sockaddr *)&addr, 488 .address = (struct sockaddr *) &cb->cb_addr,
389 .addrsize = sizeof(addr), 489 .addrsize = cb->cb_addrlen,
390 .timeout = &timeparms, 490 .timeout = &timeparms,
391 .program = &cb_program, 491 .program = &cb_program,
392 .prognumber = cb->cb_prog, 492 .prognumber = cb->cb_prog,
@@ -399,13 +499,10 @@ int setup_callback_client(struct nfs4_client *clp)
399 499
400 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 500 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
401 return -EINVAL; 501 return -EINVAL;
402 502 if (cb->cb_minorversion) {
403 /* Initialize address */ 503 args.bc_xprt = clp->cl_cb_xprt;
404 memset(&addr, 0, sizeof(addr)); 504 args.protocol = XPRT_TRANSPORT_BC_TCP;
405 addr.sin_family = AF_INET; 505 }
406 addr.sin_port = htons(cb->cb_port);
407 addr.sin_addr.s_addr = htonl(cb->cb_addr);
408
409 /* Create RPC client */ 506 /* Create RPC client */
410 client = rpc_create(&args); 507 client = rpc_create(&args);
411 if (IS_ERR(client)) { 508 if (IS_ERR(client)) {
@@ -439,42 +536,29 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
439 .rpc_call_done = nfsd4_cb_probe_done, 536 .rpc_call_done = nfsd4_cb_probe_done,
440}; 537};
441 538
442static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) 539static struct rpc_cred *callback_cred;
443{
444 struct auth_cred acred = {
445 .machine_cred = 1
446 };
447 540
448 /* 541int set_callback_cred(void)
449 * Note in the gss case this doesn't actually have to wait for a 542{
450 * gss upcall (or any calls to the client); this just creates a 543 callback_cred = rpc_lookup_machine_cred();
451 * non-uptodate cred which the rpc state machine will fill in with 544 if (!callback_cred)
452 * a refresh_upcall later. 545 return -ENOMEM;
453 */ 546 return 0;
454 return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
455 RPCAUTH_LOOKUP_NEW);
456} 547}
457 548
549
458void do_probe_callback(struct nfs4_client *clp) 550void do_probe_callback(struct nfs4_client *clp)
459{ 551{
460 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 552 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
461 struct rpc_message msg = { 553 struct rpc_message msg = {
462 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 554 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
463 .rpc_argp = clp, 555 .rpc_argp = clp,
556 .rpc_cred = callback_cred
464 }; 557 };
465 struct rpc_cred *cred;
466 int status; 558 int status;
467 559
468 cred = lookup_cb_cred(cb);
469 if (IS_ERR(cred)) {
470 status = PTR_ERR(cred);
471 goto out;
472 }
473 cb->cb_cred = cred;
474 msg.rpc_cred = cb->cb_cred;
475 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, 560 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT,
476 &nfsd4_cb_probe_ops, (void *)clp); 561 &nfsd4_cb_probe_ops, (void *)clp);
477out:
478 if (status) { 562 if (status) {
479 warn_no_callback_path(clp, status); 563 warn_no_callback_path(clp, status);
480 put_nfs4_client(clp); 564 put_nfs4_client(clp);
@@ -503,11 +587,95 @@ nfsd4_probe_callback(struct nfs4_client *clp)
503 do_probe_callback(clp); 587 do_probe_callback(clp);
504} 588}
505 589
590/*
591 * There's currently a single callback channel slot.
592 * If the slot is available, then mark it busy. Otherwise, set the
593 * thread for sleeping on the callback RPC wait queue.
594 */
595static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
596 struct rpc_task *task)
597{
598 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
599 u32 *ptr = (u32 *)clp->cl_sessionid.data;
600 int status = 0;
601
602 dprintk("%s: %u:%u:%u:%u\n", __func__,
603 ptr[0], ptr[1], ptr[2], ptr[3]);
604
605 if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
606 rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
607 dprintk("%s slot is busy\n", __func__);
608 status = -EAGAIN;
609 goto out;
610 }
611
612 /*
613 * We'll need the clp during XDR encoding and decoding,
614 * and the sequence during decoding to verify the reply
615 */
616 args->args_seq.cbs_clp = clp;
617 task->tk_msg.rpc_resp = &args->args_seq;
618
619out:
620 dprintk("%s status=%d\n", __func__, status);
621 return status;
622}
623
624/*
625 * TODO: cb_sequence should support referring call lists, cachethis, multiple
626 * slots, and mark callback channel down on communication errors.
627 */
628static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
629{
630 struct nfs4_delegation *dp = calldata;
631 struct nfs4_client *clp = dp->dl_client;
632 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
633 u32 minorversion = clp->cl_cb_conn.cb_minorversion;
634 int status = 0;
635
636 args->args_seq.cbs_minorversion = minorversion;
637 if (minorversion) {
638 status = nfsd41_cb_setup_sequence(clp, task);
639 if (status) {
640 if (status != -EAGAIN) {
641 /* terminate rpc task */
642 task->tk_status = status;
643 task->tk_action = NULL;
644 }
645 return;
646 }
647 }
648 rpc_call_start(task);
649}
650
651static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
652{
653 struct nfs4_delegation *dp = calldata;
654 struct nfs4_client *clp = dp->dl_client;
655
656 dprintk("%s: minorversion=%d\n", __func__,
657 clp->cl_cb_conn.cb_minorversion);
658
659 if (clp->cl_cb_conn.cb_minorversion) {
660 /* No need for lock, access serialized in nfsd4_cb_prepare */
661 ++clp->cl_cb_seq_nr;
662 clear_bit(0, &clp->cl_cb_slot_busy);
663 rpc_wake_up_next(&clp->cl_cb_waitq);
664 dprintk("%s: freed slot, new seqid=%d\n", __func__,
665 clp->cl_cb_seq_nr);
666
667 /* We're done looking into the sequence information */
668 task->tk_msg.rpc_resp = NULL;
669 }
670}
671
506static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 672static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
507{ 673{
508 struct nfs4_delegation *dp = calldata; 674 struct nfs4_delegation *dp = calldata;
509 struct nfs4_client *clp = dp->dl_client; 675 struct nfs4_client *clp = dp->dl_client;
510 676
677 nfsd4_cb_done(task, calldata);
678
511 switch (task->tk_status) { 679 switch (task->tk_status) {
512 case -EIO: 680 case -EIO:
513 /* Network partition? */ 681 /* Network partition? */
@@ -520,16 +688,19 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
520 break; 688 break;
521 default: 689 default:
522 /* success, or error we can't handle */ 690 /* success, or error we can't handle */
523 return; 691 goto done;
524 } 692 }
525 if (dp->dl_retries--) { 693 if (dp->dl_retries--) {
526 rpc_delay(task, 2*HZ); 694 rpc_delay(task, 2*HZ);
527 task->tk_status = 0; 695 task->tk_status = 0;
528 rpc_restart_call(task); 696 rpc_restart_call(task);
697 return;
529 } else { 698 } else {
530 atomic_set(&clp->cl_cb_conn.cb_set, 0); 699 atomic_set(&clp->cl_cb_conn.cb_set, 0);
531 warn_no_callback_path(clp, task->tk_status); 700 warn_no_callback_path(clp, task->tk_status);
532 } 701 }
702done:
703 kfree(task->tk_msg.rpc_argp);
533} 704}
534 705
535static void nfsd4_cb_recall_release(void *calldata) 706static void nfsd4_cb_recall_release(void *calldata)
@@ -542,6 +713,7 @@ static void nfsd4_cb_recall_release(void *calldata)
542} 713}
543 714
544static const struct rpc_call_ops nfsd4_cb_recall_ops = { 715static const struct rpc_call_ops nfsd4_cb_recall_ops = {
716 .rpc_call_prepare = nfsd4_cb_prepare,
545 .rpc_call_done = nfsd4_cb_recall_done, 717 .rpc_call_done = nfsd4_cb_recall_done,
546 .rpc_release = nfsd4_cb_recall_release, 718 .rpc_release = nfsd4_cb_recall_release,
547}; 719};
@@ -554,17 +726,24 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
554{ 726{
555 struct nfs4_client *clp = dp->dl_client; 727 struct nfs4_client *clp = dp->dl_client;
556 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; 728 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
729 struct nfs4_rpc_args *args;
557 struct rpc_message msg = { 730 struct rpc_message msg = {
558 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], 731 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
559 .rpc_argp = dp, 732 .rpc_cred = callback_cred
560 .rpc_cred = clp->cl_cb_conn.cb_cred
561 }; 733 };
562 int status; 734 int status = -ENOMEM;
563 735
736 args = kzalloc(sizeof(*args), GFP_KERNEL);
737 if (!args)
738 goto out;
739 args->args_op = dp;
740 msg.rpc_argp = args;
564 dp->dl_retries = 1; 741 dp->dl_retries = 1;
565 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, 742 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
566 &nfsd4_cb_recall_ops, dp); 743 &nfsd4_cb_recall_ops, dp);
744out:
567 if (status) { 745 if (status) {
746 kfree(args);
568 put_nfs4_client(clp); 747 put_nfs4_client(clp);
569 nfs4_put_delegation(dp); 748 nfs4_put_delegation(dp);
570 } 749 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7c8801769a3c..bebc0c2e1b0a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -68,7 +68,6 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
68 u32 *bmval, u32 *writable) 68 u32 *bmval, u32 *writable)
69{ 69{
70 struct dentry *dentry = cstate->current_fh.fh_dentry; 70 struct dentry *dentry = cstate->current_fh.fh_dentry;
71 struct svc_export *exp = cstate->current_fh.fh_export;
72 71
73 /* 72 /*
74 * Check about attributes are supported by the NFSv4 server or not. 73 * Check about attributes are supported by the NFSv4 server or not.
@@ -80,17 +79,13 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
80 return nfserr_attrnotsupp; 79 return nfserr_attrnotsupp;
81 80
82 /* 81 /*
83 * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported 82 * Check FATTR4_WORD0_ACL can be supported
84 * in current environment or not. 83 * in current environment or not.
85 */ 84 */
86 if (bmval[0] & FATTR4_WORD0_ACL) { 85 if (bmval[0] & FATTR4_WORD0_ACL) {
87 if (!IS_POSIXACL(dentry->d_inode)) 86 if (!IS_POSIXACL(dentry->d_inode))
88 return nfserr_attrnotsupp; 87 return nfserr_attrnotsupp;
89 } 88 }
90 if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) {
91 if (exp->ex_fslocs.locations == NULL)
92 return nfserr_attrnotsupp;
93 }
94 89
95 /* 90 /*
96 * According to spec, read-only attributes return ERR_INVAL. 91 * According to spec, read-only attributes return ERR_INVAL.
@@ -123,6 +118,35 @@ nfsd4_check_open_attributes(struct svc_rqst *rqstp,
123 return status; 118 return status;
124} 119}
125 120
121static int
122is_create_with_attrs(struct nfsd4_open *open)
123{
124 return open->op_create == NFS4_OPEN_CREATE
125 && (open->op_createmode == NFS4_CREATE_UNCHECKED
126 || open->op_createmode == NFS4_CREATE_GUARDED
127 || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
128}
129
130/*
131 * if error occurs when setting the acl, just clear the acl bit
132 * in the returned attr bitmap.
133 */
134static void
135do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
136 struct nfs4_acl *acl, u32 *bmval)
137{
138 __be32 status;
139
140 status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
141 if (status)
142 /*
143 * We should probably fail the whole open at this point,
144 * but we've already created the file, so it's too late;
145 * So this seems the least of evils:
146 */
147 bmval[0] &= ~FATTR4_WORD0_ACL;
148}
149
126static inline void 150static inline void
127fh_dup2(struct svc_fh *dst, struct svc_fh *src) 151fh_dup2(struct svc_fh *dst, struct svc_fh *src)
128{ 152{
@@ -206,6 +230,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
206 if (status) 230 if (status)
207 goto out; 231 goto out;
208 232
233 if (is_create_with_attrs(open) && open->op_acl != NULL)
234 do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval);
235
209 set_change_info(&open->op_cinfo, current_fh); 236 set_change_info(&open->op_cinfo, current_fh);
210 fh_dup2(current_fh, &resfh); 237 fh_dup2(current_fh, &resfh);
211 238
@@ -536,12 +563,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
536 status = nfserr_badtype; 563 status = nfserr_badtype;
537 } 564 }
538 565
539 if (!status) { 566 if (status)
540 fh_unlock(&cstate->current_fh); 567 goto out;
541 set_change_info(&create->cr_cinfo, &cstate->current_fh); 568
542 fh_dup2(&cstate->current_fh, &resfh); 569 if (create->cr_acl != NULL)
543 } 570 do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
571 create->cr_bmval);
544 572
573 fh_unlock(&cstate->current_fh);
574 set_change_info(&create->cr_cinfo, &cstate->current_fh);
575 fh_dup2(&cstate->current_fh, &resfh);
576out:
545 fh_put(&resfh); 577 fh_put(&resfh);
546 return status; 578 return status;
547} 579}
@@ -947,34 +979,6 @@ static struct nfsd4_operation nfsd4_ops[];
947static const char *nfsd4_op_name(unsigned opnum); 979static const char *nfsd4_op_name(unsigned opnum);
948 980
949/* 981/*
950 * This is a replay of a compound for which no cache entry pages
951 * were used. Encode the sequence operation, and if cachethis is FALSE
952 * encode the uncache rep error on the next operation.
953 */
954static __be32
955nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args,
956 struct nfsd4_compoundres *resp)
957{
958 struct nfsd4_op *op;
959
960 dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__,
961 resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis);
962
963 /* Encode the replayed sequence operation */
964 BUG_ON(resp->opcnt != 1);
965 op = &args->ops[resp->opcnt - 1];
966 nfsd4_encode_operation(resp, op);
967
968 /*return nfserr_retry_uncached_rep in next operation. */
969 if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) {
970 op = &args->ops[resp->opcnt++];
971 op->status = nfserr_retry_uncached_rep;
972 nfsd4_encode_operation(resp, op);
973 }
974 return op->status;
975}
976
977/*
978 * Enforce NFSv4.1 COMPOUND ordering rules. 982 * Enforce NFSv4.1 COMPOUND ordering rules.
979 * 983 *
980 * TODO: 984 * TODO:
@@ -1083,13 +1087,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1083 BUG_ON(op->status == nfs_ok); 1087 BUG_ON(op->status == nfs_ok);
1084 1088
1085encode_op: 1089encode_op:
1086 /* Only from SEQUENCE or CREATE_SESSION */ 1090 /* Only from SEQUENCE */
1087 if (resp->cstate.status == nfserr_replay_cache) { 1091 if (resp->cstate.status == nfserr_replay_cache) {
1088 dprintk("%s NFS4.1 replay from cache\n", __func__); 1092 dprintk("%s NFS4.1 replay from cache\n", __func__);
1089 if (nfsd4_not_cached(resp)) 1093 status = op->status;
1090 status = nfsd4_enc_uncached_replay(args, resp);
1091 else
1092 status = op->status;
1093 goto out; 1094 goto out;
1094 } 1095 }
1095 if (op->status == nfserr_replay_me) { 1096 if (op->status == nfserr_replay_me) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 766d3d544544..2153f9bdbebd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -55,6 +55,7 @@
55#include <linux/lockd/bind.h> 55#include <linux/lockd/bind.h>
56#include <linux/module.h> 56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h> 57#include <linux/sunrpc/svcauth_gss.h>
58#include <linux/sunrpc/clnt.h>
58 59
59#define NFSDDBG_FACILITY NFSDDBG_PROC 60#define NFSDDBG_FACILITY NFSDDBG_PROC
60 61
@@ -413,36 +414,65 @@ gen_sessionid(struct nfsd4_session *ses)
413} 414}
414 415
415/* 416/*
416 * Give the client the number of slots it requests bound by 417 * The protocol defines ca_maxresponsesize_cached to include the size of
417 * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. 418 * the rpc header, but all we need to cache is the data starting after
419 * the end of the initial SEQUENCE operation--the rest we regenerate
420 * each time. Therefore we can advertise a ca_maxresponsesize_cached
421 * value that is the number of bytes in our cache plus a few additional
422 * bytes. In order to stay on the safe side, and not promise more than
423 * we can cache, those additional bytes must be the minimum possible: 24
424 * bytes of rpc header (xid through accept state, with AUTH_NULL
425 * verifier), 12 for the compound header (with zero-length tag), and 44
426 * for the SEQUENCE op response:
427 */
428#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
429
430/*
431 * Give the client the number of ca_maxresponsesize_cached slots it
432 * requests, of size bounded by NFSD_SLOT_CACHE_SIZE,
433 * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more
434 * than NFSD_MAX_SLOTS_PER_SESSION.
418 * 435 *
419 * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we 436 * If we run out of reserved DRC memory we should (up to a point)
420 * should (up to a point) re-negotiate active sessions and reduce their 437 * re-negotiate active sessions and reduce their slot usage to make
421 * slot usage to make room for new connections. For now we just fail the 438 * room for new connections. For now we just fail the create session.
422 * create session.
423 */ 439 */
424static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) 440static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
425{ 441{
426 int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; 442 int mem, size = fchan->maxresp_cached;
427 443
428 if (fchan->maxreqs < 1) 444 if (fchan->maxreqs < 1)
429 return nfserr_inval; 445 return nfserr_inval;
430 else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
431 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
432 446
433 spin_lock(&nfsd_serv->sv_lock); 447 if (size < NFSD_MIN_HDR_SEQ_SZ)
434 if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) 448 size = NFSD_MIN_HDR_SEQ_SZ;
435 np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; 449 size -= NFSD_MIN_HDR_SEQ_SZ;
436 nfsd_serv->sv_drc_pages_used += np; 450 if (size > NFSD_SLOT_CACHE_SIZE)
437 spin_unlock(&nfsd_serv->sv_lock); 451 size = NFSD_SLOT_CACHE_SIZE;
452
453 /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
454 mem = fchan->maxreqs * size;
455 if (mem > NFSD_MAX_MEM_PER_SESSION) {
456 fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
457 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
458 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
459 mem = fchan->maxreqs * size;
460 }
438 461
439 if (np <= 0) { 462 spin_lock(&nfsd_drc_lock);
440 status = nfserr_resource; 463 /* bound the total session drc memory usage */
441 fchan->maxreqs = 0; 464 if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
442 } else 465 fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
443 fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; 466 mem = fchan->maxreqs * size;
467 }
468 nfsd_drc_mem_used += mem;
469 spin_unlock(&nfsd_drc_lock);
444 470
445 return status; 471 if (fchan->maxreqs == 0)
472 return nfserr_serverfault;
473
474 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ;
475 return 0;
446} 476}
447 477
448/* 478/*
@@ -466,36 +496,41 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
466 fchan->maxresp_sz = maxcount; 496 fchan->maxresp_sz = maxcount;
467 session_fchan->maxresp_sz = fchan->maxresp_sz; 497 session_fchan->maxresp_sz = fchan->maxresp_sz;
468 498
469 /* Set the max response cached size our default which is
470 * a multiple of PAGE_SIZE and small */
471 session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
472 fchan->maxresp_cached = session_fchan->maxresp_cached;
473
474 /* Use the client's maxops if possible */ 499 /* Use the client's maxops if possible */
475 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 500 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
476 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 501 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
477 session_fchan->maxops = fchan->maxops; 502 session_fchan->maxops = fchan->maxops;
478 503
479 /* try to use the client requested number of slots */
480 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
481 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
482
483 /* FIXME: Error means no more DRC pages so the server should 504 /* FIXME: Error means no more DRC pages so the server should
484 * recover pages from existing sessions. For now fail session 505 * recover pages from existing sessions. For now fail session
485 * creation. 506 * creation.
486 */ 507 */
487 status = set_forechannel_maxreqs(fchan); 508 status = set_forechannel_drc_size(fchan);
488 509
510 session_fchan->maxresp_cached = fchan->maxresp_cached;
489 session_fchan->maxreqs = fchan->maxreqs; 511 session_fchan->maxreqs = fchan->maxreqs;
512
513 dprintk("%s status %d\n", __func__, status);
490 return status; 514 return status;
491} 515}
492 516
517static void
518free_session_slots(struct nfsd4_session *ses)
519{
520 int i;
521
522 for (i = 0; i < ses->se_fchannel.maxreqs; i++)
523 kfree(ses->se_slots[i]);
524}
525
493static int 526static int
494alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, 527alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
495 struct nfsd4_create_session *cses) 528 struct nfsd4_create_session *cses)
496{ 529{
497 struct nfsd4_session *new, tmp; 530 struct nfsd4_session *new, tmp;
498 int idx, status = nfserr_resource, slotsize; 531 struct nfsd4_slot *sp;
532 int idx, slotsize, cachesize, i;
533 int status;
499 534
500 memset(&tmp, 0, sizeof(tmp)); 535 memset(&tmp, 0, sizeof(tmp));
501 536
@@ -506,14 +541,27 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
506 if (status) 541 if (status)
507 goto out; 542 goto out;
508 543
509 /* allocate struct nfsd4_session and slot table in one piece */ 544 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot)
510 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); 545 + sizeof(struct nfsd4_session) > PAGE_SIZE);
546
547 status = nfserr_serverfault;
548 /* allocate struct nfsd4_session and slot table pointers in one piece */
549 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
511 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); 550 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
512 if (!new) 551 if (!new)
513 goto out; 552 goto out;
514 553
515 memcpy(new, &tmp, sizeof(*new)); 554 memcpy(new, &tmp, sizeof(*new));
516 555
556 /* allocate each struct nfsd4_slot and data cache in one piece */
557 cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
558 for (i = 0; i < new->se_fchannel.maxreqs; i++) {
559 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
560 if (!sp)
561 goto out_free;
562 new->se_slots[i] = sp;
563 }
564
517 new->se_client = clp; 565 new->se_client = clp;
518 gen_sessionid(new); 566 gen_sessionid(new);
519 idx = hash_sessionid(&new->se_sessionid); 567 idx = hash_sessionid(&new->se_sessionid);
@@ -530,6 +578,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
530 status = nfs_ok; 578 status = nfs_ok;
531out: 579out:
532 return status; 580 return status;
581out_free:
582 free_session_slots(new);
583 kfree(new);
584 goto out;
533} 585}
534 586
535/* caller must hold sessionid_lock */ 587/* caller must hold sessionid_lock */
@@ -572,19 +624,16 @@ release_session(struct nfsd4_session *ses)
572 nfsd4_put_session(ses); 624 nfsd4_put_session(ses);
573} 625}
574 626
575static void nfsd4_release_respages(struct page **respages, short resused);
576
577void 627void
578free_session(struct kref *kref) 628free_session(struct kref *kref)
579{ 629{
580 struct nfsd4_session *ses; 630 struct nfsd4_session *ses;
581 int i;
582 631
583 ses = container_of(kref, struct nfsd4_session, se_ref); 632 ses = container_of(kref, struct nfsd4_session, se_ref);
584 for (i = 0; i < ses->se_fchannel.maxreqs; i++) { 633 spin_lock(&nfsd_drc_lock);
585 struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; 634 nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE;
586 nfsd4_release_respages(e->ce_respages, e->ce_resused); 635 spin_unlock(&nfsd_drc_lock);
587 } 636 free_session_slots(ses);
588 kfree(ses); 637 kfree(ses);
589} 638}
590 639
@@ -647,18 +696,14 @@ shutdown_callback_client(struct nfs4_client *clp)
647 clp->cl_cb_conn.cb_client = NULL; 696 clp->cl_cb_conn.cb_client = NULL;
648 rpc_shutdown_client(clnt); 697 rpc_shutdown_client(clnt);
649 } 698 }
650 if (clp->cl_cb_conn.cb_cred) {
651 put_rpccred(clp->cl_cb_conn.cb_cred);
652 clp->cl_cb_conn.cb_cred = NULL;
653 }
654} 699}
655 700
656static inline void 701static inline void
657free_client(struct nfs4_client *clp) 702free_client(struct nfs4_client *clp)
658{ 703{
659 shutdown_callback_client(clp); 704 shutdown_callback_client(clp);
660 nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, 705 if (clp->cl_cb_xprt)
661 clp->cl_slot.sl_cache_entry.ce_resused); 706 svc_xprt_put(clp->cl_cb_xprt);
662 if (clp->cl_cred.cr_group_info) 707 if (clp->cl_cred.cr_group_info)
663 put_group_info(clp->cl_cred.cr_group_info); 708 put_group_info(clp->cl_cred.cr_group_info);
664 kfree(clp->cl_principal); 709 kfree(clp->cl_principal);
@@ -714,25 +759,6 @@ expire_client(struct nfs4_client *clp)
714 put_nfs4_client(clp); 759 put_nfs4_client(clp);
715} 760}
716 761
717static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
718{
719 struct nfs4_client *clp;
720
721 clp = alloc_client(name);
722 if (clp == NULL)
723 return NULL;
724 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
725 atomic_set(&clp->cl_count, 1);
726 atomic_set(&clp->cl_cb_conn.cb_set, 0);
727 INIT_LIST_HEAD(&clp->cl_idhash);
728 INIT_LIST_HEAD(&clp->cl_strhash);
729 INIT_LIST_HEAD(&clp->cl_openowners);
730 INIT_LIST_HEAD(&clp->cl_delegations);
731 INIT_LIST_HEAD(&clp->cl_sessions);
732 INIT_LIST_HEAD(&clp->cl_lru);
733 return clp;
734}
735
736static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) 762static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
737{ 763{
738 memcpy(target->cl_verifier.data, source->data, 764 memcpy(target->cl_verifier.data, source->data,
@@ -795,6 +821,46 @@ static void gen_confirm(struct nfs4_client *clp)
795 *p++ = i++; 821 *p++ = i++;
796} 822}
797 823
824static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
825 struct svc_rqst *rqstp, nfs4_verifier *verf)
826{
827 struct nfs4_client *clp;
828 struct sockaddr *sa = svc_addr(rqstp);
829 char *princ;
830
831 clp = alloc_client(name);
832 if (clp == NULL)
833 return NULL;
834
835 princ = svc_gss_principal(rqstp);
836 if (princ) {
837 clp->cl_principal = kstrdup(princ, GFP_KERNEL);
838 if (clp->cl_principal == NULL) {
839 free_client(clp);
840 return NULL;
841 }
842 }
843
844 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
845 atomic_set(&clp->cl_count, 1);
846 atomic_set(&clp->cl_cb_conn.cb_set, 0);
847 INIT_LIST_HEAD(&clp->cl_idhash);
848 INIT_LIST_HEAD(&clp->cl_strhash);
849 INIT_LIST_HEAD(&clp->cl_openowners);
850 INIT_LIST_HEAD(&clp->cl_delegations);
851 INIT_LIST_HEAD(&clp->cl_sessions);
852 INIT_LIST_HEAD(&clp->cl_lru);
853 clear_bit(0, &clp->cl_cb_slot_busy);
854 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
855 copy_verf(clp, verf);
856 rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
857 clp->cl_flavor = rqstp->rq_flavor;
858 copy_cred(&clp->cl_cred, &rqstp->rq_cred);
859 gen_confirm(clp);
860
861 return clp;
862}
863
798static int check_name(struct xdr_netobj name) 864static int check_name(struct xdr_netobj name)
799{ 865{
800 if (name.len == 0) 866 if (name.len == 0)
@@ -902,93 +968,40 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
902 return NULL; 968 return NULL;
903} 969}
904 970
/*
 * Parse one decimal field of a dotted address string.
 *
 * Consumes characters from *addrp (at most *lenp of them) up to and
 * including the next '.', or until the input runs out.  On return
 * *lenp and *addrp are advanced past everything that was consumed.
 *
 * Returns the field value (0..255), or -1 if the field is empty,
 * contains a non-digit, or exceeds 255.
 */
static int
parse_octet(unsigned int *lenp, char **addrp)
{
	unsigned int remaining = *lenp;
	char *cursor = *addrp;
	int value = -1;

	while (remaining != 0) {
		char ch = *cursor++;

		remaining--;
		if (ch == '.')
			break;
		if (ch < '0' || ch > '9') {
			value = -1;
			break;
		}
		/* The first digit switches value from "nothing seen" (-1) to 0. */
		value = (value < 0 ? 0 : value) * 10 + (ch - '0');
		if (value > 255) {
			value = -1;
			break;
		}
	}

	*lenp = remaining;
	*addrp = cursor;
	return value;
}
937
938/* parse and set the setclientid ipv4 callback address */
939static int
940parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
941{
942 int temp = 0;
943 u32 cbaddr = 0;
944 u16 cbport = 0;
945 u32 addrlen = addr_len;
946 char *addr = addr_val;
947 int i, shift;
948
949 /* ipaddress */
950 shift = 24;
951 for(i = 4; i > 0 ; i--) {
952 if ((temp = parse_octet(&addrlen, &addr)) < 0) {
953 return 0;
954 }
955 cbaddr |= (temp << shift);
956 if (shift > 0)
957 shift -= 8;
958 }
959 *cbaddrp = cbaddr;
960
961 /* port */
962 shift = 8;
963 for(i = 2; i > 0 ; i--) {
964 if ((temp = parse_octet(&addrlen, &addr)) < 0) {
965 return 0;
966 }
967 cbport |= (temp << shift);
968 if (shift > 0)
969 shift -= 8;
970 }
971 *cbportp = cbport;
972 return 1;
973}
974
975static void 971static void
976gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) 972gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
977{ 973{
978 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 974 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
979 975 unsigned short expected_family;
980 /* Currently, we only support tcp for the callback channel */ 976
981 if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) 977 /* Currently, we only support tcp and tcp6 for the callback channel */
978 if (se->se_callback_netid_len == 3 &&
979 !memcmp(se->se_callback_netid_val, "tcp", 3))
980 expected_family = AF_INET;
981 else if (se->se_callback_netid_len == 4 &&
982 !memcmp(se->se_callback_netid_val, "tcp6", 4))
983 expected_family = AF_INET6;
984 else
982 goto out_err; 985 goto out_err;
983 986
984 if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, 987 cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
985 &cb->cb_addr, &cb->cb_port))) 988 se->se_callback_addr_len,
989 (struct sockaddr *) &cb->cb_addr,
990 sizeof(cb->cb_addr));
991
992 if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family)
986 goto out_err; 993 goto out_err;
994
995 if (cb->cb_addr.ss_family == AF_INET6)
996 ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid;
997
987 cb->cb_minorversion = 0; 998 cb->cb_minorversion = 0;
988 cb->cb_prog = se->se_callback_prog; 999 cb->cb_prog = se->se_callback_prog;
989 cb->cb_ident = se->se_callback_ident; 1000 cb->cb_ident = se->se_callback_ident;
990 return; 1001 return;
991out_err: 1002out_err:
1003 cb->cb_addr.ss_family = AF_UNSPEC;
1004 cb->cb_addrlen = 0;
992 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 1005 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
993 "will not receive delegations\n", 1006 "will not receive delegations\n",
994 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 1007 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -996,175 +1009,87 @@ out_err:
996 return; 1009 return;
997} 1010}
998 1011
999void
1000nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
1001{
1002 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1003
1004 resp->cstate.statp = statp;
1005}
1006
1007/* 1012/*
1008 * Dereference the result pages. 1013 * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
1009 */ 1014 */
1010static void 1015void
1011nfsd4_release_respages(struct page **respages, short resused) 1016nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
1012{ 1017{
1013 int i; 1018 struct nfsd4_slot *slot = resp->cstate.slot;
1019 unsigned int base;
1014 1020
1015 dprintk("--> %s\n", __func__); 1021 dprintk("--> %s slot %p\n", __func__, slot);
1016 for (i = 0; i < resused; i++) {
1017 if (!respages[i])
1018 continue;
1019 put_page(respages[i]);
1020 respages[i] = NULL;
1021 }
1022}
1023 1022
1024static void 1023 slot->sl_opcnt = resp->opcnt;
1025nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) 1024 slot->sl_status = resp->cstate.status;
1026{
1027 int i;
1028 1025
1029 for (i = 0; i < count; i++) { 1026 if (nfsd4_not_cached(resp)) {
1030 topages[i] = frompages[i]; 1027 slot->sl_datalen = 0;
1031 if (!topages[i]) 1028 return;
1032 continue;
1033 get_page(topages[i]);
1034 } 1029 }
1030 slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap;
1031 base = (char *)resp->cstate.datap -
1032 (char *)resp->xbuf->head[0].iov_base;
1033 if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
1034 slot->sl_datalen))
1035 WARN("%s: sessions DRC could not cache compound\n", __func__);
1036 return;
1035} 1037}
1036 1038
1037/* 1039/*
1038 * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous 1040 * Encode the replay sequence operation from the slot values.
1039 * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total 1041 * If cachethis is FALSE encode the uncached rep error on the next
1040 * length of the XDR response is less than se_fmaxresp_cached 1042 * operation which sets resp->p and increments resp->opcnt for
1041 * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a 1043 * nfs4svc_encode_compoundres.
1042 * of the reply (e.g. readdir).
1043 * 1044 *
1044 * Store the base and length of the rq_req.head[0] page
1045 * of the NFSv4.1 data, just past the rpc header.
1046 */ 1045 */
1047void 1046static __be32
1048nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) 1047nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
1048 struct nfsd4_compoundres *resp)
1049{ 1049{
1050 struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; 1050 struct nfsd4_op *op;
1051 struct svc_rqst *rqstp = resp->rqstp; 1051 struct nfsd4_slot *slot = resp->cstate.slot;
1052 struct nfsd4_compoundargs *args = rqstp->rq_argp;
1053 struct nfsd4_op *op = &args->ops[resp->opcnt];
1054 struct kvec *resv = &rqstp->rq_res.head[0];
1055
1056 dprintk("--> %s entry %p\n", __func__, entry);
1057
1058 /* Don't cache a failed OP_SEQUENCE. */
1059 if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status)
1060 return;
1061 1052
1062 nfsd4_release_respages(entry->ce_respages, entry->ce_resused); 1053 dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
1063 entry->ce_opcnt = resp->opcnt; 1054 resp->opcnt, resp->cstate.slot->sl_cachethis);
1064 entry->ce_status = resp->cstate.status;
1065 1055
1066 /* 1056 /* Encode the replayed sequence operation */
1067 * Don't need a page to cache just the sequence operation - the slot 1057 op = &args->ops[resp->opcnt - 1];
1068 * does this for us! 1058 nfsd4_encode_operation(resp, op);
1069 */
1070 1059
1071 if (nfsd4_not_cached(resp)) { 1060 /* Return nfserr_retry_uncached_rep in next operation. */
1072 entry->ce_resused = 0; 1061 if (args->opcnt > 1 && slot->sl_cachethis == 0) {
1073 entry->ce_rpchdrlen = 0; 1062 op = &args->ops[resp->opcnt++];
1074 dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, 1063 op->status = nfserr_retry_uncached_rep;
1075 resp->cstate.slot->sl_cache_entry.ce_cachethis); 1064 nfsd4_encode_operation(resp, op);
1076 return;
1077 }
1078 entry->ce_resused = rqstp->rq_resused;
1079 if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1)
1080 entry->ce_resused = NFSD_PAGES_PER_SLOT + 1;
1081 nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages,
1082 entry->ce_resused);
1083 entry->ce_datav.iov_base = resp->cstate.statp;
1084 entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp -
1085 (char *)page_address(rqstp->rq_respages[0]));
1086 /* Current request rpc header length*/
1087 entry->ce_rpchdrlen = (char *)resp->cstate.statp -
1088 (char *)page_address(rqstp->rq_respages[0]);
1089}
1090
1091/*
1092 * We keep the rpc header, but take the nfs reply from the replycache.
1093 */
1094static int
1095nfsd41_copy_replay_data(struct nfsd4_compoundres *resp,
1096 struct nfsd4_cache_entry *entry)
1097{
1098 struct svc_rqst *rqstp = resp->rqstp;
1099 struct kvec *resv = &resp->rqstp->rq_res.head[0];
1100 int len;
1101
1102 /* Current request rpc header length*/
1103 len = (char *)resp->cstate.statp -
1104 (char *)page_address(rqstp->rq_respages[0]);
1105 if (entry->ce_datav.iov_len + len > PAGE_SIZE) {
1106 dprintk("%s v41 cached reply too large (%Zd).\n", __func__,
1107 entry->ce_datav.iov_len);
1108 return 0;
1109 } 1065 }
1110 /* copy the cached reply nfsd data past the current rpc header */ 1066 return op->status;
1111 memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base,
1112 entry->ce_datav.iov_len);
1113 resv->iov_len = len + entry->ce_datav.iov_len;
1114 return 1;
1115} 1067}
1116 1068
1117/* 1069/*
1118 * Keep the first page of the replay. Copy the NFSv4.1 data from the first 1070 * The sequence operation is not cached because we can use the slot and
1119 * cached page. Replace any futher replay pages from the cache. 1071 * session values.
1120 */ 1072 */
1121__be32 1073__be32
1122nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, 1074nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1123 struct nfsd4_sequence *seq) 1075 struct nfsd4_sequence *seq)
1124{ 1076{
1125 struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; 1077 struct nfsd4_slot *slot = resp->cstate.slot;
1126 __be32 status; 1078 __be32 status;
1127 1079
1128 dprintk("--> %s entry %p\n", __func__, entry); 1080 dprintk("--> %s slot %p\n", __func__, slot);
1129
1130 /*
1131 * If this is just the sequence operation, we did not keep
1132 * a page in the cache entry because we can just use the
1133 * slot info stored in struct nfsd4_sequence that was checked
1134 * against the slot in nfsd4_sequence().
1135 *
1136 * This occurs when seq->cachethis is FALSE, or when the client
1137 * session inactivity timer fires and a solo sequence operation
1138 * is sent (lease renewal).
1139 */
1140 if (seq && nfsd4_not_cached(resp)) {
1141 seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
1142 return nfs_ok;
1143 }
1144
1145 if (!nfsd41_copy_replay_data(resp, entry)) {
1146 /*
1147 * Not enough room to use the replay rpc header, send the
1148 * cached header. Release all the allocated result pages.
1149 */
1150 svc_free_res_pages(resp->rqstp);
1151 nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages,
1152 entry->ce_resused);
1153 } else {
1154 /* Release all but the first allocated result page */
1155 1081
1156 resp->rqstp->rq_resused--; 1082 /* Either returns 0 or nfserr_retry_uncached */
1157 svc_free_res_pages(resp->rqstp); 1083 status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
1084 if (status == nfserr_retry_uncached_rep)
1085 return status;
1158 1086
1159 nfsd4_copy_pages(&resp->rqstp->rq_respages[1], 1087 /* The sequence operation has been encoded, cstate->datap set. */
1160 &entry->ce_respages[1], 1088 memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen);
1161 entry->ce_resused - 1);
1162 }
1163 1089
1164 resp->rqstp->rq_resused = entry->ce_resused; 1090 resp->opcnt = slot->sl_opcnt;
1165 resp->opcnt = entry->ce_opcnt; 1091 resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen);
1166 resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; 1092 status = slot->sl_status;
1167 status = entry->ce_status;
1168 1093
1169 return status; 1094 return status;
1170} 1095}
@@ -1194,13 +1119,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1194 int status; 1119 int status;
1195 unsigned int strhashval; 1120 unsigned int strhashval;
1196 char dname[HEXDIR_LEN]; 1121 char dname[HEXDIR_LEN];
1122 char addr_str[INET6_ADDRSTRLEN];
1197 nfs4_verifier verf = exid->verifier; 1123 nfs4_verifier verf = exid->verifier;
1198 u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; 1124 struct sockaddr *sa = svc_addr(rqstp);
1199 1125
1126 rpc_ntop(sa, addr_str, sizeof(addr_str));
1200 dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " 1127 dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
1201 " ip_addr=%u flags %x, spa_how %d\n", 1128 "ip_addr=%s flags %x, spa_how %d\n",
1202 __func__, rqstp, exid, exid->clname.len, exid->clname.data, 1129 __func__, rqstp, exid, exid->clname.len, exid->clname.data,
1203 ip_addr, exid->flags, exid->spa_how); 1130 addr_str, exid->flags, exid->spa_how);
1204 1131
1205 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) 1132 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A))
1206 return nfserr_inval; 1133 return nfserr_inval;
@@ -1281,28 +1208,23 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1281 1208
1282out_new: 1209out_new:
1283 /* Normal case */ 1210 /* Normal case */
1284 new = create_client(exid->clname, dname); 1211 new = create_client(exid->clname, dname, rqstp, &verf);
1285 if (new == NULL) { 1212 if (new == NULL) {
1286 status = nfserr_resource; 1213 status = nfserr_serverfault;
1287 goto out; 1214 goto out;
1288 } 1215 }
1289 1216
1290 copy_verf(new, &verf);
1291 copy_cred(&new->cl_cred, &rqstp->rq_cred);
1292 new->cl_addr = ip_addr;
1293 gen_clid(new); 1217 gen_clid(new);
1294 gen_confirm(new);
1295 add_to_unconfirmed(new, strhashval); 1218 add_to_unconfirmed(new, strhashval);
1296out_copy: 1219out_copy:
1297 exid->clientid.cl_boot = new->cl_clientid.cl_boot; 1220 exid->clientid.cl_boot = new->cl_clientid.cl_boot;
1298 exid->clientid.cl_id = new->cl_clientid.cl_id; 1221 exid->clientid.cl_id = new->cl_clientid.cl_id;
1299 1222
1300 new->cl_slot.sl_seqid = 0;
1301 exid->seqid = 1; 1223 exid->seqid = 1;
1302 nfsd4_set_ex_flags(new, exid); 1224 nfsd4_set_ex_flags(new, exid);
1303 1225
1304 dprintk("nfsd4_exchange_id seqid %d flags %x\n", 1226 dprintk("nfsd4_exchange_id seqid %d flags %x\n",
1305 new->cl_slot.sl_seqid, new->cl_exchange_flags); 1227 new->cl_cs_slot.sl_seqid, new->cl_exchange_flags);
1306 status = nfs_ok; 1228 status = nfs_ok;
1307 1229
1308out: 1230out:
@@ -1313,40 +1235,60 @@ error:
1313} 1235}
1314 1236
1315static int 1237static int
1316check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) 1238check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
1317{ 1239{
1318 dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, 1240 dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
1319 slot->sl_seqid); 1241 slot_seqid);
1320 1242
1321 /* The slot is in use, and no response has been sent. */ 1243 /* The slot is in use, and no response has been sent. */
1322 if (slot->sl_inuse) { 1244 if (slot_inuse) {
1323 if (seqid == slot->sl_seqid) 1245 if (seqid == slot_seqid)
1324 return nfserr_jukebox; 1246 return nfserr_jukebox;
1325 else 1247 else
1326 return nfserr_seq_misordered; 1248 return nfserr_seq_misordered;
1327 } 1249 }
1328 /* Normal */ 1250 /* Normal */
1329 if (likely(seqid == slot->sl_seqid + 1)) 1251 if (likely(seqid == slot_seqid + 1))
1330 return nfs_ok; 1252 return nfs_ok;
1331 /* Replay */ 1253 /* Replay */
1332 if (seqid == slot->sl_seqid) 1254 if (seqid == slot_seqid)
1333 return nfserr_replay_cache; 1255 return nfserr_replay_cache;
1334 /* Wraparound */ 1256 /* Wraparound */
1335 if (seqid == 1 && (slot->sl_seqid + 1) == 0) 1257 if (seqid == 1 && (slot_seqid + 1) == 0)
1336 return nfs_ok; 1258 return nfs_ok;
1337 /* Misordered replay or misordered new request */ 1259 /* Misordered replay or misordered new request */
1338 return nfserr_seq_misordered; 1260 return nfserr_seq_misordered;
1339} 1261}
1340 1262
1263/*
1264 * Cache the create session result into the create session single DRC
1265 * slot cache by saving the xdr structure. sl_seqid has been set.
1266 * Do this for solo or embedded create session operations.
1267 */
1268static void
1269nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
1270 struct nfsd4_clid_slot *slot, int nfserr)
1271{
1272 slot->sl_status = nfserr;
1273 memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
1274}
1275
1276static __be32
1277nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
1278 struct nfsd4_clid_slot *slot)
1279{
1280 memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
1281 return slot->sl_status;
1282}
1283
1341__be32 1284__be32
1342nfsd4_create_session(struct svc_rqst *rqstp, 1285nfsd4_create_session(struct svc_rqst *rqstp,
1343 struct nfsd4_compound_state *cstate, 1286 struct nfsd4_compound_state *cstate,
1344 struct nfsd4_create_session *cr_ses) 1287 struct nfsd4_create_session *cr_ses)
1345{ 1288{
1346 u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; 1289 struct sockaddr *sa = svc_addr(rqstp);
1347 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1348 struct nfs4_client *conf, *unconf; 1290 struct nfs4_client *conf, *unconf;
1349 struct nfsd4_slot *slot = NULL; 1291 struct nfsd4_clid_slot *cs_slot = NULL;
1350 int status = 0; 1292 int status = 0;
1351 1293
1352 nfs4_lock_state(); 1294 nfs4_lock_state();
@@ -1354,40 +1296,38 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1354 conf = find_confirmed_client(&cr_ses->clientid); 1296 conf = find_confirmed_client(&cr_ses->clientid);
1355 1297
1356 if (conf) { 1298 if (conf) {
1357 slot = &conf->cl_slot; 1299 cs_slot = &conf->cl_cs_slot;
1358 status = check_slot_seqid(cr_ses->seqid, slot); 1300 status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
1359 if (status == nfserr_replay_cache) { 1301 if (status == nfserr_replay_cache) {
1360 dprintk("Got a create_session replay! seqid= %d\n", 1302 dprintk("Got a create_session replay! seqid= %d\n",
1361 slot->sl_seqid); 1303 cs_slot->sl_seqid);
1362 cstate->slot = slot;
1363 cstate->status = status;
1364 /* Return the cached reply status */ 1304 /* Return the cached reply status */
1365 status = nfsd4_replay_cache_entry(resp, NULL); 1305 status = nfsd4_replay_create_session(cr_ses, cs_slot);
1366 goto out; 1306 goto out;
1367 } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { 1307 } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
1368 status = nfserr_seq_misordered; 1308 status = nfserr_seq_misordered;
1369 dprintk("Sequence misordered!\n"); 1309 dprintk("Sequence misordered!\n");
1370 dprintk("Expected seqid= %d but got seqid= %d\n", 1310 dprintk("Expected seqid= %d but got seqid= %d\n",
1371 slot->sl_seqid, cr_ses->seqid); 1311 cs_slot->sl_seqid, cr_ses->seqid);
1372 goto out; 1312 goto out;
1373 } 1313 }
1374 conf->cl_slot.sl_seqid++; 1314 cs_slot->sl_seqid++;
1375 } else if (unconf) { 1315 } else if (unconf) {
1376 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 1316 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1377 (ip_addr != unconf->cl_addr)) { 1317 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
1378 status = nfserr_clid_inuse; 1318 status = nfserr_clid_inuse;
1379 goto out; 1319 goto out;
1380 } 1320 }
1381 1321
1382 slot = &unconf->cl_slot; 1322 cs_slot = &unconf->cl_cs_slot;
1383 status = check_slot_seqid(cr_ses->seqid, slot); 1323 status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
1384 if (status) { 1324 if (status) {
1385 /* an unconfirmed replay returns misordered */ 1325 /* an unconfirmed replay returns misordered */
1386 status = nfserr_seq_misordered; 1326 status = nfserr_seq_misordered;
1387 goto out; 1327 goto out_cache;
1388 } 1328 }
1389 1329
1390 slot->sl_seqid++; /* from 0 to 1 */ 1330 cs_slot->sl_seqid++; /* from 0 to 1 */
1391 move_to_confirmed(unconf); 1331 move_to_confirmed(unconf);
1392 1332
1393 /* 1333 /*
@@ -1396,6 +1336,19 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1396 cr_ses->flags &= ~SESSION4_PERSIST; 1336 cr_ses->flags &= ~SESSION4_PERSIST;
1397 cr_ses->flags &= ~SESSION4_RDMA; 1337 cr_ses->flags &= ~SESSION4_RDMA;
1398 1338
1339 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1340 unconf->cl_cb_xprt = rqstp->rq_xprt;
1341 svc_xprt_get(unconf->cl_cb_xprt);
1342 rpc_copy_addr(
1343 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1344 sa);
1345 unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1346 unconf->cl_cb_conn.cb_minorversion =
1347 cstate->minorversion;
1348 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1349 unconf->cl_cb_seq_nr = 1;
1350 nfsd4_probe_callback(unconf);
1351 }
1399 conf = unconf; 1352 conf = unconf;
1400 } else { 1353 } else {
1401 status = nfserr_stale_clientid; 1354 status = nfserr_stale_clientid;
@@ -1408,12 +1361,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1408 1361
1409 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, 1362 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
1410 NFS4_MAX_SESSIONID_LEN); 1363 NFS4_MAX_SESSIONID_LEN);
1411 cr_ses->seqid = slot->sl_seqid; 1364 cr_ses->seqid = cs_slot->sl_seqid;
1412 1365
1413 slot->sl_inuse = true; 1366out_cache:
1414 cstate->slot = slot; 1367 /* cache solo and embedded create sessions under the state lock */
1415 /* Ensure a page is used for the cache */ 1368 nfsd4_cache_create_session(cr_ses, cs_slot, status);
1416 slot->sl_cache_entry.ce_cachethis = 1;
1417out: 1369out:
1418 nfs4_unlock_state(); 1370 nfs4_unlock_state();
1419 dprintk("%s returns %d\n", __func__, ntohl(status)); 1371 dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1478,18 +1430,23 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1478 if (seq->slotid >= session->se_fchannel.maxreqs) 1430 if (seq->slotid >= session->se_fchannel.maxreqs)
1479 goto out; 1431 goto out;
1480 1432
1481 slot = &session->se_slots[seq->slotid]; 1433 slot = session->se_slots[seq->slotid];
1482 dprintk("%s: slotid %d\n", __func__, seq->slotid); 1434 dprintk("%s: slotid %d\n", __func__, seq->slotid);
1483 1435
1484 status = check_slot_seqid(seq->seqid, slot); 1436 /* We do not negotiate the number of slots yet, so set the
1437 * maxslots to the session maxreqs which is used to encode
1438 * sr_highest_slotid and the sr_target_slot id to maxslots */
1439 seq->maxslots = session->se_fchannel.maxreqs;
1440
1441 status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
1485 if (status == nfserr_replay_cache) { 1442 if (status == nfserr_replay_cache) {
1486 cstate->slot = slot; 1443 cstate->slot = slot;
1487 cstate->session = session; 1444 cstate->session = session;
1488 /* Return the cached reply status and set cstate->status 1445 /* Return the cached reply status and set cstate->status
1489 * for nfsd4_svc_encode_compoundres processing */ 1446 * for nfsd4_proc_compound processing */
1490 status = nfsd4_replay_cache_entry(resp, seq); 1447 status = nfsd4_replay_cache_entry(resp, seq);
1491 cstate->status = nfserr_replay_cache; 1448 cstate->status = nfserr_replay_cache;
1492 goto replay_cache; 1449 goto out;
1493 } 1450 }
1494 if (status) 1451 if (status)
1495 goto out; 1452 goto out;
@@ -1497,23 +1454,23 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1497 /* Success! bump slot seqid */ 1454 /* Success! bump slot seqid */
1498 slot->sl_inuse = true; 1455 slot->sl_inuse = true;
1499 slot->sl_seqid = seq->seqid; 1456 slot->sl_seqid = seq->seqid;
1500 slot->sl_cache_entry.ce_cachethis = seq->cachethis; 1457 slot->sl_cachethis = seq->cachethis;
1501 /* Always set the cache entry cachethis for solo sequence */
1502 if (nfsd4_is_solo_sequence(resp))
1503 slot->sl_cache_entry.ce_cachethis = 1;
1504 1458
1505 cstate->slot = slot; 1459 cstate->slot = slot;
1506 cstate->session = session; 1460 cstate->session = session;
1507 1461
1508replay_cache: 1462 /* Hold a session reference until done processing the compound:
1509 /* Renew the clientid on success and on replay.
1510 * Hold a session reference until done processing the compound:
1511 * nfsd4_put_session called only if the cstate slot is set. 1463 * nfsd4_put_session called only if the cstate slot is set.
1512 */ 1464 */
1513 renew_client(session->se_client);
1514 nfsd4_get_session(session); 1465 nfsd4_get_session(session);
1515out: 1466out:
1516 spin_unlock(&sessionid_lock); 1467 spin_unlock(&sessionid_lock);
1468 /* Renew the clientid on success and on replay */
1469 if (cstate->session) {
1470 nfs4_lock_state();
1471 renew_client(session->se_client);
1472 nfs4_unlock_state();
1473 }
1517 dprintk("%s: return %d\n", __func__, ntohl(status)); 1474 dprintk("%s: return %d\n", __func__, ntohl(status));
1518 return status; 1475 return status;
1519} 1476}
@@ -1522,7 +1479,7 @@ __be32
1522nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1479nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1523 struct nfsd4_setclientid *setclid) 1480 struct nfsd4_setclientid *setclid)
1524{ 1481{
1525 struct sockaddr_in *sin = svc_addr_in(rqstp); 1482 struct sockaddr *sa = svc_addr(rqstp);
1526 struct xdr_netobj clname = { 1483 struct xdr_netobj clname = {
1527 .len = setclid->se_namelen, 1484 .len = setclid->se_namelen,
1528 .data = setclid->se_name, 1485 .data = setclid->se_name,
@@ -1531,7 +1488,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1531 unsigned int strhashval; 1488 unsigned int strhashval;
1532 struct nfs4_client *conf, *unconf, *new; 1489 struct nfs4_client *conf, *unconf, *new;
1533 __be32 status; 1490 __be32 status;
1534 char *princ;
1535 char dname[HEXDIR_LEN]; 1491 char dname[HEXDIR_LEN];
1536 1492
1537 if (!check_name(clname)) 1493 if (!check_name(clname))
@@ -1554,8 +1510,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1554 /* RFC 3530 14.2.33 CASE 0: */ 1510 /* RFC 3530 14.2.33 CASE 0: */
1555 status = nfserr_clid_inuse; 1511 status = nfserr_clid_inuse;
1556 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { 1512 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
1557 dprintk("NFSD: setclientid: string in use by client" 1513 char addr_str[INET6_ADDRSTRLEN];
1558 " at %pI4\n", &conf->cl_addr); 1514 rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
1515 sizeof(addr_str));
1516 dprintk("NFSD: setclientid: string in use by client "
1517 "at %s\n", addr_str);
1559 goto out; 1518 goto out;
1560 } 1519 }
1561 } 1520 }
@@ -1573,7 +1532,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1573 */ 1532 */
1574 if (unconf) 1533 if (unconf)
1575 expire_client(unconf); 1534 expire_client(unconf);
1576 new = create_client(clname, dname); 1535 new = create_client(clname, dname, rqstp, &clverifier);
1577 if (new == NULL) 1536 if (new == NULL)
1578 goto out; 1537 goto out;
1579 gen_clid(new); 1538 gen_clid(new);
@@ -1590,7 +1549,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1590 */ 1549 */
1591 expire_client(unconf); 1550 expire_client(unconf);
1592 } 1551 }
1593 new = create_client(clname, dname); 1552 new = create_client(clname, dname, rqstp, &clverifier);
1594 if (new == NULL) 1553 if (new == NULL)
1595 goto out; 1554 goto out;
1596 copy_clid(new, conf); 1555 copy_clid(new, conf);
@@ -1600,7 +1559,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1600 * probable client reboot; state will be removed if 1559 * probable client reboot; state will be removed if
1601 * confirmed. 1560 * confirmed.
1602 */ 1561 */
1603 new = create_client(clname, dname); 1562 new = create_client(clname, dname, rqstp, &clverifier);
1604 if (new == NULL) 1563 if (new == NULL)
1605 goto out; 1564 goto out;
1606 gen_clid(new); 1565 gen_clid(new);
@@ -1611,25 +1570,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1611 * confirmed. 1570 * confirmed.
1612 */ 1571 */
1613 expire_client(unconf); 1572 expire_client(unconf);
1614 new = create_client(clname, dname); 1573 new = create_client(clname, dname, rqstp, &clverifier);
1615 if (new == NULL) 1574 if (new == NULL)
1616 goto out; 1575 goto out;
1617 gen_clid(new); 1576 gen_clid(new);
1618 } 1577 }
1619 copy_verf(new, &clverifier); 1578 gen_callback(new, setclid, rpc_get_scope_id(sa));
1620 new->cl_addr = sin->sin_addr.s_addr;
1621 new->cl_flavor = rqstp->rq_flavor;
1622 princ = svc_gss_principal(rqstp);
1623 if (princ) {
1624 new->cl_principal = kstrdup(princ, GFP_KERNEL);
1625 if (new->cl_principal == NULL) {
1626 free_client(new);
1627 goto out;
1628 }
1629 }
1630 copy_cred(&new->cl_cred, &rqstp->rq_cred);
1631 gen_confirm(new);
1632 gen_callback(new, setclid);
1633 add_to_unconfirmed(new, strhashval); 1579 add_to_unconfirmed(new, strhashval);
1634 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 1580 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
1635 setclid->se_clientid.cl_id = new->cl_clientid.cl_id; 1581 setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
@@ -1651,7 +1597,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1651 struct nfsd4_compound_state *cstate, 1597 struct nfsd4_compound_state *cstate,
1652 struct nfsd4_setclientid_confirm *setclientid_confirm) 1598 struct nfsd4_setclientid_confirm *setclientid_confirm)
1653{ 1599{
1654 struct sockaddr_in *sin = svc_addr_in(rqstp); 1600 struct sockaddr *sa = svc_addr(rqstp);
1655 struct nfs4_client *conf, *unconf; 1601 struct nfs4_client *conf, *unconf;
1656 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 1602 nfs4_verifier confirm = setclientid_confirm->sc_confirm;
1657 clientid_t * clid = &setclientid_confirm->sc_clientid; 1603 clientid_t * clid = &setclientid_confirm->sc_clientid;
@@ -1670,9 +1616,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1670 unconf = find_unconfirmed_client(clid); 1616 unconf = find_unconfirmed_client(clid);
1671 1617
1672 status = nfserr_clid_inuse; 1618 status = nfserr_clid_inuse;
1673 if (conf && conf->cl_addr != sin->sin_addr.s_addr) 1619 if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa))
1674 goto out; 1620 goto out;
1675 if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) 1621 if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa))
1676 goto out; 1622 goto out;
1677 1623
1678 /* 1624 /*
@@ -4072,7 +4018,7 @@ set_max_delegations(void)
4072 4018
4073/* initialization to perform when the nfsd service is started: */ 4019/* initialization to perform when the nfsd service is started: */
4074 4020
4075static void 4021static int
4076__nfs4_state_start(void) 4022__nfs4_state_start(void)
4077{ 4023{
4078 unsigned long grace_time; 4024 unsigned long grace_time;
@@ -4084,19 +4030,26 @@ __nfs4_state_start(void)
4084 printk(KERN_INFO "NFSD: starting %ld-second grace period\n", 4030 printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
4085 grace_time/HZ); 4031 grace_time/HZ);
4086 laundry_wq = create_singlethread_workqueue("nfsd4"); 4032 laundry_wq = create_singlethread_workqueue("nfsd4");
4033 if (laundry_wq == NULL)
4034 return -ENOMEM;
4087 queue_delayed_work(laundry_wq, &laundromat_work, grace_time); 4035 queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
4088 set_max_delegations(); 4036 set_max_delegations();
4037 return set_callback_cred();
4089} 4038}
4090 4039
4091void 4040int
4092nfs4_state_start(void) 4041nfs4_state_start(void)
4093{ 4042{
4043 int ret;
4044
4094 if (nfs4_init) 4045 if (nfs4_init)
4095 return; 4046 return 0;
4096 nfsd4_load_reboot_recovery_data(); 4047 nfsd4_load_reboot_recovery_data();
4097 __nfs4_state_start(); 4048 ret = __nfs4_state_start();
4049 if (ret)
4050 return ret;
4098 nfs4_init = 1; 4051 nfs4_init = 1;
4099 return; 4052 return 0;
4100} 4053}
4101 4054
4102time_t 4055time_t
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2dcc7feaa6ff..0fbd50cee1f6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1599,7 +1599,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
1599static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) 1599static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat)
1600{ 1600{
1601 struct svc_fh tmp_fh; 1601 struct svc_fh tmp_fh;
1602 char *path, *rootpath; 1602 char *path = NULL, *rootpath;
1603 size_t rootlen;
1603 1604
1604 fh_init(&tmp_fh, NFS4_FHSIZE); 1605 fh_init(&tmp_fh, NFS4_FHSIZE);
1605 *stat = exp_pseudoroot(rqstp, &tmp_fh); 1606 *stat = exp_pseudoroot(rqstp, &tmp_fh);
@@ -1609,14 +1610,18 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
1609 1610
1610 path = exp->ex_pathname; 1611 path = exp->ex_pathname;
1611 1612
1612 if (strncmp(path, rootpath, strlen(rootpath))) { 1613 rootlen = strlen(rootpath);
1614 if (strncmp(path, rootpath, rootlen)) {
1613 dprintk("nfsd: fs_locations failed;" 1615 dprintk("nfsd: fs_locations failed;"
1614 "%s is not contained in %s\n", path, rootpath); 1616 "%s is not contained in %s\n", path, rootpath);
1615 *stat = nfserr_notsupp; 1617 *stat = nfserr_notsupp;
1616 return NULL; 1618 path = NULL;
1619 goto out;
1617 } 1620 }
1618 1621 path += rootlen;
1619 return path + strlen(rootpath); 1622out:
1623 fh_put(&tmp_fh);
1624 return path;
1620} 1625}
1621 1626
1622/* 1627/*
@@ -1793,11 +1798,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1793 goto out_nfserr; 1798 goto out_nfserr;
1794 } 1799 }
1795 } 1800 }
1796 if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
1797 if (exp->ex_fslocs.locations == NULL) {
1798 bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1799 }
1800 }
1801 if ((buflen -= 16) < 0) 1801 if ((buflen -= 16) < 0)
1802 goto out_resource; 1802 goto out_resource;
1803 1803
@@ -1825,8 +1825,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1825 goto out_resource; 1825 goto out_resource;
1826 if (!aclsupport) 1826 if (!aclsupport)
1827 word0 &= ~FATTR4_WORD0_ACL; 1827 word0 &= ~FATTR4_WORD0_ACL;
1828 if (!exp->ex_fslocs.locations)
1829 word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1830 if (!word2) { 1828 if (!word2) {
1831 WRITE32(2); 1829 WRITE32(2);
1832 WRITE32(word0); 1830 WRITE32(word0);
@@ -3064,6 +3062,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
3064 WRITE32(0); 3062 WRITE32(0);
3065 3063
3066 ADJUST_ARGS(); 3064 ADJUST_ARGS();
3065 resp->cstate.datap = p; /* DRC cache data pointer */
3067 return 0; 3066 return 0;
3068} 3067}
3069 3068
@@ -3166,7 +3165,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
3166 return status; 3165 return status;
3167 3166
3168 session = resp->cstate.session; 3167 session = resp->cstate.session;
3169 if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) 3168 if (session == NULL || slot->sl_cachethis == 0)
3170 return status; 3169 return status;
3171 3170
3172 if (resp->opcnt >= args->opcnt) 3171 if (resp->opcnt >= args->opcnt)
@@ -3291,6 +3290,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
3291 /* 3290 /*
3292 * All that remains is to write the tag and operation count... 3291 * All that remains is to write the tag and operation count...
3293 */ 3292 */
3293 struct nfsd4_compound_state *cs = &resp->cstate;
3294 struct kvec *iov; 3294 struct kvec *iov;
3295 p = resp->tagp; 3295 p = resp->tagp;
3296 *p++ = htonl(resp->taglen); 3296 *p++ = htonl(resp->taglen);
@@ -3304,17 +3304,11 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
3304 iov = &rqstp->rq_res.head[0]; 3304 iov = &rqstp->rq_res.head[0];
3305 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; 3305 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
3306 BUG_ON(iov->iov_len > PAGE_SIZE); 3306 BUG_ON(iov->iov_len > PAGE_SIZE);
3307 if (nfsd4_has_session(&resp->cstate)) { 3307 if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
3308 if (resp->cstate.status == nfserr_replay_cache && 3308 nfsd4_store_cache_entry(resp);
3309 !nfsd4_not_cached(resp)) { 3309 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
3310 iov->iov_len = resp->cstate.iovlen; 3310 resp->cstate.slot->sl_inuse = false;
3311 } else { 3311 nfsd4_put_session(resp->cstate.session);
3312 nfsd4_store_cache_entry(resp);
3313 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
3314 resp->cstate.slot->sl_inuse = 0;
3315 }
3316 if (resp->cstate.session)
3317 nfsd4_put_session(resp->cstate.session);
3318 } 3312 }
3319 return 1; 3313 return 1;
3320} 3314}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7e906c5b7671..00388d2a3c99 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -174,12 +174,13 @@ static const struct file_operations exports_operations = {
174}; 174};
175 175
176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
177extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
177 178
178static struct file_operations pool_stats_operations = { 179static struct file_operations pool_stats_operations = {
179 .open = nfsd_pool_stats_open, 180 .open = nfsd_pool_stats_open,
180 .read = seq_read, 181 .read = seq_read,
181 .llseek = seq_lseek, 182 .llseek = seq_lseek,
182 .release = seq_release, 183 .release = nfsd_pool_stats_release,
183 .owner = THIS_MODULE, 184 .owner = THIS_MODULE,
184}; 185};
185 186
@@ -776,10 +777,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
776 size -= len; 777 size -= len;
777 mesg += len; 778 mesg += len;
778 } 779 }
779 780 rv = mesg - buf;
780 mutex_unlock(&nfsd_mutex);
781 return (mesg-buf);
782
783out_free: 781out_free:
784 kfree(nthreads); 782 kfree(nthreads);
785 mutex_unlock(&nfsd_mutex); 783 mutex_unlock(&nfsd_mutex);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8847f3fbfc1e..01965b2f3a76 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -397,44 +397,51 @@ static inline void _fh_update_old(struct dentry *dentry,
397 fh->ofh_dirino = 0; 397 fh->ofh_dirino = 0;
398} 398}
399 399
400__be32 400static bool is_root_export(struct svc_export *exp)
401fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
402 struct svc_fh *ref_fh)
403{ 401{
404 /* ref_fh is a reference file handle. 402 return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
405 * if it is non-null and for the same filesystem, then we should compose 403}
406 * a filehandle which is of the same version, where possible.
407 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
408 * Then create a 32byte filehandle using nfs_fhbase_old
409 *
410 */
411 404
412 u8 version; 405static struct super_block *exp_sb(struct svc_export *exp)
413 u8 fsid_type = 0; 406{
414 struct inode * inode = dentry->d_inode; 407 return exp->ex_path.dentry->d_inode->i_sb;
415 struct dentry *parent = dentry->d_parent; 408}
416 __u32 *datap;
417 dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev;
418 int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root);
419 409
420 dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", 410static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
421 MAJOR(ex_dev), MINOR(ex_dev), 411{
422 (long) exp->ex_path.dentry->d_inode->i_ino, 412 switch (fsid_type) {
423 parent->d_name.name, dentry->d_name.name, 413 case FSID_DEV:
424 (inode ? inode->i_ino : 0)); 414 if (!old_valid_dev(exp_sb(exp)->s_dev))
415 return 0;
416 /* FALL THROUGH */
417 case FSID_MAJOR_MINOR:
418 case FSID_ENCODE_DEV:
419 return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
420 case FSID_NUM:
421 return exp->ex_flags & NFSEXP_FSID;
422 case FSID_UUID8:
423 case FSID_UUID16:
424 if (!is_root_export(exp))
425 return 0;
426 /* fall through */
427 case FSID_UUID4_INUM:
428 case FSID_UUID16_INUM:
429 return exp->ex_uuid != NULL;
430 }
431 return 1;
432}
425 433
426 /* Choose filehandle version and fsid type based on 434
427 * the reference filehandle (if it is in the same export) 435static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh)
428 * or the export options. 436{
429 */ 437 u8 version;
430 retry: 438 u8 fsid_type;
439retry:
431 version = 1; 440 version = 1;
432 if (ref_fh && ref_fh->fh_export == exp) { 441 if (ref_fh && ref_fh->fh_export == exp) {
433 version = ref_fh->fh_handle.fh_version; 442 version = ref_fh->fh_handle.fh_version;
434 fsid_type = ref_fh->fh_handle.fh_fsid_type; 443 fsid_type = ref_fh->fh_handle.fh_fsid_type;
435 444
436 if (ref_fh == fhp)
437 fh_put(ref_fh);
438 ref_fh = NULL; 445 ref_fh = NULL;
439 446
440 switch (version) { 447 switch (version) {
@@ -447,58 +454,66 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
447 goto retry; 454 goto retry;
448 } 455 }
449 456
450 /* Need to check that this type works for this 457 /*
451 * export point. As the fsid -> filesystem mapping 458 * As the fsid -> filesystem mapping was guided by
452 * was guided by user-space, there is no guarantee 459 * user-space, there is no guarantee that the filesystem
453 * that the filesystem actually supports that fsid 460 * actually supports that fsid type. If it doesn't we
454 * type. If it doesn't we loop around again without 461 * loop around again without ref_fh set.
455 * ref_fh set.
456 */ 462 */
457 switch(fsid_type) { 463 if (!fsid_type_ok_for_exp(fsid_type, exp))
458 case FSID_DEV: 464 goto retry;
459 if (!old_valid_dev(ex_dev))
460 goto retry;
461 /* FALL THROUGH */
462 case FSID_MAJOR_MINOR:
463 case FSID_ENCODE_DEV:
464 if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
465 & FS_REQUIRES_DEV))
466 goto retry;
467 break;
468 case FSID_NUM:
469 if (! (exp->ex_flags & NFSEXP_FSID))
470 goto retry;
471 break;
472 case FSID_UUID8:
473 case FSID_UUID16:
474 if (!root_export)
475 goto retry;
476 /* fall through */
477 case FSID_UUID4_INUM:
478 case FSID_UUID16_INUM:
479 if (exp->ex_uuid == NULL)
480 goto retry;
481 break;
482 }
483 } else if (exp->ex_flags & NFSEXP_FSID) { 465 } else if (exp->ex_flags & NFSEXP_FSID) {
484 fsid_type = FSID_NUM; 466 fsid_type = FSID_NUM;
485 } else if (exp->ex_uuid) { 467 } else if (exp->ex_uuid) {
486 if (fhp->fh_maxsize >= 64) { 468 if (fhp->fh_maxsize >= 64) {
487 if (root_export) 469 if (is_root_export(exp))
488 fsid_type = FSID_UUID16; 470 fsid_type = FSID_UUID16;
489 else 471 else
490 fsid_type = FSID_UUID16_INUM; 472 fsid_type = FSID_UUID16_INUM;
491 } else { 473 } else {
492 if (root_export) 474 if (is_root_export(exp))
493 fsid_type = FSID_UUID8; 475 fsid_type = FSID_UUID8;
494 else 476 else
495 fsid_type = FSID_UUID4_INUM; 477 fsid_type = FSID_UUID4_INUM;
496 } 478 }
497 } else if (!old_valid_dev(ex_dev)) 479 } else if (!old_valid_dev(exp_sb(exp)->s_dev))
498 /* for newer device numbers, we must use a newer fsid format */ 480 /* for newer device numbers, we must use a newer fsid format */
499 fsid_type = FSID_ENCODE_DEV; 481 fsid_type = FSID_ENCODE_DEV;
500 else 482 else
501 fsid_type = FSID_DEV; 483 fsid_type = FSID_DEV;
484 fhp->fh_handle.fh_version = version;
485 if (version)
486 fhp->fh_handle.fh_fsid_type = fsid_type;
487}
488
489__be32
490fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
491 struct svc_fh *ref_fh)
492{
493 /* ref_fh is a reference file handle.
494 * if it is non-null and for the same filesystem, then we should compose
495 * a filehandle which is of the same version, where possible.
496 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
497 * Then create a 32byte filehandle using nfs_fhbase_old
498 *
499 */
500
501 struct inode * inode = dentry->d_inode;
502 struct dentry *parent = dentry->d_parent;
503 __u32 *datap;
504 dev_t ex_dev = exp_sb(exp)->s_dev;
505
506 dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
507 MAJOR(ex_dev), MINOR(ex_dev),
508 (long) exp->ex_path.dentry->d_inode->i_ino,
509 parent->d_name.name, dentry->d_name.name,
510 (inode ? inode->i_ino : 0));
511
512 /* Choose filehandle version and fsid type based on
513 * the reference filehandle (if it is in the same export)
514 * or the export options.
515 */
516 set_version_and_fsid_type(fhp, exp, ref_fh);
502 517
503 if (ref_fh == fhp) 518 if (ref_fh == fhp)
504 fh_put(ref_fh); 519 fh_put(ref_fh);
@@ -516,7 +531,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
516 fhp->fh_export = exp; 531 fhp->fh_export = exp;
517 cache_get(&exp->h); 532 cache_get(&exp->h);
518 533
519 if (version == 0xca) { 534 if (fhp->fh_handle.fh_version == 0xca) {
520 /* old style filehandle please */ 535 /* old style filehandle please */
521 memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); 536 memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
522 fhp->fh_handle.fh_size = NFS_FHSIZE; 537 fhp->fh_handle.fh_size = NFS_FHSIZE;
@@ -530,22 +545,22 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
530 _fh_update_old(dentry, exp, &fhp->fh_handle); 545 _fh_update_old(dentry, exp, &fhp->fh_handle);
531 } else { 546 } else {
532 int len; 547 int len;
533 fhp->fh_handle.fh_version = 1;
534 fhp->fh_handle.fh_auth_type = 0; 548 fhp->fh_handle.fh_auth_type = 0;
535 datap = fhp->fh_handle.fh_auth+0; 549 datap = fhp->fh_handle.fh_auth+0;
536 fhp->fh_handle.fh_fsid_type = fsid_type; 550 mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
537 mk_fsid(fsid_type, datap, ex_dev,
538 exp->ex_path.dentry->d_inode->i_ino, 551 exp->ex_path.dentry->d_inode->i_ino,
539 exp->ex_fsid, exp->ex_uuid); 552 exp->ex_fsid, exp->ex_uuid);
540 553
541 len = key_len(fsid_type); 554 len = key_len(fhp->fh_handle.fh_fsid_type);
542 datap += len/4; 555 datap += len/4;
543 fhp->fh_handle.fh_size = 4 + len; 556 fhp->fh_handle.fh_size = 4 + len;
544 557
545 if (inode) 558 if (inode)
546 _fh_update(fhp, exp, dentry); 559 _fh_update(fhp, exp, dentry);
547 if (fhp->fh_handle.fh_fileid_type == 255) 560 if (fhp->fh_handle.fh_fileid_type == 255) {
561 fh_put(fhp);
548 return nfserr_opnotsupp; 562 return nfserr_opnotsupp;
563 }
549 } 564 }
550 565
551 return 0; 566 return 0;
@@ -639,8 +654,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
639 case FSID_DEV: 654 case FSID_DEV:
640 case FSID_ENCODE_DEV: 655 case FSID_ENCODE_DEV:
641 case FSID_MAJOR_MINOR: 656 case FSID_MAJOR_MINOR:
642 if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags 657 if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV)
643 & FS_REQUIRES_DEV)
644 return FSIDSOURCE_DEV; 658 return FSIDSOURCE_DEV;
645 break; 659 break;
646 case FSID_NUM: 660 case FSID_NUM:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 24d58adfe5fd..67ea83eedd43 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -34,6 +34,7 @@
34#include <linux/nfsd/syscall.h> 34#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h> 35#include <linux/lockd/bind.h>
36#include <linux/nfsacl.h> 36#include <linux/nfsacl.h>
37#include <linux/seq_file.h>
37 38
38#define NFSDDBG_FACILITY NFSDDBG_SVC 39#define NFSDDBG_FACILITY NFSDDBG_SVC
39 40
@@ -66,6 +67,16 @@ struct timeval nfssvc_boot;
66DEFINE_MUTEX(nfsd_mutex); 67DEFINE_MUTEX(nfsd_mutex);
67struct svc_serv *nfsd_serv; 68struct svc_serv *nfsd_serv;
68 69
70/*
71 * nfsd_drc_lock protects nfsd_drc_max_mem and nfsd_drc_mem_used.
72 * nfsd_drc_max_mem limits the total amount of memory available for
73 * version 4.1 DRC caches.
74 * nfsd_drc_mem_used tracks the current version 4.1 DRC memory usage.
75 */
76spinlock_t nfsd_drc_lock;
77unsigned int nfsd_drc_max_mem;
78unsigned int nfsd_drc_mem_used;
79
69#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 80#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
70static struct svc_stat nfsd_acl_svcstats; 81static struct svc_stat nfsd_acl_svcstats;
71static struct svc_version * nfsd_acl_version[] = { 82static struct svc_version * nfsd_acl_version[] = {
@@ -235,13 +246,12 @@ void nfsd_reset_versions(void)
235 */ 246 */
236static void set_max_drc(void) 247static void set_max_drc(void)
237{ 248{
238 /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ 249 #define NFSD_DRC_SIZE_SHIFT 10
239 #define NFSD_DRC_SIZE_SHIFT 7 250 nfsd_drc_max_mem = (nr_free_buffer_pages()
240 nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() 251 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
241 >> NFSD_DRC_SIZE_SHIFT; 252 nfsd_drc_mem_used = 0;
242 nfsd_serv->sv_drc_pages_used = 0; 253 spin_lock_init(&nfsd_drc_lock);
243 dprintk("%s svc_drc_max_pages %u\n", __func__, 254 dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
244 nfsd_serv->sv_drc_max_pages);
245} 255}
246 256
247int nfsd_create_serv(void) 257int nfsd_create_serv(void)
@@ -401,7 +411,9 @@ nfsd_svc(unsigned short port, int nrservs)
401 error = nfsd_racache_init(2*nrservs); 411 error = nfsd_racache_init(2*nrservs);
402 if (error<0) 412 if (error<0)
403 goto out; 413 goto out;
404 nfs4_state_start(); 414 error = nfs4_state_start();
415 if (error)
416 goto out;
405 417
406 nfsd_reset_versions(); 418 nfsd_reset_versions();
407 419
@@ -569,10 +581,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
569 + rqstp->rq_res.head[0].iov_len; 581 + rqstp->rq_res.head[0].iov_len;
570 rqstp->rq_res.head[0].iov_len += sizeof(__be32); 582 rqstp->rq_res.head[0].iov_len += sizeof(__be32);
571 583
572 /* NFSv4.1 DRC requires statp */
573 if (rqstp->rq_vers == 4)
574 nfsd4_set_statp(rqstp, statp);
575
576 /* Now call the procedure handler, and encode NFS status. */ 584 /* Now call the procedure handler, and encode NFS status. */
577 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); 585 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
578 nfserr = map_new_errors(rqstp->rq_vers, nfserr); 586 nfserr = map_new_errors(rqstp->rq_vers, nfserr);
@@ -607,7 +615,25 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
607 615
608int nfsd_pool_stats_open(struct inode *inode, struct file *file) 616int nfsd_pool_stats_open(struct inode *inode, struct file *file)
609{ 617{
610 if (nfsd_serv == NULL) 618 int ret;
619 mutex_lock(&nfsd_mutex);
620 if (nfsd_serv == NULL) {
621 mutex_unlock(&nfsd_mutex);
611 return -ENODEV; 622 return -ENODEV;
612 return svc_pool_stats_open(nfsd_serv, file); 623 }
624 /* bump up the pseudo refcount while traversing */
625 svc_get(nfsd_serv);
626 ret = svc_pool_stats_open(nfsd_serv, file);
627 mutex_unlock(&nfsd_mutex);
628 return ret;
629}
630
631int nfsd_pool_stats_release(struct inode *inode, struct file *file)
632{
633 int ret = seq_release(inode, file);
634 mutex_lock(&nfsd_mutex);
635 /* this function really, really should have been called svc_put() */
636 svc_destroy(nfsd_serv);
637 mutex_unlock(&nfsd_mutex);
638 return ret;
613} 639}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8fa09bfbcba7..a293f0273263 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -89,6 +89,12 @@ struct raparm_hbucket {
89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
90static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 90static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
91 91
92static inline int
93nfsd_v4client(struct svc_rqst *rq)
94{
95 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
96}
97
92/* 98/*
93 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 99 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
94 * a mount point. 100 * a mount point.
@@ -115,7 +121,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
115 path_put(&path); 121 path_put(&path);
116 goto out; 122 goto out;
117 } 123 }
118 if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 124 if (nfsd_v4client(rqstp) ||
125 (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
119 /* successfully crossed mount point */ 126 /* successfully crossed mount point */
120 /* 127 /*
121 * This is subtle: path.dentry is *not* on path.mnt 128 * This is subtle: path.dentry is *not* on path.mnt
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ccf2e0dc077a..a34dea46b629 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
338 } 338 }
339} 339}
340 340
341static inline int __nlm_cmp_addr4(const struct sockaddr *sap1,
342 const struct sockaddr *sap2)
343{
344 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
345 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
346 return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
347}
348
349#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
350static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
351 const struct sockaddr *sap2)
352{
353 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
354 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
355 return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
356}
357#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
358static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
359 const struct sockaddr *sap2)
360{
361 return 0;
362}
363#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
364
365/*
366 * Compare two host addresses
367 *
368 * Return TRUE if the addresses are the same; otherwise FALSE.
369 */
370static inline int nlm_cmp_addr(const struct sockaddr *sap1,
371 const struct sockaddr *sap2)
372{
373 if (sap1->sa_family == sap2->sa_family) {
374 switch (sap1->sa_family) {
375 case AF_INET:
376 return __nlm_cmp_addr4(sap1, sap2);
377 case AF_INET6:
378 return __nlm_cmp_addr6(sap1, sap2);
379 }
380 }
381 return 0;
382}
383
384/* 341/*
385 * Compare two NLM locks. 342 * Compare two NLM locks.
386 * When the second lock is of type F_UNLCK, this acts like a wildcard. 343 * When the second lock is of type F_UNLCK, this acts like a wildcard.
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 33b283601f62..c4c060208109 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -234,7 +234,7 @@ enum nfs_opnum4 {
234Needs to be updated if more operations are defined in future.*/ 234Needs to be updated if more operations are defined in future.*/
235 235
236#define FIRST_NFS4_OP OP_ACCESS 236#define FIRST_NFS4_OP OP_ACCESS
237#define LAST_NFS4_OP OP_RELEASE_LOCKOWNER 237#define LAST_NFS4_OP OP_RECLAIM_COMPLETE
238 238
239enum nfsstat4 { 239enum nfsstat4 {
240 NFS4_OK = 0, 240 NFS4_OK = 0,
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 2b49d676d0c9..03bbe9039104 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -56,6 +56,9 @@ extern struct svc_version nfsd_version2, nfsd_version3,
56extern u32 nfsd_supported_minorversion; 56extern u32 nfsd_supported_minorversion;
57extern struct mutex nfsd_mutex; 57extern struct mutex nfsd_mutex;
58extern struct svc_serv *nfsd_serv; 58extern struct svc_serv *nfsd_serv;
59extern spinlock_t nfsd_drc_lock;
60extern unsigned int nfsd_drc_max_mem;
61extern unsigned int nfsd_drc_mem_used;
59 62
60extern struct seq_operations nfs_exports_op; 63extern struct seq_operations nfs_exports_op;
61 64
@@ -163,7 +166,7 @@ extern int nfsd_max_blksize;
163extern unsigned int max_delegations; 166extern unsigned int max_delegations;
164int nfs4_state_init(void); 167int nfs4_state_init(void);
165void nfsd4_free_slabs(void); 168void nfsd4_free_slabs(void);
166void nfs4_state_start(void); 169int nfs4_state_start(void);
167void nfs4_state_shutdown(void); 170void nfs4_state_shutdown(void);
168time_t nfs4_lease_time(void); 171time_t nfs4_lease_time(void);
169void nfs4_reset_lease(time_t leasetime); 172void nfs4_reset_lease(time_t leasetime);
@@ -171,7 +174,7 @@ int nfs4_reset_recoverydir(char *recdir);
171#else 174#else
172static inline int nfs4_state_init(void) { return 0; } 175static inline int nfs4_state_init(void) { return 0; }
173static inline void nfsd4_free_slabs(void) { } 176static inline void nfsd4_free_slabs(void) { }
174static inline void nfs4_state_start(void) { } 177static inline int nfs4_state_start(void) { return 0; }
175static inline void nfs4_state_shutdown(void) { } 178static inline void nfs4_state_shutdown(void) { }
176static inline time_t nfs4_lease_time(void) { return 0; } 179static inline time_t nfs4_lease_time(void) { return 0; }
177static inline void nfs4_reset_lease(time_t leasetime) { } 180static inline void nfs4_reset_lease(time_t leasetime) { }
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 57ab2ed08459..b38d11324189 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -60,6 +60,12 @@ typedef struct {
60#define si_stateownerid si_opaque.so_stateownerid 60#define si_stateownerid si_opaque.so_stateownerid
61#define si_fileid si_opaque.so_fileid 61#define si_fileid si_opaque.so_fileid
62 62
63struct nfsd4_cb_sequence {
64 /* args/res */
65 u32 cbs_minorversion;
66 struct nfs4_client *cbs_clp;
67};
68
63struct nfs4_delegation { 69struct nfs4_delegation {
64 struct list_head dl_perfile; 70 struct list_head dl_perfile;
65 struct list_head dl_perclnt; 71 struct list_head dl_perclnt;
@@ -81,38 +87,35 @@ struct nfs4_delegation {
81/* client delegation callback info */ 87/* client delegation callback info */
82struct nfs4_cb_conn { 88struct nfs4_cb_conn {
83 /* SETCLIENTID info */ 89 /* SETCLIENTID info */
84 u32 cb_addr; 90 struct sockaddr_storage cb_addr;
85 unsigned short cb_port; 91 size_t cb_addrlen;
86 u32 cb_prog; 92 u32 cb_prog;
87 u32 cb_minorversion; 93 u32 cb_minorversion;
88 u32 cb_ident; /* minorversion 0 only */ 94 u32 cb_ident; /* minorversion 0 only */
89 /* RPC client info */ 95 /* RPC client info */
90 atomic_t cb_set; /* successful CB_NULL call */ 96 atomic_t cb_set; /* successful CB_NULL call */
91 struct rpc_clnt * cb_client; 97 struct rpc_clnt * cb_client;
92 struct rpc_cred * cb_cred;
93}; 98};
94 99
95/* Maximum number of slots per session. 128 is useful for long haul TCP */ 100/* Maximum number of slots per session. 160 is useful for long haul TCP */
96#define NFSD_MAX_SLOTS_PER_SESSION 128 101#define NFSD_MAX_SLOTS_PER_SESSION 160
97/* Maximum number of pages per slot cache entry */
98#define NFSD_PAGES_PER_SLOT 1
99/* Maximum number of operations per session compound */ 102/* Maximum number of operations per session compound */
100#define NFSD_MAX_OPS_PER_COMPOUND 16 103#define NFSD_MAX_OPS_PER_COMPOUND 16
101 104/* Maximum cache size of a single session slot */
102struct nfsd4_cache_entry { 105#define NFSD_SLOT_CACHE_SIZE 1024
103 __be32 ce_status; 106/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
104 struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */ 107#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
105 struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1]; 108#define NFSD_MAX_MEM_PER_SESSION \
106 int ce_cachethis; 109 (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
107 short ce_resused;
108 int ce_opcnt;
109 int ce_rpchdrlen;
110};
111 110
112struct nfsd4_slot { 111struct nfsd4_slot {
113 bool sl_inuse; 112 bool sl_inuse;
114 u32 sl_seqid; 113 bool sl_cachethis;
115 struct nfsd4_cache_entry sl_cache_entry; 114 u16 sl_opcnt;
115 u32 sl_seqid;
116 __be32 sl_status;
117 u32 sl_datalen;
118 char sl_data[];
116}; 119};
117 120
118struct nfsd4_channel_attrs { 121struct nfsd4_channel_attrs {
@@ -126,6 +129,25 @@ struct nfsd4_channel_attrs {
126 u32 rdma_attrs; 129 u32 rdma_attrs;
127}; 130};
128 131
132struct nfsd4_create_session {
133 clientid_t clientid;
134 struct nfs4_sessionid sessionid;
135 u32 seqid;
136 u32 flags;
137 struct nfsd4_channel_attrs fore_channel;
138 struct nfsd4_channel_attrs back_channel;
139 u32 callback_prog;
140 u32 uid;
141 u32 gid;
142};
143
144/* The single slot clientid cache structure */
145struct nfsd4_clid_slot {
146 u32 sl_seqid;
147 __be32 sl_status;
148 struct nfsd4_create_session sl_cr_ses;
149};
150
129struct nfsd4_session { 151struct nfsd4_session {
130 struct kref se_ref; 152 struct kref se_ref;
131 struct list_head se_hash; /* hash by sessionid */ 153 struct list_head se_hash; /* hash by sessionid */
@@ -135,7 +157,7 @@ struct nfsd4_session {
135 struct nfs4_sessionid se_sessionid; 157 struct nfs4_sessionid se_sessionid;
136 struct nfsd4_channel_attrs se_fchannel; 158 struct nfsd4_channel_attrs se_fchannel;
137 struct nfsd4_channel_attrs se_bchannel; 159 struct nfsd4_channel_attrs se_bchannel;
138 struct nfsd4_slot se_slots[]; /* forward channel slots */ 160 struct nfsd4_slot *se_slots[]; /* forward channel slots */
139}; 161};
140 162
141static inline void 163static inline void
@@ -180,7 +202,7 @@ struct nfs4_client {
180 char cl_recdir[HEXDIR_LEN]; /* recovery dir */ 202 char cl_recdir[HEXDIR_LEN]; /* recovery dir */
181 nfs4_verifier cl_verifier; /* generated by client */ 203 nfs4_verifier cl_verifier; /* generated by client */
182 time_t cl_time; /* time of last lease renewal */ 204 time_t cl_time; /* time of last lease renewal */
183 __be32 cl_addr; /* client ipaddress */ 205 struct sockaddr_storage cl_addr; /* client ipaddress */
184 u32 cl_flavor; /* setclientid pseudoflavor */ 206 u32 cl_flavor; /* setclientid pseudoflavor */
185 char *cl_principal; /* setclientid principal name */ 207 char *cl_principal; /* setclientid principal name */
186 struct svc_cred cl_cred; /* setclientid principal */ 208 struct svc_cred cl_cred; /* setclientid principal */
@@ -192,9 +214,17 @@ struct nfs4_client {
192 214
193 /* for nfs41 */ 215 /* for nfs41 */
194 struct list_head cl_sessions; 216 struct list_head cl_sessions;
195 struct nfsd4_slot cl_slot; /* create_session slot */ 217 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
196 u32 cl_exchange_flags; 218 u32 cl_exchange_flags;
197 struct nfs4_sessionid cl_sessionid; 219 struct nfs4_sessionid cl_sessionid;
220
221 /* for nfs41 callbacks */
222 /* We currently support a single back channel with a single slot */
223 unsigned long cl_cb_slot_busy;
224 u32 cl_cb_seq_nr;
225 struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
226 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
227 /* wait here for slots */
198}; 228};
199 229
200/* struct nfs4_client_reset 230/* struct nfs4_client_reset
@@ -345,6 +375,7 @@ extern int nfs4_in_grace(void);
345extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 375extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
346extern void put_nfs4_client(struct nfs4_client *clp); 376extern void put_nfs4_client(struct nfs4_client *clp);
347extern void nfs4_free_stateowner(struct kref *kref); 377extern void nfs4_free_stateowner(struct kref *kref);
378extern int set_callback_cred(void);
348extern void nfsd4_probe_callback(struct nfs4_client *clp); 379extern void nfsd4_probe_callback(struct nfs4_client *clp);
349extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 380extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
350extern void nfs4_put_delegation(struct nfs4_delegation *dp); 381extern void nfs4_put_delegation(struct nfs4_delegation *dp);
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 2bacf7535069..73164c2b3d29 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -51,7 +51,7 @@ struct nfsd4_compound_state {
51 /* For sessions DRC */ 51 /* For sessions DRC */
52 struct nfsd4_session *session; 52 struct nfsd4_session *session;
53 struct nfsd4_slot *slot; 53 struct nfsd4_slot *slot;
54 __be32 *statp; 54 __be32 *datap;
55 size_t iovlen; 55 size_t iovlen;
56 u32 minorversion; 56 u32 minorversion;
57 u32 status; 57 u32 status;
@@ -366,18 +366,6 @@ struct nfsd4_exchange_id {
366 int spa_how; 366 int spa_how;
367}; 367};
368 368
369struct nfsd4_create_session {
370 clientid_t clientid;
371 struct nfs4_sessionid sessionid;
372 u32 seqid;
373 u32 flags;
374 struct nfsd4_channel_attrs fore_channel;
375 struct nfsd4_channel_attrs back_channel;
376 u32 callback_prog;
377 u32 uid;
378 u32 gid;
379};
380
381struct nfsd4_sequence { 369struct nfsd4_sequence {
382 struct nfs4_sessionid sessionid; /* request/response */ 370 struct nfs4_sessionid sessionid; /* request/response */
383 u32 seqid; /* request/response */ 371 u32 seqid; /* request/response */
@@ -479,13 +467,12 @@ struct nfsd4_compoundres {
479static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) 467static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
480{ 468{
481 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; 469 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
482 return args->opcnt == 1; 470 return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
483} 471}
484 472
485static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) 473static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
486{ 474{
487 return !resp->cstate.slot->sl_cache_entry.ce_cachethis || 475 return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
488 nfsd4_is_solo_sequence(resp);
489} 476}
490 477
491#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) 478#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 3f632182d8eb..996df4dac7d4 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -111,7 +111,7 @@ struct rpc_credops {
111 void (*crdestroy)(struct rpc_cred *); 111 void (*crdestroy)(struct rpc_cred *);
112 112
113 int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); 113 int (*crmatch)(struct auth_cred *, struct rpc_cred *, int);
114 void (*crbind)(struct rpc_task *, struct rpc_cred *); 114 void (*crbind)(struct rpc_task *, struct rpc_cred *, int);
115 __be32 * (*crmarshal)(struct rpc_task *, __be32 *); 115 __be32 * (*crmarshal)(struct rpc_task *, __be32 *);
116 int (*crrefresh)(struct rpc_task *); 116 int (*crrefresh)(struct rpc_task *);
117 __be32 * (*crvalidate)(struct rpc_task *, __be32 *); 117 __be32 * (*crvalidate)(struct rpc_task *, __be32 *);
@@ -140,7 +140,7 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *
140void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); 140void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
141struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); 141struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
142void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int); 142void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int);
143void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *); 143void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
144void put_rpccred(struct rpc_cred *); 144void put_rpccred(struct rpc_cred *);
145void rpcauth_unbindcred(struct rpc_task *); 145void rpcauth_unbindcred(struct rpc_task *);
146__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); 146__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index ab3f6e90caa5..8ed9642a5a76 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -22,6 +22,7 @@
22#include <linux/sunrpc/timer.h> 22#include <linux/sunrpc/timer.h>
23#include <asm/signal.h> 23#include <asm/signal.h>
24#include <linux/path.h> 24#include <linux/path.h>
25#include <net/ipv6.h>
25 26
26struct rpc_inode; 27struct rpc_inode;
27 28
@@ -113,6 +114,7 @@ struct rpc_create_args {
113 rpc_authflavor_t authflavor; 114 rpc_authflavor_t authflavor;
114 unsigned long flags; 115 unsigned long flags;
115 char *client_name; 116 char *client_name;
117 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
116}; 118};
117 119
118/* Values for "flags" field */ 120/* Values for "flags" field */
@@ -188,5 +190,117 @@ static inline void rpc_set_port(struct sockaddr *sap,
188#define IPV6_SCOPE_DELIMITER '%' 190#define IPV6_SCOPE_DELIMITER '%'
189#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") 191#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn")
190 192
193static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
194 const struct sockaddr *sap2)
195{
196 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
197 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
198
199 return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
200}
201
202static inline bool __rpc_copy_addr4(struct sockaddr *dst,
203 const struct sockaddr *src)
204{
205 const struct sockaddr_in *ssin = (struct sockaddr_in *) src;
206 struct sockaddr_in *dsin = (struct sockaddr_in *) dst;
207
208 dsin->sin_family = ssin->sin_family;
209 dsin->sin_addr.s_addr = ssin->sin_addr.s_addr;
210 return true;
211}
212
213#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
214static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
215 const struct sockaddr *sap2)
216{
217 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
218 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
219 return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
220}
221
222static inline bool __rpc_copy_addr6(struct sockaddr *dst,
223 const struct sockaddr *src)
224{
225 const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src;
226 struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
227
228 dsin6->sin6_family = ssin6->sin6_family;
229 ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr);
230 return true;
231}
232#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
233static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
234 const struct sockaddr *sap2)
235{
236 return false;
237}
238
239static inline bool __rpc_copy_addr6(struct sockaddr *dst,
240 const struct sockaddr *src)
241{
242 return false;
243}
244#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
245
246/**
247 * rpc_cmp_addr - compare the address portion of two sockaddrs.
248 * @sap1: first sockaddr
249 * @sap2: second sockaddr
250 *
251 * Just compares the family and address portion. Ignores port, scope, etc.
252 * Returns true if the addrs are equal, false if they aren't.
253 */
254static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
255 const struct sockaddr *sap2)
256{
257 if (sap1->sa_family == sap2->sa_family) {
258 switch (sap1->sa_family) {
259 case AF_INET:
260 return __rpc_cmp_addr4(sap1, sap2);
261 case AF_INET6:
262 return __rpc_cmp_addr6(sap1, sap2);
263 }
264 }
265 return false;
266}
267
268/**
269 * rpc_copy_addr - copy the address portion of one sockaddr to another
270 * @dst: destination sockaddr
271 * @src: source sockaddr
272 *
273 * Just copies the address portion and family. Ignores port, scope, etc.
274 * Caller is responsible for making certain that dst is large enough to hold
275 * the address in src. Returns true if address family is supported. Returns
276 * false otherwise.
277 */
278static inline bool rpc_copy_addr(struct sockaddr *dst,
279 const struct sockaddr *src)
280{
281 switch (src->sa_family) {
282 case AF_INET:
283 return __rpc_copy_addr4(dst, src);
284 case AF_INET6:
285 return __rpc_copy_addr6(dst, src);
286 }
287 return false;
288}
289
290/**
291 * rpc_get_scope_id - return scopeid for a given sockaddr
292 * @sa: sockaddr to get scopeid from
293 *
294 * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if
295 * not an AF_INET6 address.
296 */
297static inline u32 rpc_get_scope_id(const struct sockaddr *sa)
298{
299 if (sa->sa_family != AF_INET6)
300 return 0;
301
302 return ((struct sockaddr_in6 *) sa)->sin6_scope_id;
303}
304
191#endif /* __KERNEL__ */ 305#endif /* __KERNEL__ */
192#endif /* _LINUX_SUNRPC_CLNT_H */ 306#endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index ea8009695c69..52e8cb0a7569 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -94,8 +94,6 @@ struct svc_serv {
94 struct module * sv_module; /* optional module to count when 94 struct module * sv_module; /* optional module to count when
95 * adding threads */ 95 * adding threads */
96 svc_thread_fn sv_function; /* main function for threads */ 96 svc_thread_fn sv_function; /* main function for threads */
97 unsigned int sv_drc_max_pages; /* Total pages for DRC */
98 unsigned int sv_drc_pages_used;/* DRC pages used */
99#if defined(CONFIG_NFS_V4_1) 97#if defined(CONFIG_NFS_V4_1)
100 struct list_head sv_cb_list; /* queue for callback requests 98 struct list_head sv_cb_list; /* queue for callback requests
101 * that arrive over the same 99 * that arrive over the same
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 2223ae0b5ed5..5f4e18b3ce73 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -65,6 +65,7 @@ struct svc_xprt {
65 size_t xpt_locallen; /* length of address */ 65 size_t xpt_locallen; /* length of address */
66 struct sockaddr_storage xpt_remote; /* remote peer's address */ 66 struct sockaddr_storage xpt_remote; /* remote peer's address */
67 size_t xpt_remotelen; /* length of address */ 67 size_t xpt_remotelen; /* length of address */
68 struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */
68}; 69};
69 70
70int svc_reg_xprt_class(struct svc_xprt_class *); 71int svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 04dba23c59f2..1b353a76c304 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -28,6 +28,7 @@ struct svc_sock {
28 /* private TCP part */ 28 /* private TCP part */
29 u32 sk_reclen; /* length of record */ 29 u32 sk_reclen; /* length of record */
30 u32 sk_tcplen; /* current read length */ 30 u32 sk_tcplen; /* current read length */
31 struct rpc_xprt *sk_bc_xprt; /* NFSv4.1 backchannel xprt */
31}; 32};
32 33
33/* 34/*
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index c090df442572..6f9457a75b8f 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -124,6 +124,23 @@ struct rpc_xprt_ops {
124 void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); 124 void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
125}; 125};
126 126
127/*
128 * RPC transport identifiers
129 *
130 * To preserve compatibility with the historical use of raw IP protocol
131 * id's for transport selection, UDP and TCP identifiers are specified
132 * with the previous values. No such restriction exists for new transports,
133 * except that they may not collide with these values (17 and 6,
134 * respectively).
135 */
136#define XPRT_TRANSPORT_BC (1 << 31)
137enum xprt_transports {
138 XPRT_TRANSPORT_UDP = IPPROTO_UDP,
139 XPRT_TRANSPORT_TCP = IPPROTO_TCP,
140 XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC,
141 XPRT_TRANSPORT_RDMA = 256
142};
143
127struct rpc_xprt { 144struct rpc_xprt {
128 struct kref kref; /* Reference count */ 145 struct kref kref; /* Reference count */
129 struct rpc_xprt_ops * ops; /* transport methods */ 146 struct rpc_xprt_ops * ops; /* transport methods */
@@ -179,6 +196,7 @@ struct rpc_xprt {
179 spinlock_t reserve_lock; /* lock slot table */ 196 spinlock_t reserve_lock; /* lock slot table */
180 u32 xid; /* Next XID value to use */ 197 u32 xid; /* Next XID value to use */
181 struct rpc_task * snd_task; /* Task blocked in send */ 198 struct rpc_task * snd_task; /* Task blocked in send */
199 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
182#if defined(CONFIG_NFS_V4_1) 200#if defined(CONFIG_NFS_V4_1)
183 struct svc_serv *bc_serv; /* The RPC service which will */ 201 struct svc_serv *bc_serv; /* The RPC service which will */
184 /* process the callback */ 202 /* process the callback */
@@ -231,6 +249,7 @@ struct xprt_create {
231 struct sockaddr * srcaddr; /* optional local address */ 249 struct sockaddr * srcaddr; /* optional local address */
232 struct sockaddr * dstaddr; /* remote peer address */ 250 struct sockaddr * dstaddr; /* remote peer address */
233 size_t addrlen; 251 size_t addrlen;
252 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
234}; 253};
235 254
236struct xprt_class { 255struct xprt_class {
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 54a379c9e8eb..c2f04e1ae159 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -41,11 +41,6 @@
41#define _LINUX_SUNRPC_XPRTRDMA_H 41#define _LINUX_SUNRPC_XPRTRDMA_H
42 42
43/* 43/*
44 * RPC transport identifier for RDMA
45 */
46#define XPRT_TRANSPORT_RDMA 256
47
48/*
49 * rpcbind (v3+) RDMA netid. 44 * rpcbind (v3+) RDMA netid.
50 */ 45 */
51#define RPCBIND_NETID_RDMA "rdma" 46#define RPCBIND_NETID_RDMA "rdma"
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index c2a46c45c8f7..3f14a02e9cc0 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -13,17 +13,6 @@ int init_socket_xprt(void);
13void cleanup_socket_xprt(void); 13void cleanup_socket_xprt(void);
14 14
15/* 15/*
16 * RPC transport identifiers for UDP, TCP
17 *
18 * To preserve compatibility with the historical use of raw IP protocol
19 * id's for transport selection, these are specified with the previous
20 * values. No such restriction exists for new transports, except that
21 * they may not collide with these values (17 and 6, respectively).
22 */
23#define XPRT_TRANSPORT_UDP IPPROTO_UDP
24#define XPRT_TRANSPORT_TCP IPPROTO_TCP
25
26/*
27 * RPC slot table sizes for UDP, TCP transports 16 * RPC slot table sizes for UDP, TCP transports
28 */ 17 */
29extern unsigned int xprt_udp_slot_table_entries; 18extern unsigned int xprt_udp_slot_table_entries;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 0c431c277af5..54a4e042f104 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -385,7 +385,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
385EXPORT_SYMBOL_GPL(rpcauth_init_cred); 385EXPORT_SYMBOL_GPL(rpcauth_init_cred);
386 386
387void 387void
388rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) 388rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
389{ 389{
390 task->tk_msg.rpc_cred = get_rpccred(cred); 390 task->tk_msg.rpc_cred = get_rpccred(cred);
391 dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, 391 dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
@@ -394,7 +394,7 @@ rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
394EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); 394EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
395 395
396static void 396static void
397rpcauth_bind_root_cred(struct rpc_task *task) 397rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
398{ 398{
399 struct rpc_auth *auth = task->tk_client->cl_auth; 399 struct rpc_auth *auth = task->tk_client->cl_auth;
400 struct auth_cred acred = { 400 struct auth_cred acred = {
@@ -405,7 +405,7 @@ rpcauth_bind_root_cred(struct rpc_task *task)
405 405
406 dprintk("RPC: %5u looking up %s cred\n", 406 dprintk("RPC: %5u looking up %s cred\n",
407 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); 407 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
408 ret = auth->au_ops->lookup_cred(auth, &acred, 0); 408 ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
409 if (!IS_ERR(ret)) 409 if (!IS_ERR(ret))
410 task->tk_msg.rpc_cred = ret; 410 task->tk_msg.rpc_cred = ret;
411 else 411 else
@@ -413,14 +413,14 @@ rpcauth_bind_root_cred(struct rpc_task *task)
413} 413}
414 414
415static void 415static void
416rpcauth_bind_new_cred(struct rpc_task *task) 416rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
417{ 417{
418 struct rpc_auth *auth = task->tk_client->cl_auth; 418 struct rpc_auth *auth = task->tk_client->cl_auth;
419 struct rpc_cred *ret; 419 struct rpc_cred *ret;
420 420
421 dprintk("RPC: %5u looking up %s cred\n", 421 dprintk("RPC: %5u looking up %s cred\n",
422 task->tk_pid, auth->au_ops->au_name); 422 task->tk_pid, auth->au_ops->au_name);
423 ret = rpcauth_lookupcred(auth, 0); 423 ret = rpcauth_lookupcred(auth, lookupflags);
424 if (!IS_ERR(ret)) 424 if (!IS_ERR(ret))
425 task->tk_msg.rpc_cred = ret; 425 task->tk_msg.rpc_cred = ret;
426 else 426 else
@@ -430,12 +430,16 @@ rpcauth_bind_new_cred(struct rpc_task *task)
430void 430void
431rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) 431rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
432{ 432{
433 int lookupflags = 0;
434
435 if (flags & RPC_TASK_ASYNC)
436 lookupflags |= RPCAUTH_LOOKUP_NEW;
433 if (cred != NULL) 437 if (cred != NULL)
434 cred->cr_ops->crbind(task, cred); 438 cred->cr_ops->crbind(task, cred, lookupflags);
435 else if (flags & RPC_TASK_ROOTCREDS) 439 else if (flags & RPC_TASK_ROOTCREDS)
436 rpcauth_bind_root_cred(task); 440 rpcauth_bind_root_cred(task, lookupflags);
437 else 441 else
438 rpcauth_bind_new_cred(task); 442 rpcauth_bind_new_cred(task, lookupflags);
439} 443}
440 444
441void 445void
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 4028502f0528..bf88bf8e9365 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -55,13 +55,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void)
55EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); 55EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
56 56
57static void 57static void
58generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) 58generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
59{ 59{
60 struct rpc_auth *auth = task->tk_client->cl_auth; 60 struct rpc_auth *auth = task->tk_client->cl_auth;
61 struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; 61 struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
62 struct rpc_cred *ret; 62 struct rpc_cred *ret;
63 63
64 ret = auth->au_ops->lookup_cred(auth, acred, 0); 64 ret = auth->au_ops->lookup_cred(auth, acred, lookupflags);
65 if (!IS_ERR(ret)) 65 if (!IS_ERR(ret))
66 task->tk_msg.rpc_cred = ret; 66 task->tk_msg.rpc_cred = ret;
67 else 67 else
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 2e6a148d277c..f6c51e562a02 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1374,8 +1374,10 @@ svcauth_gss_release(struct svc_rqst *rqstp)
1374 if (stat) 1374 if (stat)
1375 goto out_err; 1375 goto out_err;
1376 break; 1376 break;
1377 default: 1377 /*
1378 goto out_err; 1378 * For any other gc_svc value, svcauth_gss_accept() already set
1379 * the auth_error appropriately; just fall through:
1380 */
1379 } 1381 }
1380 1382
1381out: 1383out:
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 45cdaff9b361..d6eee291a0e2 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -103,23 +103,21 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
103EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); 103EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
104 104
105 105
106static void queue_loose(struct cache_detail *detail, struct cache_head *ch); 106static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
107 107
108static int cache_fresh_locked(struct cache_head *head, time_t expiry) 108static void cache_fresh_locked(struct cache_head *head, time_t expiry)
109{ 109{
110 head->expiry_time = expiry; 110 head->expiry_time = expiry;
111 head->last_refresh = get_seconds(); 111 head->last_refresh = get_seconds();
112 return !test_and_set_bit(CACHE_VALID, &head->flags); 112 set_bit(CACHE_VALID, &head->flags);
113} 113}
114 114
115static void cache_fresh_unlocked(struct cache_head *head, 115static void cache_fresh_unlocked(struct cache_head *head,
116 struct cache_detail *detail, int new) 116 struct cache_detail *detail)
117{ 117{
118 if (new)
119 cache_revisit_request(head);
120 if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { 118 if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
121 cache_revisit_request(head); 119 cache_revisit_request(head);
122 queue_loose(detail, head); 120 cache_dequeue(detail, head);
123 } 121 }
124} 122}
125 123
@@ -132,7 +130,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
132 */ 130 */
133 struct cache_head **head; 131 struct cache_head **head;
134 struct cache_head *tmp; 132 struct cache_head *tmp;
135 int is_new;
136 133
137 if (!test_bit(CACHE_VALID, &old->flags)) { 134 if (!test_bit(CACHE_VALID, &old->flags)) {
138 write_lock(&detail->hash_lock); 135 write_lock(&detail->hash_lock);
@@ -141,9 +138,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
141 set_bit(CACHE_NEGATIVE, &old->flags); 138 set_bit(CACHE_NEGATIVE, &old->flags);
142 else 139 else
143 detail->update(old, new); 140 detail->update(old, new);
144 is_new = cache_fresh_locked(old, new->expiry_time); 141 cache_fresh_locked(old, new->expiry_time);
145 write_unlock(&detail->hash_lock); 142 write_unlock(&detail->hash_lock);
146 cache_fresh_unlocked(old, detail, is_new); 143 cache_fresh_unlocked(old, detail);
147 return old; 144 return old;
148 } 145 }
149 write_unlock(&detail->hash_lock); 146 write_unlock(&detail->hash_lock);
@@ -167,11 +164,11 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
167 *head = tmp; 164 *head = tmp;
168 detail->entries++; 165 detail->entries++;
169 cache_get(tmp); 166 cache_get(tmp);
170 is_new = cache_fresh_locked(tmp, new->expiry_time); 167 cache_fresh_locked(tmp, new->expiry_time);
171 cache_fresh_locked(old, 0); 168 cache_fresh_locked(old, 0);
172 write_unlock(&detail->hash_lock); 169 write_unlock(&detail->hash_lock);
173 cache_fresh_unlocked(tmp, detail, is_new); 170 cache_fresh_unlocked(tmp, detail);
174 cache_fresh_unlocked(old, detail, 0); 171 cache_fresh_unlocked(old, detail);
175 cache_put(old, detail); 172 cache_put(old, detail);
176 return tmp; 173 return tmp;
177} 174}
@@ -184,6 +181,22 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
184 return cd->cache_upcall(cd, h); 181 return cd->cache_upcall(cd, h);
185} 182}
186 183
184static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
185{
186 if (!test_bit(CACHE_VALID, &h->flags) ||
187 h->expiry_time < get_seconds())
188 return -EAGAIN;
189 else if (detail->flush_time > h->last_refresh)
190 return -EAGAIN;
191 else {
192 /* entry is valid */
193 if (test_bit(CACHE_NEGATIVE, &h->flags))
194 return -ENOENT;
195 else
196 return 0;
197 }
198}
199
187/* 200/*
188 * This is the generic cache management routine for all 201 * This is the generic cache management routine for all
189 * the authentication caches. 202 * the authentication caches.
@@ -192,8 +205,10 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
192 * 205 *
193 * 206 *
194 * Returns 0 if the cache_head can be used, or cache_puts it and returns 207 * Returns 0 if the cache_head can be used, or cache_puts it and returns
195 * -EAGAIN if upcall is pending, 208 * -EAGAIN if upcall is pending and request has been queued
196 * -ETIMEDOUT if upcall failed and should be retried, 209 * -ETIMEDOUT if upcall failed or request could not be queued or
210 * upcall completed but item is still invalid (implying that
211 * the cache item has been replaced with a newer one).
197 * -ENOENT if cache entry was negative 212 * -ENOENT if cache entry was negative
198 */ 213 */
199int cache_check(struct cache_detail *detail, 214int cache_check(struct cache_detail *detail,
@@ -203,17 +218,7 @@ int cache_check(struct cache_detail *detail,
203 long refresh_age, age; 218 long refresh_age, age;
204 219
205 /* First decide return status as best we can */ 220 /* First decide return status as best we can */
206 if (!test_bit(CACHE_VALID, &h->flags) || 221 rv = cache_is_valid(detail, h);
207 h->expiry_time < get_seconds())
208 rv = -EAGAIN;
209 else if (detail->flush_time > h->last_refresh)
210 rv = -EAGAIN;
211 else {
212 /* entry is valid */
213 if (test_bit(CACHE_NEGATIVE, &h->flags))
214 rv = -ENOENT;
215 else rv = 0;
216 }
217 222
218 /* now see if we want to start an upcall */ 223 /* now see if we want to start an upcall */
219 refresh_age = (h->expiry_time - h->last_refresh); 224 refresh_age = (h->expiry_time - h->last_refresh);
@@ -229,10 +234,11 @@ int cache_check(struct cache_detail *detail,
229 switch (cache_make_upcall(detail, h)) { 234 switch (cache_make_upcall(detail, h)) {
230 case -EINVAL: 235 case -EINVAL:
231 clear_bit(CACHE_PENDING, &h->flags); 236 clear_bit(CACHE_PENDING, &h->flags);
237 cache_revisit_request(h);
232 if (rv == -EAGAIN) { 238 if (rv == -EAGAIN) {
233 set_bit(CACHE_NEGATIVE, &h->flags); 239 set_bit(CACHE_NEGATIVE, &h->flags);
234 cache_fresh_unlocked(h, detail, 240 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY);
235 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY)); 241 cache_fresh_unlocked(h, detail);
236 rv = -ENOENT; 242 rv = -ENOENT;
237 } 243 }
238 break; 244 break;
@@ -245,10 +251,14 @@ int cache_check(struct cache_detail *detail,
245 } 251 }
246 } 252 }
247 253
248 if (rv == -EAGAIN) 254 if (rv == -EAGAIN) {
249 if (cache_defer_req(rqstp, h) != 0) 255 if (cache_defer_req(rqstp, h) < 0) {
250 rv = -ETIMEDOUT; 256 /* Request is not deferred */
251 257 rv = cache_is_valid(detail, h);
258 if (rv == -EAGAIN)
259 rv = -ETIMEDOUT;
260 }
261 }
252 if (rv) 262 if (rv)
253 cache_put(h, detail); 263 cache_put(h, detail);
254 return rv; 264 return rv;
@@ -396,7 +406,7 @@ static int cache_clean(void)
396 ) 406 )
397 continue; 407 continue;
398 if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) 408 if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
399 queue_loose(current_detail, ch); 409 cache_dequeue(current_detail, ch);
400 410
401 if (atomic_read(&ch->ref.refcount) == 1) 411 if (atomic_read(&ch->ref.refcount) == 1)
402 break; 412 break;
@@ -412,8 +422,10 @@ static int cache_clean(void)
412 if (!ch) 422 if (!ch)
413 current_index ++; 423 current_index ++;
414 spin_unlock(&cache_list_lock); 424 spin_unlock(&cache_list_lock);
415 if (ch) 425 if (ch) {
426 cache_revisit_request(ch);
416 cache_put(ch, d); 427 cache_put(ch, d);
428 }
417 } else 429 } else
418 spin_unlock(&cache_list_lock); 430 spin_unlock(&cache_list_lock);
419 431
@@ -488,7 +500,7 @@ static int cache_defer_cnt;
488 500
489static int cache_defer_req(struct cache_req *req, struct cache_head *item) 501static int cache_defer_req(struct cache_req *req, struct cache_head *item)
490{ 502{
491 struct cache_deferred_req *dreq; 503 struct cache_deferred_req *dreq, *discard;
492 int hash = DFR_HASH(item); 504 int hash = DFR_HASH(item);
493 505
494 if (cache_defer_cnt >= DFR_MAX) { 506 if (cache_defer_cnt >= DFR_MAX) {
@@ -496,11 +508,11 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
496 * or continue and drop the oldest below 508 * or continue and drop the oldest below
497 */ 509 */
498 if (net_random()&1) 510 if (net_random()&1)
499 return -ETIMEDOUT; 511 return -ENOMEM;
500 } 512 }
501 dreq = req->defer(req); 513 dreq = req->defer(req);
502 if (dreq == NULL) 514 if (dreq == NULL)
503 return -ETIMEDOUT; 515 return -ENOMEM;
504 516
505 dreq->item = item; 517 dreq->item = item;
506 518
@@ -513,23 +525,24 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
513 list_add(&dreq->hash, &cache_defer_hash[hash]); 525 list_add(&dreq->hash, &cache_defer_hash[hash]);
514 526
515 /* it is in, now maybe clean up */ 527 /* it is in, now maybe clean up */
516 dreq = NULL; 528 discard = NULL;
517 if (++cache_defer_cnt > DFR_MAX) { 529 if (++cache_defer_cnt > DFR_MAX) {
518 dreq = list_entry(cache_defer_list.prev, 530 discard = list_entry(cache_defer_list.prev,
519 struct cache_deferred_req, recent); 531 struct cache_deferred_req, recent);
520 list_del(&dreq->recent); 532 list_del_init(&discard->recent);
521 list_del(&dreq->hash); 533 list_del_init(&discard->hash);
522 cache_defer_cnt--; 534 cache_defer_cnt--;
523 } 535 }
524 spin_unlock(&cache_defer_lock); 536 spin_unlock(&cache_defer_lock);
525 537
526 if (dreq) { 538 if (discard)
527 /* there was one too many */ 539 /* there was one too many */
528 dreq->revisit(dreq, 1); 540 discard->revisit(discard, 1);
529 } 541
530 if (!test_bit(CACHE_PENDING, &item->flags)) { 542 if (!test_bit(CACHE_PENDING, &item->flags)) {
531 /* must have just been validated... */ 543 /* must have just been validated... */
532 cache_revisit_request(item); 544 cache_revisit_request(item);
545 return -EAGAIN;
533 } 546 }
534 return 0; 547 return 0;
535} 548}
@@ -551,7 +564,7 @@ static void cache_revisit_request(struct cache_head *item)
551 dreq = list_entry(lp, struct cache_deferred_req, hash); 564 dreq = list_entry(lp, struct cache_deferred_req, hash);
552 lp = lp->next; 565 lp = lp->next;
553 if (dreq->item == item) { 566 if (dreq->item == item) {
554 list_del(&dreq->hash); 567 list_del_init(&dreq->hash);
555 list_move(&dreq->recent, &pending); 568 list_move(&dreq->recent, &pending);
556 cache_defer_cnt--; 569 cache_defer_cnt--;
557 } 570 }
@@ -577,7 +590,7 @@ void cache_clean_deferred(void *owner)
577 590
578 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { 591 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
579 if (dreq->owner == owner) { 592 if (dreq->owner == owner) {
580 list_del(&dreq->hash); 593 list_del_init(&dreq->hash);
581 list_move(&dreq->recent, &pending); 594 list_move(&dreq->recent, &pending);
582 cache_defer_cnt--; 595 cache_defer_cnt--;
583 } 596 }
@@ -887,7 +900,7 @@ static int cache_release(struct inode *inode, struct file *filp,
887 900
888 901
889 902
890static void queue_loose(struct cache_detail *detail, struct cache_head *ch) 903static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
891{ 904{
892 struct cache_queue *cq; 905 struct cache_queue *cq;
893 spin_lock(&queue_lock); 906 spin_lock(&queue_lock);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fac0ca93f06b..a417d5ab5dd7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -288,6 +288,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
288 .srcaddr = args->saddress, 288 .srcaddr = args->saddress,
289 .dstaddr = args->address, 289 .dstaddr = args->address,
290 .addrlen = args->addrsize, 290 .addrlen = args->addrsize,
291 .bc_xprt = args->bc_xprt,
291 }; 292 };
292 char servername[48]; 293 char servername[48];
293 294
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 8f459abe97cf..cef74ba0666c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -21,6 +21,8 @@
21 21
22#include <linux/sunrpc/clnt.h> 22#include <linux/sunrpc/clnt.h>
23 23
24#include "sunrpc.h"
25
24#ifdef RPC_DEBUG 26#ifdef RPC_DEBUG
25#define RPCDBG_FACILITY RPCDBG_SCHED 27#define RPCDBG_FACILITY RPCDBG_SCHED
26#define RPC_TASK_MAGIC_ID 0xf00baa 28#define RPC_TASK_MAGIC_ID 0xf00baa
@@ -711,11 +713,6 @@ static void rpc_async_schedule(struct work_struct *work)
711 __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); 713 __rpc_execute(container_of(work, struct rpc_task, u.tk_work));
712} 714}
713 715
714struct rpc_buffer {
715 size_t len;
716 char data[];
717};
718
719/** 716/**
720 * rpc_malloc - allocate an RPC buffer 717 * rpc_malloc - allocate an RPC buffer
721 * @task: RPC task that will use this buffer 718 * @task: RPC task that will use this buffer
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 5d9dd742264b..90c292e2738b 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -27,11 +27,25 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27#ifndef _NET_SUNRPC_SUNRPC_H 27#ifndef _NET_SUNRPC_SUNRPC_H
28#define _NET_SUNRPC_SUNRPC_H 28#define _NET_SUNRPC_SUNRPC_H
29 29
30#include <linux/net.h>
31
32/*
33 * Header for dynamically allocated rpc buffers.
34 */
35struct rpc_buffer {
36 size_t len;
37 char data[];
38};
39
30static inline int rpc_reply_expected(struct rpc_task *task) 40static inline int rpc_reply_expected(struct rpc_task *task)
31{ 41{
32 return (task->tk_msg.rpc_proc != NULL) && 42 return (task->tk_msg.rpc_proc != NULL) &&
33 (task->tk_msg.rpc_proc->p_decode != NULL); 43 (task->tk_msg.rpc_proc->p_decode != NULL);
34} 44}
35 45
46int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
47 struct page *headpage, unsigned long headoffset,
48 struct page *tailpage, unsigned long tailoffset);
49
36#endif /* _NET_SUNRPC_SUNRPC_H */ 50#endif /* _NET_SUNRPC_SUNRPC_H */
37 51
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 27d44332f017..df124f78ee48 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -160,6 +160,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
160 mutex_init(&xprt->xpt_mutex); 160 mutex_init(&xprt->xpt_mutex);
161 spin_lock_init(&xprt->xpt_lock); 161 spin_lock_init(&xprt->xpt_lock);
162 set_bit(XPT_BUSY, &xprt->xpt_flags); 162 set_bit(XPT_BUSY, &xprt->xpt_flags);
163 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
163} 164}
164EXPORT_SYMBOL_GPL(svc_xprt_init); 165EXPORT_SYMBOL_GPL(svc_xprt_init);
165 166
@@ -710,10 +711,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
710 spin_unlock_bh(&pool->sp_lock); 711 spin_unlock_bh(&pool->sp_lock);
711 712
712 len = 0; 713 len = 0;
713 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { 714 if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
714 dprintk("svc_recv: found XPT_CLOSE\n");
715 svc_delete_xprt(xprt);
716 } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
717 struct svc_xprt *newxpt; 715 struct svc_xprt *newxpt;
718 newxpt = xprt->xpt_ops->xpo_accept(xprt); 716 newxpt = xprt->xpt_ops->xpo_accept(xprt);
719 if (newxpt) { 717 if (newxpt) {
@@ -739,7 +737,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
739 svc_xprt_received(newxpt); 737 svc_xprt_received(newxpt);
740 } 738 }
741 svc_xprt_received(xprt); 739 svc_xprt_received(xprt);
742 } else { 740 } else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
743 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", 741 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
744 rqstp, pool->sp_id, xprt, 742 rqstp, pool->sp_id, xprt,
745 atomic_read(&xprt->xpt_ref.refcount)); 743 atomic_read(&xprt->xpt_ref.refcount));
@@ -752,6 +750,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
752 dprintk("svc: got len=%d\n", len); 750 dprintk("svc: got len=%d\n", len);
753 } 751 }
754 752
753 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
754 dprintk("svc_recv: found XPT_CLOSE\n");
755 svc_delete_xprt(xprt);
756 }
757
755 /* No data, incomplete (TCP) read, or accept() */ 758 /* No data, incomplete (TCP) read, or accept() */
756 if (len == 0 || len == -EAGAIN) { 759 if (len == 0 || len == -EAGAIN) {
757 rqstp->rq_res.len = 0; 760 rqstp->rq_res.len = 0;
@@ -808,6 +811,7 @@ int svc_send(struct svc_rqst *rqstp)
808 else 811 else
809 len = xprt->xpt_ops->xpo_sendto(rqstp); 812 len = xprt->xpt_ops->xpo_sendto(rqstp);
810 mutex_unlock(&xprt->xpt_mutex); 813 mutex_unlock(&xprt->xpt_mutex);
814 rpc_wake_up(&xprt->xpt_bc_pending);
811 svc_xprt_release(rqstp); 815 svc_xprt_release(rqstp);
812 816
813 if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) 817 if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
@@ -1166,11 +1170,6 @@ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
1166 1170
1167 dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); 1171 dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
1168 1172
1169 lock_kernel();
1170 /* bump up the pseudo refcount while traversing */
1171 svc_get(serv);
1172 unlock_kernel();
1173
1174 if (!pidx) 1173 if (!pidx)
1175 return SEQ_START_TOKEN; 1174 return SEQ_START_TOKEN;
1176 return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]); 1175 return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
@@ -1198,12 +1197,6 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
1198 1197
1199static void svc_pool_stats_stop(struct seq_file *m, void *p) 1198static void svc_pool_stats_stop(struct seq_file *m, void *p)
1200{ 1199{
1201 struct svc_serv *serv = m->private;
1202
1203 lock_kernel();
1204 /* this function really, really should have been called svc_put() */
1205 svc_destroy(serv);
1206 unlock_kernel();
1207} 1200}
1208 1201
1209static int svc_pool_stats_show(struct seq_file *m, void *p) 1202static int svc_pool_stats_show(struct seq_file *m, void *p)
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 6caffa34ac01..117f68a8aa40 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -668,6 +668,7 @@ static int unix_gid_find(uid_t uid, struct group_info **gip,
668 case 0: 668 case 0:
669 *gip = ug->gi; 669 *gip = ug->gi;
670 get_group_info(*gip); 670 get_group_info(*gip);
671 cache_put(&ug->h, &unix_gid_cache);
671 return 0; 672 return 0;
672 default: 673 default:
673 return -EAGAIN; 674 return -EAGAIN;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 23128ee191ae..ccc5e83cae5d 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -49,6 +49,7 @@
49#include <linux/sunrpc/msg_prot.h> 49#include <linux/sunrpc/msg_prot.h>
50#include <linux/sunrpc/svcsock.h> 50#include <linux/sunrpc/svcsock.h>
51#include <linux/sunrpc/stats.h> 51#include <linux/sunrpc/stats.h>
52#include <linux/sunrpc/xprt.h>
52 53
53#define RPCDBG_FACILITY RPCDBG_SVCXPRT 54#define RPCDBG_FACILITY RPCDBG_SVCXPRT
54 55
@@ -153,49 +154,27 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
153} 154}
154 155
155/* 156/*
156 * Generic sendto routine 157 * send routine intended to be shared by the fore- and back-channel
157 */ 158 */
158static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) 159int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
160 struct page *headpage, unsigned long headoffset,
161 struct page *tailpage, unsigned long tailoffset)
159{ 162{
160 struct svc_sock *svsk =
161 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
162 struct socket *sock = svsk->sk_sock;
163 int slen;
164 union {
165 struct cmsghdr hdr;
166 long all[SVC_PKTINFO_SPACE / sizeof(long)];
167 } buffer;
168 struct cmsghdr *cmh = &buffer.hdr;
169 int len = 0;
170 int result; 163 int result;
171 int size; 164 int size;
172 struct page **ppage = xdr->pages; 165 struct page **ppage = xdr->pages;
173 size_t base = xdr->page_base; 166 size_t base = xdr->page_base;
174 unsigned int pglen = xdr->page_len; 167 unsigned int pglen = xdr->page_len;
175 unsigned int flags = MSG_MORE; 168 unsigned int flags = MSG_MORE;
176 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 169 int slen;
170 int len = 0;
177 171
178 slen = xdr->len; 172 slen = xdr->len;
179 173
180 if (rqstp->rq_prot == IPPROTO_UDP) {
181 struct msghdr msg = {
182 .msg_name = &rqstp->rq_addr,
183 .msg_namelen = rqstp->rq_addrlen,
184 .msg_control = cmh,
185 .msg_controllen = sizeof(buffer),
186 .msg_flags = MSG_MORE,
187 };
188
189 svc_set_cmsg_data(rqstp, cmh);
190
191 if (sock_sendmsg(sock, &msg, 0) < 0)
192 goto out;
193 }
194
195 /* send head */ 174 /* send head */
196 if (slen == xdr->head[0].iov_len) 175 if (slen == xdr->head[0].iov_len)
197 flags = 0; 176 flags = 0;
198 len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, 177 len = kernel_sendpage(sock, headpage, headoffset,
199 xdr->head[0].iov_len, flags); 178 xdr->head[0].iov_len, flags);
200 if (len != xdr->head[0].iov_len) 179 if (len != xdr->head[0].iov_len)
201 goto out; 180 goto out;
@@ -219,16 +198,58 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
219 base = 0; 198 base = 0;
220 ppage++; 199 ppage++;
221 } 200 }
201
222 /* send tail */ 202 /* send tail */
223 if (xdr->tail[0].iov_len) { 203 if (xdr->tail[0].iov_len) {
224 result = kernel_sendpage(sock, rqstp->rq_respages[0], 204 result = kernel_sendpage(sock, tailpage, tailoffset,
225 ((unsigned long)xdr->tail[0].iov_base) 205 xdr->tail[0].iov_len, 0);
226 & (PAGE_SIZE-1),
227 xdr->tail[0].iov_len, 0);
228
229 if (result > 0) 206 if (result > 0)
230 len += result; 207 len += result;
231 } 208 }
209
210out:
211 return len;
212}
213
214
215/*
216 * Generic sendto routine
217 */
218static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
219{
220 struct svc_sock *svsk =
221 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
222 struct socket *sock = svsk->sk_sock;
223 union {
224 struct cmsghdr hdr;
225 long all[SVC_PKTINFO_SPACE / sizeof(long)];
226 } buffer;
227 struct cmsghdr *cmh = &buffer.hdr;
228 int len = 0;
229 unsigned long tailoff;
230 unsigned long headoff;
231 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
232
233 if (rqstp->rq_prot == IPPROTO_UDP) {
234 struct msghdr msg = {
235 .msg_name = &rqstp->rq_addr,
236 .msg_namelen = rqstp->rq_addrlen,
237 .msg_control = cmh,
238 .msg_controllen = sizeof(buffer),
239 .msg_flags = MSG_MORE,
240 };
241
242 svc_set_cmsg_data(rqstp, cmh);
243
244 if (sock_sendmsg(sock, &msg, 0) < 0)
245 goto out;
246 }
247
248 tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
249 headoff = 0;
250 len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
251 rqstp->rq_respages[0], tailoff);
252
232out: 253out:
233 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", 254 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
234 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, 255 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
@@ -432,29 +453,49 @@ static void svc_tcp_write_space(struct sock *sk)
432} 453}
433 454
434/* 455/*
456 * See net/ipv4/ip_sockglue.c : ip_cmsg_recv_pktinfo
457 */
458static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
459 struct cmsghdr *cmh)
460{
461 struct in_pktinfo *pki = CMSG_DATA(cmh);
462 if (cmh->cmsg_type != IP_PKTINFO)
463 return 0;
464 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
465 return 1;
466}
467
468/*
469 * See net/ipv6/datagram.c : datagram_recv_ctl
470 */
471static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
472 struct cmsghdr *cmh)
473{
474 struct in6_pktinfo *pki = CMSG_DATA(cmh);
475 if (cmh->cmsg_type != IPV6_PKTINFO)
476 return 0;
477 ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
478 return 1;
479}
480
481/*
435 * Copy the UDP datagram's destination address to the rqstp structure. 482 * Copy the UDP datagram's destination address to the rqstp structure.
436 * The 'destination' address in this case is the address to which the 483 * The 'destination' address in this case is the address to which the
437 * peer sent the datagram, i.e. our local address. For multihomed 484 * peer sent the datagram, i.e. our local address. For multihomed
438 * hosts, this can change from msg to msg. Note that only the IP 485 * hosts, this can change from msg to msg. Note that only the IP
439 * address changes, the port number should remain the same. 486 * address changes, the port number should remain the same.
440 */ 487 */
441static void svc_udp_get_dest_address(struct svc_rqst *rqstp, 488static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
442 struct cmsghdr *cmh) 489 struct cmsghdr *cmh)
443{ 490{
444 struct svc_sock *svsk = 491 switch (cmh->cmsg_level) {
445 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 492 case SOL_IP:
446 switch (svsk->sk_sk->sk_family) { 493 return svc_udp_get_dest_address4(rqstp, cmh);
447 case AF_INET: { 494 case SOL_IPV6:
448 struct in_pktinfo *pki = CMSG_DATA(cmh); 495 return svc_udp_get_dest_address6(rqstp, cmh);
449 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
450 break;
451 }
452 case AF_INET6: {
453 struct in6_pktinfo *pki = CMSG_DATA(cmh);
454 ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
455 break;
456 }
457 } 496 }
497
498 return 0;
458} 499}
459 500
460/* 501/*
@@ -531,16 +572,15 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
531 572
532 rqstp->rq_prot = IPPROTO_UDP; 573 rqstp->rq_prot = IPPROTO_UDP;
533 574
534 if (cmh->cmsg_level != IPPROTO_IP || 575 if (!svc_udp_get_dest_address(rqstp, cmh)) {
535 cmh->cmsg_type != IP_PKTINFO) {
536 if (net_ratelimit()) 576 if (net_ratelimit())
537 printk("rpcsvc: received unknown control message:" 577 printk(KERN_WARNING
538 "%d/%d\n", 578 "svc: received unknown control message %d/%d; "
539 cmh->cmsg_level, cmh->cmsg_type); 579 "dropping RPC reply datagram\n",
580 cmh->cmsg_level, cmh->cmsg_type);
540 skb_free_datagram(svsk->sk_sk, skb); 581 skb_free_datagram(svsk->sk_sk, skb);
541 return 0; 582 return 0;
542 } 583 }
543 svc_udp_get_dest_address(rqstp, cmh);
544 584
545 if (skb_is_nonlinear(skb)) { 585 if (skb_is_nonlinear(skb)) {
546 /* we have to copy */ 586 /* we have to copy */
@@ -651,8 +691,7 @@ static struct svc_xprt_class svc_udp_class = {
651 691
652static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) 692static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
653{ 693{
654 int one = 1; 694 int err, level, optname, one = 1;
655 mm_segment_t oldfs;
656 695
657 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); 696 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
658 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); 697 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
@@ -671,12 +710,22 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
671 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 710 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
672 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 711 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
673 712
674 oldfs = get_fs();
675 set_fs(KERNEL_DS);
676 /* make sure we get destination address info */ 713 /* make sure we get destination address info */
677 svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO, 714 switch (svsk->sk_sk->sk_family) {
678 (char __user *)&one, sizeof(one)); 715 case AF_INET:
679 set_fs(oldfs); 716 level = SOL_IP;
717 optname = IP_PKTINFO;
718 break;
719 case AF_INET6:
720 level = SOL_IPV6;
721 optname = IPV6_RECVPKTINFO;
722 break;
723 default:
724 BUG();
725 }
726 err = kernel_setsockopt(svsk->sk_sock, level, optname,
727 (char *)&one, sizeof(one));
728 dprintk("svc: kernel_setsockopt returned %d\n", err);
680} 729}
681 730
682/* 731/*
@@ -826,21 +875,15 @@ failed:
826} 875}
827 876
828/* 877/*
829 * Receive data from a TCP socket. 878 * Receive data.
879 * If we haven't gotten the record length yet, get the next four bytes.
880 * Otherwise try to gobble up as much as possible up to the complete
881 * record length.
830 */ 882 */
831static int svc_tcp_recvfrom(struct svc_rqst *rqstp) 883static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
832{ 884{
833 struct svc_sock *svsk =
834 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
835 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 885 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
836 int len; 886 int len;
837 struct kvec *vec;
838 int pnum, vlen;
839
840 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
841 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
842 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
843 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
844 887
845 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) 888 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
846 /* sndbuf needs to have room for one request 889 /* sndbuf needs to have room for one request
@@ -861,10 +904,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
861 904
862 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 905 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
863 906
864 /* Receive data. If we haven't got the record length yet, get
865 * the next four bytes. Otherwise try to gobble up as much as
866 * possible up to the complete record length.
867 */
868 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 907 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
869 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; 908 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
870 struct kvec iov; 909 struct kvec iov;
@@ -879,7 +918,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
879 dprintk("svc: short recvfrom while reading record " 918 dprintk("svc: short recvfrom while reading record "
880 "length (%d of %d)\n", len, want); 919 "length (%d of %d)\n", len, want);
881 svc_xprt_received(&svsk->sk_xprt); 920 svc_xprt_received(&svsk->sk_xprt);
882 return -EAGAIN; /* record header not complete */ 921 goto err_again; /* record header not complete */
883 } 922 }
884 923
885 svsk->sk_reclen = ntohl(svsk->sk_reclen); 924 svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -894,6 +933,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
894 "per record not supported\n"); 933 "per record not supported\n");
895 goto err_delete; 934 goto err_delete;
896 } 935 }
936
897 svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; 937 svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
898 dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); 938 dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
899 if (svsk->sk_reclen > serv->sv_max_mesg) { 939 if (svsk->sk_reclen > serv->sv_max_mesg) {
@@ -914,17 +954,121 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
914 dprintk("svc: incomplete TCP record (%d of %d)\n", 954 dprintk("svc: incomplete TCP record (%d of %d)\n",
915 len, svsk->sk_reclen); 955 len, svsk->sk_reclen);
916 svc_xprt_received(&svsk->sk_xprt); 956 svc_xprt_received(&svsk->sk_xprt);
917 return -EAGAIN; /* record not complete */ 957 goto err_again; /* record not complete */
918 } 958 }
919 len = svsk->sk_reclen; 959 len = svsk->sk_reclen;
920 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 960 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
921 961
962 return len;
963 error:
964 if (len == -EAGAIN) {
965 dprintk("RPC: TCP recv_record got EAGAIN\n");
966 svc_xprt_received(&svsk->sk_xprt);
967 }
968 return len;
969 err_delete:
970 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
971 err_again:
972 return -EAGAIN;
973}
974
975static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
976 struct rpc_rqst **reqpp, struct kvec *vec)
977{
978 struct rpc_rqst *req = NULL;
979 u32 *p;
980 u32 xid;
981 u32 calldir;
982 int len;
983
984 len = svc_recvfrom(rqstp, vec, 1, 8);
985 if (len < 0)
986 goto error;
987
988 p = (u32 *)rqstp->rq_arg.head[0].iov_base;
989 xid = *p++;
990 calldir = *p;
991
992 if (calldir == 0) {
993 /* REQUEST is the most common case */
994 vec[0] = rqstp->rq_arg.head[0];
995 } else {
996 /* REPLY */
997 if (svsk->sk_bc_xprt)
998 req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid);
999
1000 if (!req) {
1001 printk(KERN_NOTICE
1002 "%s: Got unrecognized reply: "
1003 "calldir 0x%x sk_bc_xprt %p xid %08x\n",
1004 __func__, ntohl(calldir),
1005 svsk->sk_bc_xprt, xid);
1006 vec[0] = rqstp->rq_arg.head[0];
1007 goto out;
1008 }
1009
1010 memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
1011 sizeof(struct xdr_buf));
1012 /* copy the xid and call direction */
1013 memcpy(req->rq_private_buf.head[0].iov_base,
1014 rqstp->rq_arg.head[0].iov_base, 8);
1015 vec[0] = req->rq_private_buf.head[0];
1016 }
1017 out:
1018 vec[0].iov_base += 8;
1019 vec[0].iov_len -= 8;
1020 len = svsk->sk_reclen - 8;
1021 error:
1022 *reqpp = req;
1023 return len;
1024}
1025
1026/*
1027 * Receive data from a TCP socket.
1028 */
1029static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1030{
1031 struct svc_sock *svsk =
1032 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
1033 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1034 int len;
1035 struct kvec *vec;
1036 int pnum, vlen;
1037 struct rpc_rqst *req = NULL;
1038
1039 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1040 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
1041 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
1042 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
1043
1044 len = svc_tcp_recv_record(svsk, rqstp);
1045 if (len < 0)
1046 goto error;
1047
922 vec = rqstp->rq_vec; 1048 vec = rqstp->rq_vec;
923 vec[0] = rqstp->rq_arg.head[0]; 1049 vec[0] = rqstp->rq_arg.head[0];
924 vlen = PAGE_SIZE; 1050 vlen = PAGE_SIZE;
1051
1052 /*
1053 * We have enough data for the whole tcp record. Let's try and read the
1054 * first 8 bytes to get the xid and the call direction. We can use this
1055 * to figure out if this is a call or a reply to a callback. If
1056 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
1057 * In that case, don't bother with the calldir and just read the data.
1058 * It will be rejected in svc_process.
1059 */
1060 if (len >= 8) {
1061 len = svc_process_calldir(svsk, rqstp, &req, vec);
1062 if (len < 0)
1063 goto err_again;
1064 vlen -= 8;
1065 }
1066
925 pnum = 1; 1067 pnum = 1;
926 while (vlen < len) { 1068 while (vlen < len) {
927 vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); 1069 vec[pnum].iov_base = (req) ?
1070 page_address(req->rq_private_buf.pages[pnum - 1]) :
1071 page_address(rqstp->rq_pages[pnum]);
928 vec[pnum].iov_len = PAGE_SIZE; 1072 vec[pnum].iov_len = PAGE_SIZE;
929 pnum++; 1073 pnum++;
930 vlen += PAGE_SIZE; 1074 vlen += PAGE_SIZE;
@@ -934,8 +1078,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
934 /* Now receive data */ 1078 /* Now receive data */
935 len = svc_recvfrom(rqstp, vec, pnum, len); 1079 len = svc_recvfrom(rqstp, vec, pnum, len);
936 if (len < 0) 1080 if (len < 0)
937 goto error; 1081 goto err_again;
938 1082
1083 /*
1084 * Account for the 8 bytes we read earlier
1085 */
1086 len += 8;
1087
1088 if (req) {
1089 xprt_complete_rqst(req->rq_task, len);
1090 len = 0;
1091 goto out;
1092 }
939 dprintk("svc: TCP complete record (%d bytes)\n", len); 1093 dprintk("svc: TCP complete record (%d bytes)\n", len);
940 rqstp->rq_arg.len = len; 1094 rqstp->rq_arg.len = len;
941 rqstp->rq_arg.page_base = 0; 1095 rqstp->rq_arg.page_base = 0;
@@ -949,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
949 rqstp->rq_xprt_ctxt = NULL; 1103 rqstp->rq_xprt_ctxt = NULL;
950 rqstp->rq_prot = IPPROTO_TCP; 1104 rqstp->rq_prot = IPPROTO_TCP;
951 1105
1106out:
952 /* Reset TCP read info */ 1107 /* Reset TCP read info */
953 svsk->sk_reclen = 0; 1108 svsk->sk_reclen = 0;
954 svsk->sk_tcplen = 0; 1109 svsk->sk_tcplen = 0;
@@ -960,21 +1115,19 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
960 1115
961 return len; 1116 return len;
962 1117
963 err_delete: 1118err_again:
964 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
965 return -EAGAIN;
966
967 error:
968 if (len == -EAGAIN) { 1119 if (len == -EAGAIN) {
969 dprintk("RPC: TCP recvfrom got EAGAIN\n"); 1120 dprintk("RPC: TCP recvfrom got EAGAIN\n");
970 svc_xprt_received(&svsk->sk_xprt); 1121 svc_xprt_received(&svsk->sk_xprt);
971 } else { 1122 return len;
1123 }
1124error:
1125 if (len != -EAGAIN) {
972 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1126 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
973 svsk->sk_xprt.xpt_server->sv_name, -len); 1127 svsk->sk_xprt.xpt_server->sv_name, -len);
974 goto err_delete; 1128 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
975 } 1129 }
976 1130 return -EAGAIN;
977 return len;
978} 1131}
979 1132
980/* 1133/*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f412a852bc73..fd46d42afa89 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -832,6 +832,11 @@ static void xprt_timer(struct rpc_task *task)
832 spin_unlock_bh(&xprt->transport_lock); 832 spin_unlock_bh(&xprt->transport_lock);
833} 833}
834 834
835static inline int xprt_has_timer(struct rpc_xprt *xprt)
836{
837 return xprt->idle_timeout != 0;
838}
839
835/** 840/**
836 * xprt_prepare_transmit - reserve the transport before sending a request 841 * xprt_prepare_transmit - reserve the transport before sending a request
837 * @task: RPC task about to send a request 842 * @task: RPC task about to send a request
@@ -1013,7 +1018,7 @@ void xprt_release(struct rpc_task *task)
1013 if (!list_empty(&req->rq_list)) 1018 if (!list_empty(&req->rq_list))
1014 list_del(&req->rq_list); 1019 list_del(&req->rq_list);
1015 xprt->last_used = jiffies; 1020 xprt->last_used = jiffies;
1016 if (list_empty(&xprt->recv)) 1021 if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
1017 mod_timer(&xprt->timer, 1022 mod_timer(&xprt->timer,
1018 xprt->last_used + xprt->idle_timeout); 1023 xprt->last_used + xprt->idle_timeout);
1019 spin_unlock_bh(&xprt->transport_lock); 1024 spin_unlock_bh(&xprt->transport_lock);
@@ -1082,8 +1087,11 @@ found:
1082#endif /* CONFIG_NFS_V4_1 */ 1087#endif /* CONFIG_NFS_V4_1 */
1083 1088
1084 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1089 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1085 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1090 if (xprt_has_timer(xprt))
1086 (unsigned long)xprt); 1091 setup_timer(&xprt->timer, xprt_init_autodisconnect,
1092 (unsigned long)xprt);
1093 else
1094 init_timer(&xprt->timer);
1087 xprt->last_used = jiffies; 1095 xprt->last_used = jiffies;
1088 xprt->cwnd = RPC_INITCWND; 1096 xprt->cwnd = RPC_INITCWND;
1089 xprt->bind_index = 0; 1097 xprt->bind_index = 0;
@@ -1102,7 +1110,6 @@ found:
1102 1110
1103 dprintk("RPC: created transport %p with %u slots\n", xprt, 1111 dprintk("RPC: created transport %p with %u slots\n", xprt,
1104 xprt->max_reqs); 1112 xprt->max_reqs);
1105
1106 return xprt; 1113 return xprt;
1107} 1114}
1108 1115
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5151f9f6c573..0cf5e8c27a10 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -730,12 +730,12 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
730 goto err; 730 goto err;
731 731
732 mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); 732 mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
733 if (!mr) 733 if (IS_ERR(mr))
734 goto err_free_frmr; 734 goto err_free_frmr;
735 735
736 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, 736 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
737 RPCSVC_MAXPAGES); 737 RPCSVC_MAXPAGES);
738 if (!pl) 738 if (IS_ERR(pl))
739 goto err_free_mr; 739 goto err_free_mr;
740 740
741 frmr->mr = mr; 741 frmr->mr = mr;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 62438f3a914d..bee415465754 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -32,6 +32,7 @@
32#include <linux/tcp.h> 32#include <linux/tcp.h>
33#include <linux/sunrpc/clnt.h> 33#include <linux/sunrpc/clnt.h>
34#include <linux/sunrpc/sched.h> 34#include <linux/sunrpc/sched.h>
35#include <linux/sunrpc/svcsock.h>
35#include <linux/sunrpc/xprtsock.h> 36#include <linux/sunrpc/xprtsock.h>
36#include <linux/file.h> 37#include <linux/file.h>
37#ifdef CONFIG_NFS_V4_1 38#ifdef CONFIG_NFS_V4_1
@@ -43,6 +44,7 @@
43#include <net/udp.h> 44#include <net/udp.h>
44#include <net/tcp.h> 45#include <net/tcp.h>
45 46
47#include "sunrpc.h"
46/* 48/*
47 * xprtsock tunables 49 * xprtsock tunables
48 */ 50 */
@@ -2098,6 +2100,134 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
2098 xprt->stat.bklog_u); 2100 xprt->stat.bklog_u);
2099} 2101}
2100 2102
2103/*
2104 * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason
2105 * we allocate pages instead doing a kmalloc like rpc_malloc is because we want
2106 * to use the server side send routines.
2107 */
2108void *bc_malloc(struct rpc_task *task, size_t size)
2109{
2110 struct page *page;
2111 struct rpc_buffer *buf;
2112
2113 BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer));
2114 page = alloc_page(GFP_KERNEL);
2115
2116 if (!page)
2117 return NULL;
2118
2119 buf = page_address(page);
2120 buf->len = PAGE_SIZE;
2121
2122 return buf->data;
2123}
2124
2125/*
2126 * Free the space allocated in the bc_alloc routine
2127 */
2128void bc_free(void *buffer)
2129{
2130 struct rpc_buffer *buf;
2131
2132 if (!buffer)
2133 return;
2134
2135 buf = container_of(buffer, struct rpc_buffer, data);
2136 free_page((unsigned long)buf);
2137}
2138
2139/*
2140 * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
2141 * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
2142 */
2143static int bc_sendto(struct rpc_rqst *req)
2144{
2145 int len;
2146 struct xdr_buf *xbufp = &req->rq_snd_buf;
2147 struct rpc_xprt *xprt = req->rq_xprt;
2148 struct sock_xprt *transport =
2149 container_of(xprt, struct sock_xprt, xprt);
2150 struct socket *sock = transport->sock;
2151 unsigned long headoff;
2152 unsigned long tailoff;
2153
2154 /*
2155 * Set up the rpc header and record marker stuff
2156 */
2157 xs_encode_tcp_record_marker(xbufp);
2158
2159 tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
2160 headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
2161 len = svc_send_common(sock, xbufp,
2162 virt_to_page(xbufp->head[0].iov_base), headoff,
2163 xbufp->tail[0].iov_base, tailoff);
2164
2165 if (len != xbufp->len) {
2166 printk(KERN_NOTICE "Error sending entire callback!\n");
2167 len = -EAGAIN;
2168 }
2169
2170 return len;
2171}
2172
2173/*
2174 * The send routine. Borrows from svc_send
2175 */
2176static int bc_send_request(struct rpc_task *task)
2177{
2178 struct rpc_rqst *req = task->tk_rqstp;
2179 struct svc_xprt *xprt;
2180 struct svc_sock *svsk;
2181 u32 len;
2182
2183 dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
2184 /*
2185 * Get the server socket associated with this callback xprt
2186 */
2187 xprt = req->rq_xprt->bc_xprt;
2188 svsk = container_of(xprt, struct svc_sock, sk_xprt);
2189
2190 /*
2191 * Grab the mutex to serialize data as the connection is shared
2192 * with the fore channel
2193 */
2194 if (!mutex_trylock(&xprt->xpt_mutex)) {
2195 rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
2196 if (!mutex_trylock(&xprt->xpt_mutex))
2197 return -EAGAIN;
2198 rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
2199 }
2200 if (test_bit(XPT_DEAD, &xprt->xpt_flags))
2201 len = -ENOTCONN;
2202 else
2203 len = bc_sendto(req);
2204 mutex_unlock(&xprt->xpt_mutex);
2205
2206 if (len > 0)
2207 len = 0;
2208
2209 return len;
2210}
2211
/*
 * Close callback for the backchannel transport.  The connection is
 * initiated by the client, so there is nothing to do here.
 */
static void bc_close(struct rpc_xprt *xprt)
{
}
2220
/*
 * Destroy callback for the backchannel transport.  As with bc_close, the
 * connection is client initiated, so there is nothing to do here.
 */
static void bc_destroy(struct rpc_xprt *xprt)
{
}
2230
2101static struct rpc_xprt_ops xs_udp_ops = { 2231static struct rpc_xprt_ops xs_udp_ops = {
2102 .set_buffer_size = xs_udp_set_buffer_size, 2232 .set_buffer_size = xs_udp_set_buffer_size,
2103 .reserve_xprt = xprt_reserve_xprt_cong, 2233 .reserve_xprt = xprt_reserve_xprt_cong,
@@ -2134,6 +2264,22 @@ static struct rpc_xprt_ops xs_tcp_ops = {
2134 .print_stats = xs_tcp_print_stats, 2264 .print_stats = xs_tcp_print_stats,
2135}; 2265};
2136 2266
2267/*
2268 * The rpc_xprt_ops for the server backchannel
2269 */
2270
2271static struct rpc_xprt_ops bc_tcp_ops = {
2272 .reserve_xprt = xprt_reserve_xprt,
2273 .release_xprt = xprt_release_xprt,
2274 .buf_alloc = bc_malloc,
2275 .buf_free = bc_free,
2276 .send_request = bc_send_request,
2277 .set_retrans_timeout = xprt_set_retrans_timeout_def,
2278 .close = bc_close,
2279 .destroy = bc_destroy,
2280 .print_stats = xs_tcp_print_stats,
2281};
2282
2137static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, 2283static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2138 unsigned int slot_table_size) 2284 unsigned int slot_table_size)
2139{ 2285{
@@ -2322,11 +2468,93 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2322 return ERR_PTR(-EINVAL); 2468 return ERR_PTR(-EINVAL);
2323} 2469}
2324 2470
2471/**
2472 * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
2473 * @args: rpc transport creation arguments
2474 *
2475 */
2476static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2477{
2478 struct sockaddr *addr = args->dstaddr;
2479 struct rpc_xprt *xprt;
2480 struct sock_xprt *transport;
2481 struct svc_sock *bc_sock;
2482
2483 if (!args->bc_xprt)
2484 ERR_PTR(-EINVAL);
2485
2486 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2487 if (IS_ERR(xprt))
2488 return xprt;
2489 transport = container_of(xprt, struct sock_xprt, xprt);
2490
2491 xprt->prot = IPPROTO_TCP;
2492 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
2493 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
2494 xprt->timeout = &xs_tcp_default_timeout;
2495
2496 /* backchannel */
2497 xprt_set_bound(xprt);
2498 xprt->bind_timeout = 0;
2499 xprt->connect_timeout = 0;
2500 xprt->reestablish_timeout = 0;
2501 xprt->idle_timeout = 0;
2502
2503 /*
2504 * The backchannel uses the same socket connection as the
2505 * forechannel
2506 */
2507 xprt->bc_xprt = args->bc_xprt;
2508 bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
2509 bc_sock->sk_bc_xprt = xprt;
2510 transport->sock = bc_sock->sk_sock;
2511 transport->inet = bc_sock->sk_sk;
2512
2513 xprt->ops = &bc_tcp_ops;
2514
2515 switch (addr->sa_family) {
2516 case AF_INET:
2517 xs_format_peer_addresses(xprt, "tcp",
2518 RPCBIND_NETID_TCP);
2519 break;
2520 case AF_INET6:
2521 xs_format_peer_addresses(xprt, "tcp",
2522 RPCBIND_NETID_TCP6);
2523 break;
2524 default:
2525 kfree(xprt);
2526 return ERR_PTR(-EAFNOSUPPORT);
2527 }
2528
2529 if (xprt_bound(xprt))
2530 dprintk("RPC: set up xprt to %s (port %s) via %s\n",
2531 xprt->address_strings[RPC_DISPLAY_ADDR],
2532 xprt->address_strings[RPC_DISPLAY_PORT],
2533 xprt->address_strings[RPC_DISPLAY_PROTO]);
2534 else
2535 dprintk("RPC: set up xprt to %s (autobind) via %s\n",
2536 xprt->address_strings[RPC_DISPLAY_ADDR],
2537 xprt->address_strings[RPC_DISPLAY_PROTO]);
2538
2539 /*
2540 * Since we don't want connections for the backchannel, we set
2541 * the xprt status to connected
2542 */
2543 xprt_set_connected(xprt);
2544
2545
2546 if (try_module_get(THIS_MODULE))
2547 return xprt;
2548 kfree(xprt->slot);
2549 kfree(xprt);
2550 return ERR_PTR(-EINVAL);
2551}
2552
2325static struct xprt_class xs_udp_transport = { 2553static struct xprt_class xs_udp_transport = {
2326 .list = LIST_HEAD_INIT(xs_udp_transport.list), 2554 .list = LIST_HEAD_INIT(xs_udp_transport.list),
2327 .name = "udp", 2555 .name = "udp",
2328 .owner = THIS_MODULE, 2556 .owner = THIS_MODULE,
2329 .ident = IPPROTO_UDP, 2557 .ident = XPRT_TRANSPORT_UDP,
2330 .setup = xs_setup_udp, 2558 .setup = xs_setup_udp,
2331}; 2559};
2332 2560
@@ -2334,10 +2562,18 @@ static struct xprt_class xs_tcp_transport = {
2334 .list = LIST_HEAD_INIT(xs_tcp_transport.list), 2562 .list = LIST_HEAD_INIT(xs_tcp_transport.list),
2335 .name = "tcp", 2563 .name = "tcp",
2336 .owner = THIS_MODULE, 2564 .owner = THIS_MODULE,
2337 .ident = IPPROTO_TCP, 2565 .ident = XPRT_TRANSPORT_TCP,
2338 .setup = xs_setup_tcp, 2566 .setup = xs_setup_tcp,
2339}; 2567};
2340 2568
2569static struct xprt_class xs_bc_tcp_transport = {
2570 .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list),
2571 .name = "tcp NFSv4.1 backchannel",
2572 .owner = THIS_MODULE,
2573 .ident = XPRT_TRANSPORT_BC_TCP,
2574 .setup = xs_setup_bc_tcp,
2575};
2576
2341/** 2577/**
2342 * init_socket_xprt - set up xprtsock's sysctls, register with RPC client 2578 * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
2343 * 2579 *
@@ -2351,6 +2587,7 @@ int init_socket_xprt(void)
2351 2587
2352 xprt_register_transport(&xs_udp_transport); 2588 xprt_register_transport(&xs_udp_transport);
2353 xprt_register_transport(&xs_tcp_transport); 2589 xprt_register_transport(&xs_tcp_transport);
2590 xprt_register_transport(&xs_bc_tcp_transport);
2354 2591
2355 return 0; 2592 return 0;
2356} 2593}
@@ -2370,6 +2607,7 @@ void cleanup_socket_xprt(void)
2370 2607
2371 xprt_unregister_transport(&xs_udp_transport); 2608 xprt_unregister_transport(&xs_udp_transport);
2372 xprt_unregister_transport(&xs_tcp_transport); 2609 xprt_unregister_transport(&xs_tcp_transport);
2610 xprt_unregister_transport(&xs_bc_tcp_transport);
2373} 2611}
2374 2612
2375static int param_set_uint_minmax(const char *val, struct kernel_param *kp, 2613static int param_set_uint_minmax(const char *val, struct kernel_param *kp,