Diffstat (limited to 'fs/nfsd')
-rw-r--r--   fs/nfsd/Kconfig        |    4
-rw-r--r--   fs/nfsd/cache.h        |    1
-rw-r--r--   fs/nfsd/export.c       |    1
-rw-r--r--   fs/nfsd/nfs3proc.c     |   13
-rw-r--r--   fs/nfsd/nfs4callback.c |  145
-rw-r--r--   fs/nfsd/nfs4idmap.c    |   20
-rw-r--r--   fs/nfsd/nfs4proc.c     |   49
-rw-r--r--   fs/nfsd/nfs4recover.c  |  205
-rw-r--r--   fs/nfsd/nfs4state.c    |  118
-rw-r--r--   fs/nfsd/nfs4xdr.c      |   75
-rw-r--r--   fs/nfsd/nfscache.c     |  214
-rw-r--r--   fs/nfsd/nfsctl.c       |   45
-rw-r--r--   fs/nfsd/nfsd.h         |    2
-rw-r--r--   fs/nfsd/nfsfh.c        |    6
-rw-r--r--   fs/nfsd/state.h        |   28
-rw-r--r--   fs/nfsd/vfs.c          |   37
-rw-r--r--   fs/nfsd/xdr4.h         |   14
17 files changed, 664 insertions, 313 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index f994e750e0d1..73395156bdb4 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -71,6 +71,7 @@ config NFSD_V4
71 select FS_POSIX_ACL 71 select FS_POSIX_ACL
72 select SUNRPC_GSS 72 select SUNRPC_GSS
73 select CRYPTO 73 select CRYPTO
74 select GRACE_PERIOD
74 help 75 help
75 This option enables support in your system's NFS server for 76 This option enables support in your system's NFS server for
76 version 4 of the NFS protocol (RFC 3530). 77 version 4 of the NFS protocol (RFC 3530).
@@ -94,9 +95,6 @@ config NFSD_V4_SECURITY_LABEL
94 If you do not wish to enable fine-grained security labels SELinux or 95 If you do not wish to enable fine-grained security labels SELinux or
95 Smack policies on NFSv4 files, say N. 96 Smack policies on NFSv4 files, say N.
96 97
97 WARNING: there is still a chance of backwards-incompatible protocol changes.
98 For now we recommend "Y" only for developers and testers.
99
100config NFSD_FAULT_INJECTION 98config NFSD_FAULT_INJECTION
101 bool "NFS server manual fault injection" 99 bool "NFS server manual fault injection"
102 depends on NFSD_V4 && DEBUG_KERNEL 100 depends on NFSD_V4 && DEBUG_KERNEL
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index b582f9ab6b2a..dd96a3830004 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -18,7 +18,6 @@
18 * is much larger than a sockaddr_in6. 18 * is much larger than a sockaddr_in6.
19 */ 19 */
20struct svc_cacherep { 20struct svc_cacherep {
21 struct hlist_node c_hash;
22 struct list_head c_lru; 21 struct list_head c_lru;
23 22
24 unsigned char c_state, /* unused, inprog, done */ 23 unsigned char c_state, /* unused, inprog, done */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 72ffd7cce3c3..30a739d896ff 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1145,6 +1145,7 @@ static struct flags {
1145 { NFSEXP_ALLSQUASH, {"all_squash", ""}}, 1145 { NFSEXP_ALLSQUASH, {"all_squash", ""}},
1146 { NFSEXP_ASYNC, {"async", "sync"}}, 1146 { NFSEXP_ASYNC, {"async", "sync"}},
1147 { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, 1147 { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
1148 { NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}},
1148 { NFSEXP_NOHIDE, {"nohide", ""}}, 1149 { NFSEXP_NOHIDE, {"nohide", ""}},
1149 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, 1150 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
1150 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, 1151 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index fa2525b2e9d7..12f2aab4f614 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -223,11 +223,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
223 newfhp = fh_init(&resp->fh, NFS3_FHSIZE); 223 newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
224 attr = &argp->attrs; 224 attr = &argp->attrs;
225 225
226 /* Get the directory inode */
227 nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
228 if (nfserr)
229 RETURN_STATUS(nfserr);
230
231 /* Unfudge the mode bits */ 226 /* Unfudge the mode bits */
232 attr->ia_mode &= ~S_IFMT; 227 attr->ia_mode &= ~S_IFMT;
233 if (!(attr->ia_valid & ATTR_MODE)) { 228 if (!(attr->ia_valid & ATTR_MODE)) {
@@ -471,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
471 resp->buflen = resp->count; 466 resp->buflen = resp->count;
472 resp->rqstp = rqstp; 467 resp->rqstp = rqstp;
473 offset = argp->cookie; 468 offset = argp->cookie;
469
470 nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP);
471 if (nfserr)
472 RETURN_STATUS(nfserr);
473
474 if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS)
475 RETURN_STATUS(nfserr_notsupp);
476
474 nfserr = nfsd_readdir(rqstp, &resp->fh, 477 nfserr = nfsd_readdir(rqstp, &resp->fh,
475 &offset, 478 &offset,
476 &resp->common, 479 &resp->common,
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index e0be57b0f79b..4fe4be1ee82e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -49,11 +49,8 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
49 49
50/* Index of predefined Linux callback client operations */ 50/* Index of predefined Linux callback client operations */
51 51
52enum { 52#define to_delegation(cb) \
53 NFSPROC4_CLNT_CB_NULL = 0, 53 container_of(cb, struct nfs4_delegation, dl_recall)
54 NFSPROC4_CLNT_CB_RECALL,
55 NFSPROC4_CLNT_CB_SEQUENCE,
56};
57 54
58struct nfs4_cb_compound_hdr { 55struct nfs4_cb_compound_hdr {
59 /* args */ 56 /* args */
@@ -494,7 +491,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
494static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, 491static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
495 const struct nfsd4_callback *cb) 492 const struct nfsd4_callback *cb)
496{ 493{
497 const struct nfs4_delegation *args = cb->cb_op; 494 const struct nfs4_delegation *dp = to_delegation(cb);
498 struct nfs4_cb_compound_hdr hdr = { 495 struct nfs4_cb_compound_hdr hdr = {
499 .ident = cb->cb_clp->cl_cb_ident, 496 .ident = cb->cb_clp->cl_cb_ident,
500 .minorversion = cb->cb_minorversion, 497 .minorversion = cb->cb_minorversion,
@@ -502,7 +499,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
502 499
503 encode_cb_compound4args(xdr, &hdr); 500 encode_cb_compound4args(xdr, &hdr);
504 encode_cb_sequence4args(xdr, cb, &hdr); 501 encode_cb_sequence4args(xdr, cb, &hdr);
505 encode_cb_recall4args(xdr, args, &hdr); 502 encode_cb_recall4args(xdr, dp, &hdr);
506 encode_cb_nops(&hdr); 503 encode_cb_nops(&hdr);
507} 504}
508 505
@@ -746,27 +743,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
746 743
747static struct workqueue_struct *callback_wq; 744static struct workqueue_struct *callback_wq;
748 745
749static void run_nfsd4_cb(struct nfsd4_callback *cb)
750{
751 queue_work(callback_wq, &cb->cb_work);
752}
753
754static void do_probe_callback(struct nfs4_client *clp)
755{
756 struct nfsd4_callback *cb = &clp->cl_cb_null;
757
758 cb->cb_op = NULL;
759 cb->cb_clp = clp;
760
761 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
762 cb->cb_msg.rpc_argp = NULL;
763 cb->cb_msg.rpc_resp = NULL;
764
765 cb->cb_ops = &nfsd4_cb_probe_ops;
766
767 run_nfsd4_cb(cb);
768}
769
770/* 746/*
771 * Poke the callback thread to process any updates to the callback 747 * Poke the callback thread to process any updates to the callback
772 * parameters, and send a null probe. 748 * parameters, and send a null probe.
@@ -775,7 +751,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp)
775{ 751{
776 clp->cl_cb_state = NFSD4_CB_UNKNOWN; 752 clp->cl_cb_state = NFSD4_CB_UNKNOWN;
777 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); 753 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
778 do_probe_callback(clp); 754 nfsd4_run_cb(&clp->cl_cb_null);
779} 755}
780 756
781void nfsd4_probe_callback_sync(struct nfs4_client *clp) 757void nfsd4_probe_callback_sync(struct nfs4_client *clp)
@@ -847,23 +823,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
847 rpc_wake_up_next(&clp->cl_cb_waitq); 823 rpc_wake_up_next(&clp->cl_cb_waitq);
848 dprintk("%s: freed slot, new seqid=%d\n", __func__, 824 dprintk("%s: freed slot, new seqid=%d\n", __func__,
849 clp->cl_cb_session->se_cb_seq_nr); 825 clp->cl_cb_session->se_cb_seq_nr);
850
851 /* We're done looking into the sequence information */
852 task->tk_msg.rpc_resp = NULL;
853 } 826 }
854}
855
856
857static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
858{
859 struct nfsd4_callback *cb = calldata;
860 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
861 struct nfs4_client *clp = cb->cb_clp;
862 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
863
864 nfsd4_cb_done(task, calldata);
865 827
866 if (current_rpc_client != task->tk_client) { 828 if (clp->cl_cb_client != task->tk_client) {
867 /* We're shutting down or changing cl_cb_client; leave 829 /* We're shutting down or changing cl_cb_client; leave
868 * it to nfsd4_process_cb_update to restart the call if 830 * it to nfsd4_process_cb_update to restart the call if
869 * necessary. */ 831 * necessary. */
@@ -872,47 +834,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
872 834
873 if (cb->cb_done) 835 if (cb->cb_done)
874 return; 836 return;
875 switch (task->tk_status) { 837
838 switch (cb->cb_ops->done(cb, task)) {
876 case 0: 839 case 0:
877 cb->cb_done = true; 840 task->tk_status = 0;
841 rpc_restart_call_prepare(task);
878 return; 842 return;
879 case -EBADHANDLE: 843 case 1:
880 case -NFS4ERR_BAD_STATEID:
881 /* Race: client probably got cb_recall
882 * before open reply granting delegation */
883 break; 844 break;
884 default: 845 case -1:
885 /* Network partition? */ 846 /* Network partition? */
886 nfsd4_mark_cb_down(clp, task->tk_status); 847 nfsd4_mark_cb_down(clp, task->tk_status);
848 break;
849 default:
850 BUG();
887 } 851 }
888 if (dp->dl_retries--) {
889 rpc_delay(task, 2*HZ);
890 task->tk_status = 0;
891 rpc_restart_call_prepare(task);
892 return;
893 }
894 nfsd4_mark_cb_down(clp, task->tk_status);
895 cb->cb_done = true; 852 cb->cb_done = true;
896} 853}
897 854
898static void nfsd4_cb_recall_release(void *calldata) 855static void nfsd4_cb_release(void *calldata)
899{ 856{
900 struct nfsd4_callback *cb = calldata; 857 struct nfsd4_callback *cb = calldata;
901 struct nfs4_client *clp = cb->cb_clp; 858 struct nfs4_client *clp = cb->cb_clp;
902 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
903 859
904 if (cb->cb_done) { 860 if (cb->cb_done) {
905 spin_lock(&clp->cl_lock); 861 spin_lock(&clp->cl_lock);
906 list_del(&cb->cb_per_client); 862 list_del(&cb->cb_per_client);
907 spin_unlock(&clp->cl_lock); 863 spin_unlock(&clp->cl_lock);
908 nfs4_put_stid(&dp->dl_stid); 864
865 cb->cb_ops->release(cb);
909 } 866 }
910} 867}
911 868
912static const struct rpc_call_ops nfsd4_cb_recall_ops = { 869static const struct rpc_call_ops nfsd4_cb_ops = {
913 .rpc_call_prepare = nfsd4_cb_prepare, 870 .rpc_call_prepare = nfsd4_cb_prepare,
914 .rpc_call_done = nfsd4_cb_recall_done, 871 .rpc_call_done = nfsd4_cb_done,
915 .rpc_release = nfsd4_cb_recall_release, 872 .rpc_release = nfsd4_cb_release,
916}; 873};
917 874
918int nfsd4_create_callback_queue(void) 875int nfsd4_create_callback_queue(void)
@@ -937,16 +894,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
937 * instead, nfsd4_run_cb_null() will detect the killed 894 * instead, nfsd4_run_cb_null() will detect the killed
938 * client, destroy the rpc client, and stop: 895 * client, destroy the rpc client, and stop:
939 */ 896 */
940 do_probe_callback(clp); 897 nfsd4_run_cb(&clp->cl_cb_null);
941 flush_workqueue(callback_wq); 898 flush_workqueue(callback_wq);
942} 899}
943 900
944static void nfsd4_release_cb(struct nfsd4_callback *cb)
945{
946 if (cb->cb_ops->rpc_release)
947 cb->cb_ops->rpc_release(cb);
948}
949
950/* requires cl_lock: */ 901/* requires cl_lock: */
951static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) 902static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
952{ 903{
@@ -1009,63 +960,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
1009 } 960 }
1010 /* Yay, the callback channel's back! Restart any callbacks: */ 961 /* Yay, the callback channel's back! Restart any callbacks: */
1011 list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) 962 list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
1012 run_nfsd4_cb(cb); 963 queue_work(callback_wq, &cb->cb_work);
1013} 964}
1014 965
1015static void 966static void
1016nfsd4_run_callback_rpc(struct nfsd4_callback *cb) 967nfsd4_run_cb_work(struct work_struct *work)
1017{ 968{
969 struct nfsd4_callback *cb =
970 container_of(work, struct nfsd4_callback, cb_work);
1018 struct nfs4_client *clp = cb->cb_clp; 971 struct nfs4_client *clp = cb->cb_clp;
1019 struct rpc_clnt *clnt; 972 struct rpc_clnt *clnt;
1020 973
974 if (cb->cb_ops && cb->cb_ops->prepare)
975 cb->cb_ops->prepare(cb);
976
1021 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) 977 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
1022 nfsd4_process_cb_update(cb); 978 nfsd4_process_cb_update(cb);
1023 979
1024 clnt = clp->cl_cb_client; 980 clnt = clp->cl_cb_client;
1025 if (!clnt) { 981 if (!clnt) {
1026 /* Callback channel broken, or client killed; give up: */ 982 /* Callback channel broken, or client killed; give up: */
1027 nfsd4_release_cb(cb); 983 if (cb->cb_ops && cb->cb_ops->release)
984 cb->cb_ops->release(cb);
1028 return; 985 return;
1029 } 986 }
1030 cb->cb_msg.rpc_cred = clp->cl_cb_cred; 987 cb->cb_msg.rpc_cred = clp->cl_cb_cred;
1031 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 988 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
1032 cb->cb_ops, cb); 989 cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
1033}
1034
1035void
1036nfsd4_run_cb_null(struct work_struct *w)
1037{
1038 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
1039 cb_work);
1040 nfsd4_run_callback_rpc(cb);
1041}
1042
1043void
1044nfsd4_run_cb_recall(struct work_struct *w)
1045{
1046 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
1047 cb_work);
1048
1049 nfsd4_prepare_cb_recall(cb->cb_op);
1050 nfsd4_run_callback_rpc(cb);
1051} 990}
1052 991
1053void nfsd4_cb_recall(struct nfs4_delegation *dp) 992void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
993 struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
1054{ 994{
1055 struct nfsd4_callback *cb = &dp->dl_recall;
1056 struct nfs4_client *clp = dp->dl_stid.sc_client;
1057
1058 dp->dl_retries = 1;
1059 cb->cb_op = dp;
1060 cb->cb_clp = clp; 995 cb->cb_clp = clp;
1061 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; 996 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
1062 cb->cb_msg.rpc_argp = cb; 997 cb->cb_msg.rpc_argp = cb;
1063 cb->cb_msg.rpc_resp = cb; 998 cb->cb_msg.rpc_resp = cb;
1064 999 cb->cb_ops = ops;
1065 cb->cb_ops = &nfsd4_cb_recall_ops; 1000 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
1066
1067 INIT_LIST_HEAD(&cb->cb_per_client); 1001 INIT_LIST_HEAD(&cb->cb_per_client);
1068 cb->cb_done = true; 1002 cb->cb_done = true;
1003}
1069 1004
1070 run_nfsd4_cb(&dp->dl_recall); 1005void nfsd4_run_cb(struct nfsd4_callback *cb)
1006{
1007 queue_work(callback_wq, &cb->cb_work);
1071} 1008}
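
The refactoring above replaces the per-operation rpc_call_ops (and the dedicated cb_null/cb_recall work functions) with one table-driven dispatcher: a callback is now described by a struct nfsd4_callback_ops and armed with nfsd4_init_cb()/nfsd4_run_cb(), while nfsd4_cb_done() interprets the ->done() return value (0 = restart the RPC, 1 = finished, -1 = mark the backchannel down). A sketch, not part of the patch, of how a new callback type would plug into this scheme; the field layout of nfsd4_callback_ops is inferred from the call sites in this hunk rather than copied from state.h:

/* Sketch only -- field layout inferred from the call sites above. */
struct nfsd4_callback_ops {
	void (*prepare)(struct nfsd4_callback *cb);
	int (*done)(struct nfsd4_callback *cb, struct rpc_task *task);
	void (*release)(struct nfsd4_callback *cb);
};

static void my_cb_prepare(struct nfsd4_callback *cb)
{
	/* grab whatever state the RPC needs before it is transmitted */
}

static int my_cb_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
	if (task->tk_status == -NFS4ERR_DELAY) {
		rpc_delay(task, 2 * HZ);
		return 0;		/* nfsd4_cb_done() restarts the call */
	}
	return task->tk_status ? -1 : 1; /* -1: mark cb down, 1: finished */
}

static void my_cb_release(struct nfsd4_callback *cb)
{
	/* drop the reference taken before nfsd4_run_cb() */
}

static struct nfsd4_callback_ops my_cb_ops = {
	.prepare	= my_cb_prepare,
	.done		= my_cb_done,
	.release	= my_cb_release,
};

/* Usage, mirroring the CB_RECALL conversion in nfs4state.c below:
 *	nfsd4_init_cb(&obj->cb, clp, &my_cb_ops, NFSPROC4_CLNT_CB_RECALL);
 *	nfsd4_run_cb(&obj->cb);
 */
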
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index a0ab0a847d69..e1b3d3d472da 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -215,7 +215,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
215 memset(&ent, 0, sizeof(ent)); 215 memset(&ent, 0, sizeof(ent));
216 216
217 /* Authentication name */ 217 /* Authentication name */
218 if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) 218 len = qword_get(&buf, buf1, PAGE_SIZE);
219 if (len <= 0 || len >= IDMAP_NAMESZ)
219 goto out; 220 goto out;
220 memcpy(ent.authname, buf1, sizeof(ent.authname)); 221 memcpy(ent.authname, buf1, sizeof(ent.authname));
221 222
@@ -245,12 +246,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
245 /* Name */ 246 /* Name */
246 error = -EINVAL; 247 error = -EINVAL;
247 len = qword_get(&buf, buf1, PAGE_SIZE); 248 len = qword_get(&buf, buf1, PAGE_SIZE);
248 if (len < 0) 249 if (len < 0 || len >= IDMAP_NAMESZ)
249 goto out; 250 goto out;
250 if (len == 0) 251 if (len == 0)
251 set_bit(CACHE_NEGATIVE, &ent.h.flags); 252 set_bit(CACHE_NEGATIVE, &ent.h.flags);
252 else if (len >= IDMAP_NAMESZ)
253 goto out;
254 else 253 else
255 memcpy(ent.name, buf1, sizeof(ent.name)); 254 memcpy(ent.name, buf1, sizeof(ent.name));
256 error = -ENOMEM; 255 error = -ENOMEM;
@@ -259,15 +258,12 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
259 goto out; 258 goto out;
260 259
261 cache_put(&res->h, cd); 260 cache_put(&res->h, cd);
262
263 error = 0; 261 error = 0;
264out: 262out:
265 kfree(buf1); 263 kfree(buf1);
266
267 return error; 264 return error;
268} 265}
269 266
270
271static struct ent * 267static struct ent *
272idtoname_lookup(struct cache_detail *cd, struct ent *item) 268idtoname_lookup(struct cache_detail *cd, struct ent *item)
273{ 269{
@@ -368,7 +364,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
368{ 364{
369 struct ent ent, *res; 365 struct ent ent, *res;
370 char *buf1; 366 char *buf1;
371 int error = -EINVAL; 367 int len, error = -EINVAL;
372 368
373 if (buf[buflen - 1] != '\n') 369 if (buf[buflen - 1] != '\n')
374 return (-EINVAL); 370 return (-EINVAL);
@@ -381,7 +377,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
381 memset(&ent, 0, sizeof(ent)); 377 memset(&ent, 0, sizeof(ent));
382 378
383 /* Authentication name */ 379 /* Authentication name */
384 if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) 380 len = qword_get(&buf, buf1, PAGE_SIZE);
381 if (len <= 0 || len >= IDMAP_NAMESZ)
385 goto out; 382 goto out;
386 memcpy(ent.authname, buf1, sizeof(ent.authname)); 383 memcpy(ent.authname, buf1, sizeof(ent.authname));
387 384
@@ -392,8 +389,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
392 IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; 389 IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
393 390
394 /* Name */ 391 /* Name */
395 error = qword_get(&buf, buf1, PAGE_SIZE); 392 len = qword_get(&buf, buf1, PAGE_SIZE);
396 if (error <= 0 || error >= IDMAP_NAMESZ) 393 if (len <= 0 || len >= IDMAP_NAMESZ)
397 goto out; 394 goto out;
398 memcpy(ent.name, buf1, sizeof(ent.name)); 395 memcpy(ent.name, buf1, sizeof(ent.name));
399 396
@@ -421,7 +418,6 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
421 error = 0; 418 error = 0;
422out: 419out:
423 kfree(buf1); 420 kfree(buf1);
424
425 return (error); 421 return (error);
426} 422}
427 423
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5e0dc528a0e8..cdeb3cfd6f32 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1013 return status; 1013 return status;
1014} 1014}
1015 1015
1016static __be32
1017nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1018 struct nfsd4_seek *seek)
1019{
1020 int whence;
1021 __be32 status;
1022 struct file *file;
1023
1024 status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
1025 &seek->seek_stateid,
1026 RD_STATE, &file);
1027 if (status) {
1028 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
1029 return status;
1030 }
1031
1032 switch (seek->seek_whence) {
1033 case NFS4_CONTENT_DATA:
1034 whence = SEEK_DATA;
1035 break;
1036 case NFS4_CONTENT_HOLE:
1037 whence = SEEK_HOLE;
1038 break;
1039 default:
1040 status = nfserr_union_notsupp;
1041 goto out;
1042 }
1043
1044 /*
1045 * Note: This call does change file->f_pos, but nothing in NFSD
1046 * should ever use file->f_pos.
1047 */
1048 seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
1049 if (seek->seek_pos < 0)
1050 status = nfserrno(seek->seek_pos);
1051 else if (seek->seek_pos >= i_size_read(file_inode(file)))
1052 seek->seek_eof = true;
1053
1054out:
1055 fput(file);
1056 return status;
1057}
1058
1016/* This routine never returns NFS_OK! If there are no other errors, it 1059/* This routine never returns NFS_OK! If there are no other errors, it
1017 * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the 1060 * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
1018 * attributes matched. VERIFY is implemented by mapping NFSERR_SAME 1061 * attributes matched. VERIFY is implemented by mapping NFSERR_SAME
@@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
1881 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, 1924 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
1882 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 1925 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1883 }, 1926 },
1927
1928 /* NFSv4.2 operations */
1929 [OP_SEEK] = {
1930 .op_func = (nfsd4op_func)nfsd4_seek,
1931 .op_name = "OP_SEEK",
1932 },
1884}; 1933};
1885 1934
1886int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) 1935int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
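
The new nfsd4_seek() above maps NFS4_CONTENT_DATA and NFS4_CONTENT_HOLE straight onto vfs_llseek(SEEK_DATA/SEEK_HOLE), so OP_SEEK exposes the same sparse-file semantics as local lseek(2). A small userspace illustration of those two whence values (not part of the patch; assumes a glibc system with _GNU_SOURCE):

#define _GNU_SOURCE		/* for SEEK_DATA / SEEK_HOLE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	off_t data, hole;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* first data region at or after offset 0 (skips a leading hole) */
	data = lseek(fd, 0, SEEK_DATA);
	/* first hole at or after that point; EOF is treated as a hole */
	hole = lseek(fd, data < 0 ? 0 : data, SEEK_HOLE);

	printf("data at %lld, next hole at %lld\n",
	       (long long)data, (long long)hole);
	close(fd);
	return 0;
}
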
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 9c271f42604a..ea95a2bc21b5 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops {
58 void (*create)(struct nfs4_client *); 58 void (*create)(struct nfs4_client *);
59 void (*remove)(struct nfs4_client *); 59 void (*remove)(struct nfs4_client *);
60 int (*check)(struct nfs4_client *); 60 int (*check)(struct nfs4_client *);
61 void (*grace_done)(struct nfsd_net *, time_t); 61 void (*grace_done)(struct nfsd_net *);
62}; 62};
63 63
64/* Globals */ 64/* Globals */
@@ -188,7 +188,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
188 188
189 status = mnt_want_write_file(nn->rec_file); 189 status = mnt_want_write_file(nn->rec_file);
190 if (status) 190 if (status)
191 return; 191 goto out_creds;
192 192
193 dir = nn->rec_file->f_path.dentry; 193 dir = nn->rec_file->f_path.dentry;
194 /* lock the parent */ 194 /* lock the parent */
@@ -228,6 +228,7 @@ out_unlock:
228 user_recovery_dirname); 228 user_recovery_dirname);
229 } 229 }
230 mnt_drop_write_file(nn->rec_file); 230 mnt_drop_write_file(nn->rec_file);
231out_creds:
231 nfs4_reset_creds(original_cred); 232 nfs4_reset_creds(original_cred);
232} 233}
233 234
@@ -392,7 +393,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
392} 393}
393 394
394static void 395static void
395nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) 396nfsd4_recdir_purge_old(struct nfsd_net *nn)
396{ 397{
397 int status; 398 int status;
398 399
@@ -479,6 +480,16 @@ nfsd4_init_recdir(struct net *net)
479 return status; 480 return status;
480} 481}
481 482
483static void
484nfsd4_shutdown_recdir(struct net *net)
485{
486 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
487
488 if (!nn->rec_file)
489 return;
490 fput(nn->rec_file);
491 nn->rec_file = NULL;
492}
482 493
483static int 494static int
484nfs4_legacy_state_init(struct net *net) 495nfs4_legacy_state_init(struct net *net)
@@ -512,10 +523,13 @@ nfsd4_load_reboot_recovery_data(struct net *net)
512 int status; 523 int status;
513 524
514 status = nfsd4_init_recdir(net); 525 status = nfsd4_init_recdir(net);
515 if (!status)
516 status = nfsd4_recdir_load(net);
517 if (status) 526 if (status)
518 printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); 527 return status;
528
529 status = nfsd4_recdir_load(net);
530 if (status)
531 nfsd4_shutdown_recdir(net);
532
519 return status; 533 return status;
520} 534}
521 535
@@ -546,21 +560,12 @@ err:
546} 560}
547 561
548static void 562static void
549nfsd4_shutdown_recdir(struct nfsd_net *nn)
550{
551 if (!nn->rec_file)
552 return;
553 fput(nn->rec_file);
554 nn->rec_file = NULL;
555}
556
557static void
558nfsd4_legacy_tracking_exit(struct net *net) 563nfsd4_legacy_tracking_exit(struct net *net)
559{ 564{
560 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 565 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
561 566
562 nfs4_release_reclaim(nn); 567 nfs4_release_reclaim(nn);
563 nfsd4_shutdown_recdir(nn); 568 nfsd4_shutdown_recdir(net);
564 nfs4_legacy_state_shutdown(net); 569 nfs4_legacy_state_shutdown(net);
565} 570}
566 571
@@ -1016,7 +1021,7 @@ nfsd4_cld_check(struct nfs4_client *clp)
1016} 1021}
1017 1022
1018static void 1023static void
1019nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) 1024nfsd4_cld_grace_done(struct nfsd_net *nn)
1020{ 1025{
1021 int ret; 1026 int ret;
1022 struct cld_upcall *cup; 1027 struct cld_upcall *cup;
@@ -1029,7 +1034,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time)
1029 } 1034 }
1030 1035
1031 cup->cu_msg.cm_cmd = Cld_GraceDone; 1036 cup->cu_msg.cm_cmd = Cld_GraceDone;
1032 cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; 1037 cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
1033 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1038 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1034 if (!ret) 1039 if (!ret)
1035 ret = cup->cu_msg.cm_status; 1040 ret = cup->cu_msg.cm_status;
@@ -1062,6 +1067,8 @@ MODULE_PARM_DESC(cltrack_legacy_disable,
1062 1067
1063#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" 1068#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
1064#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" 1069#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
1070#define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION="
1071#define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START="
1065 1072
1066static char * 1073static char *
1067nfsd4_cltrack_legacy_topdir(void) 1074nfsd4_cltrack_legacy_topdir(void)
@@ -1126,10 +1133,60 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
1126 return result; 1133 return result;
1127} 1134}
1128 1135
1136static char *
1137nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
1138{
1139 int copied;
1140 size_t len;
1141 char *result;
1142
1143 /* prefix + Y/N character + terminating NULL */
1144 len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1;
1145
1146 result = kmalloc(len, GFP_KERNEL);
1147 if (!result)
1148 return result;
1149
1150 copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c",
1151 clp->cl_minorversion ? 'Y' : 'N');
1152 if (copied >= len) {
1153 /* just return nothing if output was truncated */
1154 kfree(result);
1155 return NULL;
1156 }
1157
1158 return result;
1159}
1160
1161static char *
1162nfsd4_cltrack_grace_start(time_t grace_start)
1163{
1164 int copied;
1165 size_t len;
1166 char *result;
1167
1168 /* prefix + max width of int64_t string + terminating NULL */
1169 len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1;
1170
1171 result = kmalloc(len, GFP_KERNEL);
1172 if (!result)
1173 return result;
1174
1175 copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
1176 grace_start);
1177 if (copied >= len) {
1178 /* just return nothing if output was truncated */
1179 kfree(result);
1180 return NULL;
1181 }
1182
1183 return result;
1184}
1185
1129static int 1186static int
1130nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) 1187nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
1131{ 1188{
1132 char *envp[2]; 1189 char *envp[3];
1133 char *argv[4]; 1190 char *argv[4];
1134 int ret; 1191 int ret;
1135 1192
@@ -1140,10 +1197,12 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy)
1140 1197
1141 dprintk("%s: cmd: %s\n", __func__, cmd); 1198 dprintk("%s: cmd: %s\n", __func__, cmd);
1142 dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); 1199 dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
1143 dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); 1200 dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)");
1201 dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)");
1144 1202
1145 envp[0] = legacy; 1203 envp[0] = env0;
1146 envp[1] = NULL; 1204 envp[1] = env1;
1205 envp[2] = NULL;
1147 1206
1148 argv[0] = (char *)cltrack_prog; 1207 argv[0] = (char *)cltrack_prog;
1149 argv[1] = cmd; 1208 argv[1] = cmd;
@@ -1187,28 +1246,78 @@ bin_to_hex_dup(const unsigned char *src, int srclen)
1187} 1246}
1188 1247
1189static int 1248static int
1190nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) 1249nfsd4_umh_cltrack_init(struct net *net)
1191{ 1250{
1251 int ret;
1252 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1253 char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
1254
1192 /* XXX: The usermode helper s not working in container yet. */ 1255 /* XXX: The usermode helper s not working in container yet. */
1193 if (net != &init_net) { 1256 if (net != &init_net) {
1194 WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " 1257 WARN(1, KERN_ERR "NFSD: attempt to initialize umh client "
1195 "tracking in a container!\n"); 1258 "tracking in a container!\n");
1196 return -EINVAL; 1259 return -EINVAL;
1197 } 1260 }
1198 return nfsd4_umh_cltrack_upcall("init", NULL, NULL); 1261
1262 ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
1263 kfree(grace_start);
1264 return ret;
1265}
1266
1267static void
1268nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
1269{
1270 wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK,
1271 TASK_UNINTERRUPTIBLE);
1272}
1273
1274static void
1275nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
1276{
1277 smp_mb__before_atomic();
1278 clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
1279 smp_mb__after_atomic();
1280 wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
1199} 1281}
1200 1282
1201static void 1283static void
1202nfsd4_umh_cltrack_create(struct nfs4_client *clp) 1284nfsd4_umh_cltrack_create(struct nfs4_client *clp)
1203{ 1285{
1204 char *hexid; 1286 char *hexid, *has_session, *grace_start;
1287 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1288
1289 /*
1290 * With v4.0 clients, there's little difference in outcome between a
1291 * create and check operation, and we can end up calling into this
1292 * function multiple times per client (once for each openowner). So,
1293 * for v4.0 clients skip upcalling once the client has been recorded
1294 * on stable storage.
1295 *
1296 * For v4.1+ clients, the outcome of the two operations is different,
1297 * so we must ensure that we upcall for the create operation. v4.1+
1298 * clients call this on RECLAIM_COMPLETE though, so we should only end
1299 * up doing a single create upcall per client.
1300 */
1301 if (clp->cl_minorversion == 0 &&
1302 test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1303 return;
1205 1304
1206 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1305 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1207 if (!hexid) { 1306 if (!hexid) {
1208 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1307 dprintk("%s: can't allocate memory for upcall!\n", __func__);
1209 return; 1308 return;
1210 } 1309 }
1211 nfsd4_umh_cltrack_upcall("create", hexid, NULL); 1310
1311 has_session = nfsd4_cltrack_client_has_session(clp);
1312 grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
1313
1314 nfsd4_cltrack_upcall_lock(clp);
1315 if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start))
1316 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1317 nfsd4_cltrack_upcall_unlock(clp);
1318
1319 kfree(has_session);
1320 kfree(grace_start);
1212 kfree(hexid); 1321 kfree(hexid);
1213} 1322}
1214 1323
@@ -1217,12 +1326,21 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
1217{ 1326{
1218 char *hexid; 1327 char *hexid;
1219 1328
1329 if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1330 return;
1331
1220 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1332 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1221 if (!hexid) { 1333 if (!hexid) {
1222 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1334 dprintk("%s: can't allocate memory for upcall!\n", __func__);
1223 return; 1335 return;
1224 } 1336 }
1225 nfsd4_umh_cltrack_upcall("remove", hexid, NULL); 1337
1338 nfsd4_cltrack_upcall_lock(clp);
1339 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) &&
1340 nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0)
1341 clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1342 nfsd4_cltrack_upcall_unlock(clp);
1343
1226 kfree(hexid); 1344 kfree(hexid);
1227} 1345}
1228 1346
@@ -1230,30 +1348,45 @@ static int
1230nfsd4_umh_cltrack_check(struct nfs4_client *clp) 1348nfsd4_umh_cltrack_check(struct nfs4_client *clp)
1231{ 1349{
1232 int ret; 1350 int ret;
1233 char *hexid, *legacy; 1351 char *hexid, *has_session, *legacy;
1352
1353 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1354 return 0;
1234 1355
1235 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1356 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1236 if (!hexid) { 1357 if (!hexid) {
1237 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1358 dprintk("%s: can't allocate memory for upcall!\n", __func__);
1238 return -ENOMEM; 1359 return -ENOMEM;
1239 } 1360 }
1361
1362 has_session = nfsd4_cltrack_client_has_session(clp);
1240 legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); 1363 legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
1241 ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); 1364
1365 nfsd4_cltrack_upcall_lock(clp);
1366 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) {
1367 ret = 0;
1368 } else {
1369 ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy);
1370 if (ret == 0)
1371 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1372 }
1373 nfsd4_cltrack_upcall_unlock(clp);
1374 kfree(has_session);
1242 kfree(legacy); 1375 kfree(legacy);
1243 kfree(hexid); 1376 kfree(hexid);
1377
1244 return ret; 1378 return ret;
1245} 1379}
1246 1380
1247static void 1381static void
1248nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, 1382nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
1249 time_t boot_time)
1250{ 1383{
1251 char *legacy; 1384 char *legacy;
1252 char timestr[22]; /* FIXME: better way to determine max size? */ 1385 char timestr[22]; /* FIXME: better way to determine max size? */
1253 1386
1254 sprintf(timestr, "%ld", boot_time); 1387 sprintf(timestr, "%ld", nn->boot_time);
1255 legacy = nfsd4_cltrack_legacy_topdir(); 1388 legacy = nfsd4_cltrack_legacy_topdir();
1256 nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); 1389 nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
1257 kfree(legacy); 1390 kfree(legacy);
1258} 1391}
1259 1392
@@ -1356,10 +1489,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
1356} 1489}
1357 1490
1358void 1491void
1359nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) 1492nfsd4_record_grace_done(struct nfsd_net *nn)
1360{ 1493{
1361 if (nn->client_tracking_ops) 1494 if (nn->client_tracking_ops)
1362 nn->client_tracking_ops->grace_done(nn, boot_time); 1495 nn->client_tracking_ops->grace_done(nn);
1363} 1496}
1364 1497
1365static int 1498static int
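
The two environment variables added above (NFSDCLTRACK_CLIENT_HAS_SESSION carrying 'Y' or 'N', and NFSDCLTRACK_GRACE_START carrying nn->boot_time formatted with "%ld") are meant for the userspace client-tracking helper. A hypothetical helper-side sketch of reading them, written for illustration and not taken from nfs-utils:

#include <stdbool.h>
#include <stdlib.h>
#include <time.h>

/* Variable names come from the #defines added in nfs4recover.c above. */
bool upcall_client_has_session(void)
{
	const char *val = getenv("NFSDCLTRACK_CLIENT_HAS_SESSION");

	return val && val[0] == 'Y';	/* kernel sends 'Y' or 'N' */
}

time_t upcall_grace_start(void)
{
	const char *val = getenv("NFSDCLTRACK_GRACE_START");

	/* kernel formats nn->boot_time with "%ld"; 0 means "not provided" */
	return val ? (time_t)strtoll(val, NULL, 10) : 0;
}
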
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2e80a59e7e91..551f32d7f5c7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -96,6 +96,8 @@ static struct kmem_cache *deleg_slab;
96 96
97static void free_session(struct nfsd4_session *); 97static void free_session(struct nfsd4_session *);
98 98
99static struct nfsd4_callback_ops nfsd4_cb_recall_ops;
100
99static bool is_session_dead(struct nfsd4_session *ses) 101static bool is_session_dead(struct nfsd4_session *ses)
100{ 102{
101 return ses->se_flags & NFS4_SESSION_DEAD; 103 return ses->se_flags & NFS4_SESSION_DEAD;
@@ -645,7 +647,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
645 INIT_LIST_HEAD(&dp->dl_perclnt); 647 INIT_LIST_HEAD(&dp->dl_perclnt);
646 INIT_LIST_HEAD(&dp->dl_recall_lru); 648 INIT_LIST_HEAD(&dp->dl_recall_lru);
647 dp->dl_type = NFS4_OPEN_DELEGATE_READ; 649 dp->dl_type = NFS4_OPEN_DELEGATE_READ;
648 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); 650 dp->dl_retries = 1;
651 nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
652 &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
649 return dp; 653 return dp;
650out_dec: 654out_dec:
651 atomic_long_dec(&num_delegations); 655 atomic_long_dec(&num_delegations);
@@ -673,15 +677,20 @@ nfs4_put_stid(struct nfs4_stid *s)
673 677
674static void nfs4_put_deleg_lease(struct nfs4_file *fp) 678static void nfs4_put_deleg_lease(struct nfs4_file *fp)
675{ 679{
676 lockdep_assert_held(&state_lock); 680 struct file *filp = NULL;
681 struct file_lock *fl;
677 682
678 if (!fp->fi_lease) 683 spin_lock(&fp->fi_lock);
679 return; 684 if (fp->fi_lease && atomic_dec_and_test(&fp->fi_delegees)) {
680 if (atomic_dec_and_test(&fp->fi_delegees)) { 685 swap(filp, fp->fi_deleg_file);
681 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); 686 fl = fp->fi_lease;
682 fp->fi_lease = NULL; 687 fp->fi_lease = NULL;
683 fput(fp->fi_deleg_file); 688 }
684 fp->fi_deleg_file = NULL; 689 spin_unlock(&fp->fi_lock);
690
691 if (filp) {
692 vfs_setlease(filp, F_UNLCK, &fl);
693 fput(filp);
685 } 694 }
686} 695}
687 696
@@ -717,8 +726,6 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
717 list_del_init(&dp->dl_recall_lru); 726 list_del_init(&dp->dl_recall_lru);
718 list_del_init(&dp->dl_perfile); 727 list_del_init(&dp->dl_perfile);
719 spin_unlock(&fp->fi_lock); 728 spin_unlock(&fp->fi_lock);
720 if (fp)
721 nfs4_put_deleg_lease(fp);
722} 729}
723 730
724static void destroy_delegation(struct nfs4_delegation *dp) 731static void destroy_delegation(struct nfs4_delegation *dp)
@@ -726,6 +733,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
726 spin_lock(&state_lock); 733 spin_lock(&state_lock);
727 unhash_delegation_locked(dp); 734 unhash_delegation_locked(dp);
728 spin_unlock(&state_lock); 735 spin_unlock(&state_lock);
736 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
729 nfs4_put_stid(&dp->dl_stid); 737 nfs4_put_stid(&dp->dl_stid);
730} 738}
731 739
@@ -735,6 +743,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
735 743
736 WARN_ON(!list_empty(&dp->dl_recall_lru)); 744 WARN_ON(!list_empty(&dp->dl_recall_lru));
737 745
746 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
747
738 if (clp->cl_minorversion == 0) 748 if (clp->cl_minorversion == 0)
739 nfs4_put_stid(&dp->dl_stid); 749 nfs4_put_stid(&dp->dl_stid);
740 else { 750 else {
@@ -1635,6 +1645,7 @@ __destroy_client(struct nfs4_client *clp)
1635 while (!list_empty(&reaplist)) { 1645 while (!list_empty(&reaplist)) {
1636 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1646 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1637 list_del_init(&dp->dl_recall_lru); 1647 list_del_init(&dp->dl_recall_lru);
1648 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
1638 nfs4_put_stid(&dp->dl_stid); 1649 nfs4_put_stid(&dp->dl_stid);
1639 } 1650 }
1640 while (!list_empty(&clp->cl_revoked)) { 1651 while (!list_empty(&clp->cl_revoked)) {
@@ -1862,7 +1873,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
1862 free_client(clp); 1873 free_client(clp);
1863 return NULL; 1874 return NULL;
1864 } 1875 }
1865 INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); 1876 nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
1866 clp->cl_time = get_seconds(); 1877 clp->cl_time = get_seconds();
1867 clear_bit(0, &clp->cl_cb_slot_busy); 1878 clear_bit(0, &clp->cl_cb_slot_busy);
1868 copy_verf(clp, verf); 1879 copy_verf(clp, verf);
@@ -3349,8 +3360,12 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
3349 return ret; 3360 return ret;
3350} 3361}
3351 3362
3352void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) 3363#define cb_to_delegation(cb) \
3364 container_of(cb, struct nfs4_delegation, dl_recall)
3365
3366static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
3353{ 3367{
3368 struct nfs4_delegation *dp = cb_to_delegation(cb);
3354 struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, 3369 struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
3355 nfsd_net_id); 3370 nfsd_net_id);
3356 3371
@@ -3371,6 +3386,43 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
3371 spin_unlock(&state_lock); 3386 spin_unlock(&state_lock);
3372} 3387}
3373 3388
3389static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
3390 struct rpc_task *task)
3391{
3392 struct nfs4_delegation *dp = cb_to_delegation(cb);
3393
3394 switch (task->tk_status) {
3395 case 0:
3396 return 1;
3397 case -EBADHANDLE:
3398 case -NFS4ERR_BAD_STATEID:
3399 /*
3400 * Race: client probably got cb_recall before open reply
3401 * granting delegation.
3402 */
3403 if (dp->dl_retries--) {
3404 rpc_delay(task, 2 * HZ);
3405 return 0;
3406 }
3407 /*FALLTHRU*/
3408 default:
3409 return -1;
3410 }
3411}
3412
3413static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
3414{
3415 struct nfs4_delegation *dp = cb_to_delegation(cb);
3416
3417 nfs4_put_stid(&dp->dl_stid);
3418}
3419
3420static struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
3421 .prepare = nfsd4_cb_recall_prepare,
3422 .done = nfsd4_cb_recall_done,
3423 .release = nfsd4_cb_recall_release,
3424};
3425
3374static void nfsd_break_one_deleg(struct nfs4_delegation *dp) 3426static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
3375{ 3427{
3376 /* 3428 /*
@@ -3381,7 +3433,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
3381 * it's safe to take a reference. 3433 * it's safe to take a reference.
3382 */ 3434 */
3383 atomic_inc(&dp->dl_stid.sc_count); 3435 atomic_inc(&dp->dl_stid.sc_count);
3384 nfsd4_cb_recall(dp); 3436 nfsd4_run_cb(&dp->dl_recall);
3385} 3437}
3386 3438
3387/* Called from break_lease() with i_lock held. */ 3439/* Called from break_lease() with i_lock held. */
@@ -3759,7 +3811,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
3759 fl = locks_alloc_lock(); 3811 fl = locks_alloc_lock();
3760 if (!fl) 3812 if (!fl)
3761 return NULL; 3813 return NULL;
3762 locks_init_lock(fl);
3763 fl->fl_lmops = &nfsd_lease_mng_ops; 3814 fl->fl_lmops = &nfsd_lease_mng_ops;
3764 fl->fl_flags = FL_DELEG; 3815 fl->fl_flags = FL_DELEG;
3765 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; 3816 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
@@ -4107,7 +4158,7 @@ out:
4107 return status; 4158 return status;
4108} 4159}
4109 4160
4110static void 4161void
4111nfsd4_end_grace(struct nfsd_net *nn) 4162nfsd4_end_grace(struct nfsd_net *nn)
4112{ 4163{
4113 /* do nothing if grace period already ended */ 4164 /* do nothing if grace period already ended */
@@ -4116,14 +4167,28 @@ nfsd4_end_grace(struct nfsd_net *nn)
4116 4167
4117 dprintk("NFSD: end of grace period\n"); 4168 dprintk("NFSD: end of grace period\n");
4118 nn->grace_ended = true; 4169 nn->grace_ended = true;
4119 nfsd4_record_grace_done(nn, nn->boot_time); 4170 /*
4171 * If the server goes down again right now, an NFSv4
4172 * client will still be allowed to reclaim after it comes back up,
4173 * even if it hasn't yet had a chance to reclaim state this time.
4174 *
4175 */
4176 nfsd4_record_grace_done(nn);
4177 /*
4178 * At this point, NFSv4 clients can still reclaim. But if the
4179 * server crashes, any that have not yet reclaimed will be out
4180 * of luck on the next boot.
4181 *
4182 * (NFSv4.1+ clients are considered to have reclaimed once they
4183 * call RECLAIM_COMPLETE. NFSv4.0 clients are considered to
4184 * have reclaimed after their first OPEN.)
4185 */
4120 locks_end_grace(&nn->nfsd4_manager); 4186 locks_end_grace(&nn->nfsd4_manager);
4121 /* 4187 /*
4122 * Now that every NFSv4 client has had the chance to recover and 4188 * At this point, and once lockd and/or any other containers
4123 * to see the (possibly new, possibly shorter) lease time, we 4189 * exit their grace period, further reclaims will fail and
4124 * can safely set the next grace time to the current lease time: 4190 * regular locking can resume.
4125 */ 4191 */
4126 nn->nfsd4_grace = nn->nfsd4_lease;
4127} 4192}
4128 4193
4129static time_t 4194static time_t
@@ -5210,7 +5275,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5210 } 5275 }
5211 5276
5212 fp = lock_stp->st_stid.sc_file; 5277 fp = lock_stp->st_stid.sc_file;
5213 locks_init_lock(file_lock);
5214 switch (lock->lk_type) { 5278 switch (lock->lk_type) {
5215 case NFS4_READ_LT: 5279 case NFS4_READ_LT:
5216 case NFS4_READW_LT: 5280 case NFS4_READW_LT:
@@ -5354,7 +5418,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5354 status = nfserr_jukebox; 5418 status = nfserr_jukebox;
5355 goto out; 5419 goto out;
5356 } 5420 }
5357 locks_init_lock(file_lock); 5421
5358 switch (lockt->lt_type) { 5422 switch (lockt->lt_type) {
5359 case NFS4_READ_LT: 5423 case NFS4_READ_LT:
5360 case NFS4_READW_LT: 5424 case NFS4_READW_LT:
@@ -5432,7 +5496,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5432 status = nfserr_jukebox; 5496 status = nfserr_jukebox;
5433 goto fput; 5497 goto fput;
5434 } 5498 }
5435 locks_init_lock(file_lock); 5499
5436 file_lock->fl_type = F_UNLCK; 5500 file_lock->fl_type = F_UNLCK;
5437 file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); 5501 file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
5438 file_lock->fl_pid = current->tgid; 5502 file_lock->fl_pid = current->tgid;
@@ -5645,6 +5709,9 @@ nfs4_check_open_reclaim(clientid_t *clid,
5645 if (status) 5709 if (status)
5646 return nfserr_reclaim_bad; 5710 return nfserr_reclaim_bad;
5647 5711
5712 if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags))
5713 return nfserr_no_grace;
5714
5648 if (nfsd4_client_record_check(cstate->clp)) 5715 if (nfsd4_client_record_check(cstate->clp))
5649 return nfserr_reclaim_bad; 5716 return nfserr_reclaim_bad;
5650 5717
@@ -6342,10 +6409,10 @@ nfs4_state_start_net(struct net *net)
6342 ret = nfs4_state_create_net(net); 6409 ret = nfs4_state_create_net(net);
6343 if (ret) 6410 if (ret)
6344 return ret; 6411 return ret;
6345 nfsd4_client_tracking_init(net);
6346 nn->boot_time = get_seconds(); 6412 nn->boot_time = get_seconds();
6347 locks_start_grace(net, &nn->nfsd4_manager);
6348 nn->grace_ended = false; 6413 nn->grace_ended = false;
6414 locks_start_grace(net, &nn->nfsd4_manager);
6415 nfsd4_client_tracking_init(net);
6349 printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", 6416 printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
6350 nn->nfsd4_grace, net); 6417 nn->nfsd4_grace, net);
6351 queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); 6418 queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
@@ -6402,6 +6469,7 @@ nfs4_state_shutdown_net(struct net *net)
6402 list_for_each_safe(pos, next, &reaplist) { 6469 list_for_each_safe(pos, next, &reaplist) {
6403 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6470 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6404 list_del_init(&dp->dl_recall_lru); 6471 list_del_init(&dp->dl_recall_lru);
6472 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
6405 nfs4_put_stid(&dp->dl_stid); 6473 nfs4_put_stid(&dp->dl_stid);
6406 } 6474 }
6407 6475
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f9821ce6658a..7ec646380005 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -31,13 +31,6 @@
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 * TODO: Neil Brown made the following observation: We currently
36 * initially reserve NFSD_BUFSIZE space on the transmit queue and
37 * never release any of that until the request is complete.
38 * It would be good to calculate a new maximum response size while
39 * decoding the COMPOUND, and call svc_reserve with this number
40 * at the end of nfs4svc_decode_compoundargs.
41 */ 34 */
42 35
43#include <linux/slab.h> 36#include <linux/slab.h>
@@ -1521,6 +1514,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
1521} 1514}
1522 1515
1523static __be32 1516static __be32
1517nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
1518{
1519 DECODE_HEAD;
1520
1521 status = nfsd4_decode_stateid(argp, &seek->seek_stateid);
1522 if (status)
1523 return status;
1524
1525 READ_BUF(8 + 4);
1526 p = xdr_decode_hyper(p, &seek->seek_offset);
1527 seek->seek_whence = be32_to_cpup(p);
1528
1529 DECODE_TAIL;
1530}
1531
1532static __be32
1524nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) 1533nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
1525{ 1534{
1526 return nfs_ok; 1535 return nfs_ok;
@@ -1593,6 +1602,20 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1593 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1602 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1594 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, 1603 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
1595 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, 1604 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
1605
1606 /* new operations for NFSv4.2 */
1607 [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
1608 [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
1609 [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
1610 [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
1611 [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
1612 [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
1613 [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
1614 [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp,
1615 [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp,
1616 [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
1617 [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
1618 [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
1596}; 1619};
1597 1620
1598static inline bool 1621static inline bool
@@ -1670,6 +1693,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1670 readbytes += nfsd4_max_reply(argp->rqstp, op); 1693 readbytes += nfsd4_max_reply(argp->rqstp, op);
1671 } else 1694 } else
1672 max_reply += nfsd4_max_reply(argp->rqstp, op); 1695 max_reply += nfsd4_max_reply(argp->rqstp, op);
1696 /*
1697 * OP_LOCK may return a conflicting lock. (Special case
1698 * because it will just skip encoding this if it runs
1699 * out of xdr buffer space, and it is the only operation
1700 * that behaves this way.)
1701 */
1702 if (op->opnum == OP_LOCK)
1703 max_reply += NFS4_OPAQUE_LIMIT;
1673 1704
1674 if (op->status) { 1705 if (op->status) {
1675 argp->opcnt = i+1; 1706 argp->opcnt = i+1;
@@ -3751,6 +3782,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
3751} 3782}
3752 3783
3753static __be32 3784static __be32
3785nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
3786 struct nfsd4_seek *seek)
3787{
3788 __be32 *p;
3789
3790 if (nfserr)
3791 return nfserr;
3792
3793 p = xdr_reserve_space(&resp->xdr, 4 + 8);
3794 *p++ = cpu_to_be32(seek->seek_eof);
3795 p = xdr_encode_hyper(p, seek->seek_pos);
3796
3797 return nfserr;
3798}
3799
3800static __be32
3754nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) 3801nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
3755{ 3802{
3756 return nfserr; 3803 return nfserr;
@@ -3822,6 +3869,20 @@ static nfsd4_enc nfsd4_enc_ops[] = {
3822 [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, 3869 [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
3823 [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, 3870 [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
3824 [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, 3871 [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
3872
3873 /* NFSv4.2 operations */
3874 [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
3875 [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop,
3876 [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
3877 [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
3878 [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
3879 [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
3880 [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
3881 [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
3882 [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop,
3883 [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
3884 [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
3885 [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
3825}; 3886};
3826 3887
3827/* 3888/*
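
For reference, nfsd4_decode_seek() and nfsd4_encode_seek() above fix the SEEK wire format: the arguments are a 16-byte stateid followed by a 64-bit offset and a 32-bit whence (READ_BUF(8 + 4)), and the reply body is a 32-bit eof flag followed by a 64-bit offset (xdr_reserve_space(4 + 8)). A stand-alone client-side parse of the result, shown only to make the byte order explicit (illustrative, not kernel code):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* ntohl() */

struct seek_res {
	uint32_t eof;		/* boolean, encoded first */
	uint64_t offset;	/* big-endian 64-bit hyper */
};

/* buf points at the SEEK result body: 4-byte eof, then 8-byte offset. */
struct seek_res parse_seek_res(const unsigned char *buf)
{
	struct seek_res res;
	uint32_t hi, lo;

	memcpy(&res.eof, buf, 4);
	res.eof = ntohl(res.eof);

	memcpy(&hi, buf + 4, 4);
	memcpy(&lo, buf + 8, 4);
	res.offset = ((uint64_t)ntohl(hi) << 32) | ntohl(lo);

	return res;
}
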
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index ff9567633245..122f69185ef5 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -27,8 +27,12 @@
27 */ 27 */
28#define TARGET_BUCKET_SIZE 64 28#define TARGET_BUCKET_SIZE 64
29 29
30static struct hlist_head * cache_hash; 30struct nfsd_drc_bucket {
31static struct list_head lru_head; 31 struct list_head lru_head;
32 spinlock_t cache_lock;
33};
34
35static struct nfsd_drc_bucket *drc_hashtbl;
32static struct kmem_cache *drc_slab; 36static struct kmem_cache *drc_slab;
33 37
34/* max number of entries allowed in the cache */ 38/* max number of entries allowed in the cache */
@@ -36,6 +40,7 @@ static unsigned int max_drc_entries;
36 40
37/* number of significant bits in the hash value */ 41/* number of significant bits in the hash value */
38static unsigned int maskbits; 42static unsigned int maskbits;
43static unsigned int drc_hashsize;
39 44
40/* 45/*
41 * Stats and other tracking of on the duplicate reply cache. All of these and 46 * Stats and other tracking of on the duplicate reply cache. All of these and
@@ -43,7 +48,7 @@ static unsigned int maskbits;
43 */ 48 */
44 49
45/* total number of entries */ 50/* total number of entries */
46static unsigned int num_drc_entries; 51static atomic_t num_drc_entries;
47 52
48/* cache misses due only to checksum comparison failures */ 53/* cache misses due only to checksum comparison failures */
49static unsigned int payload_misses; 54static unsigned int payload_misses;
@@ -75,7 +80,6 @@ static struct shrinker nfsd_reply_cache_shrinker = {
75 * A cache entry is "single use" if c_state == RC_INPROG 80 * A cache entry is "single use" if c_state == RC_INPROG
76 * Otherwise, it when accessing _prev or _next, the lock must be held. 81 * Otherwise, it when accessing _prev or _next, the lock must be held.
77 */ 82 */
78static DEFINE_SPINLOCK(cache_lock);
79static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); 83static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func);
80 84
81/* 85/*
@@ -116,6 +120,12 @@ nfsd_hashsize(unsigned int limit)
116 return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); 120 return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
117} 121}
118 122
123static u32
124nfsd_cache_hash(__be32 xid)
125{
126 return hash_32(be32_to_cpu(xid), maskbits);
127}
128
119static struct svc_cacherep * 129static struct svc_cacherep *
120nfsd_reply_cache_alloc(void) 130nfsd_reply_cache_alloc(void)
121{ 131{
@@ -126,7 +136,6 @@ nfsd_reply_cache_alloc(void)
126 rp->c_state = RC_UNUSED; 136 rp->c_state = RC_UNUSED;
127 rp->c_type = RC_NOCACHE; 137 rp->c_type = RC_NOCACHE;
128 INIT_LIST_HEAD(&rp->c_lru); 138 INIT_LIST_HEAD(&rp->c_lru);
129 INIT_HLIST_NODE(&rp->c_hash);
130 } 139 }
131 return rp; 140 return rp;
132} 141}
@@ -138,29 +147,27 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
138 drc_mem_usage -= rp->c_replvec.iov_len; 147 drc_mem_usage -= rp->c_replvec.iov_len;
139 kfree(rp->c_replvec.iov_base); 148 kfree(rp->c_replvec.iov_base);
140 } 149 }
141 if (!hlist_unhashed(&rp->c_hash))
142 hlist_del(&rp->c_hash);
143 list_del(&rp->c_lru); 150 list_del(&rp->c_lru);
144 --num_drc_entries; 151 atomic_dec(&num_drc_entries);
145 drc_mem_usage -= sizeof(*rp); 152 drc_mem_usage -= sizeof(*rp);
146 kmem_cache_free(drc_slab, rp); 153 kmem_cache_free(drc_slab, rp);
147} 154}
148 155
149static void 156static void
150nfsd_reply_cache_free(struct svc_cacherep *rp) 157nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
151{ 158{
152 spin_lock(&cache_lock); 159 spin_lock(&b->cache_lock);
153 nfsd_reply_cache_free_locked(rp); 160 nfsd_reply_cache_free_locked(rp);
154 spin_unlock(&cache_lock); 161 spin_unlock(&b->cache_lock);
155} 162}
156 163
157int nfsd_reply_cache_init(void) 164int nfsd_reply_cache_init(void)
158{ 165{
159 unsigned int hashsize; 166 unsigned int hashsize;
167 unsigned int i;
160 168
161 INIT_LIST_HEAD(&lru_head);
162 max_drc_entries = nfsd_cache_size_limit(); 169 max_drc_entries = nfsd_cache_size_limit();
163 num_drc_entries = 0; 170 atomic_set(&num_drc_entries, 0);
164 hashsize = nfsd_hashsize(max_drc_entries); 171 hashsize = nfsd_hashsize(max_drc_entries);
165 maskbits = ilog2(hashsize); 172 maskbits = ilog2(hashsize);
166 173
@@ -170,9 +177,14 @@ int nfsd_reply_cache_init(void)
170 if (!drc_slab) 177 if (!drc_slab)
171 goto out_nomem; 178 goto out_nomem;
172 179
173 cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); 180 drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
174 if (!cache_hash) 181 if (!drc_hashtbl)
175 goto out_nomem; 182 goto out_nomem;
183 for (i = 0; i < hashsize; i++) {
184 INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
185 spin_lock_init(&drc_hashtbl[i].cache_lock);
186 }
187 drc_hashsize = hashsize;
176 188
177 return 0; 189 return 0;
178out_nomem: 190out_nomem:
@@ -184,17 +196,22 @@ out_nomem:
184void nfsd_reply_cache_shutdown(void) 196void nfsd_reply_cache_shutdown(void)
185{ 197{
186 struct svc_cacherep *rp; 198 struct svc_cacherep *rp;
199 unsigned int i;
187 200
188 unregister_shrinker(&nfsd_reply_cache_shrinker); 201 unregister_shrinker(&nfsd_reply_cache_shrinker);
189 cancel_delayed_work_sync(&cache_cleaner); 202 cancel_delayed_work_sync(&cache_cleaner);
190 203
191 while (!list_empty(&lru_head)) { 204 for (i = 0; i < drc_hashsize; i++) {
192 rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); 205 struct list_head *head = &drc_hashtbl[i].lru_head;
193 nfsd_reply_cache_free_locked(rp); 206 while (!list_empty(head)) {
207 rp = list_first_entry(head, struct svc_cacherep, c_lru);
208 nfsd_reply_cache_free_locked(rp);
209 }
194 } 210 }
195 211
196 kfree (cache_hash); 212 kfree (drc_hashtbl);
197 cache_hash = NULL; 213 drc_hashtbl = NULL;
214 drc_hashsize = 0;
198 215
199 if (drc_slab) { 216 if (drc_slab) {
200 kmem_cache_destroy(drc_slab); 217 kmem_cache_destroy(drc_slab);
@@ -207,61 +224,63 @@ void nfsd_reply_cache_shutdown(void)
207 * not already scheduled. 224 * not already scheduled.
208 */ 225 */
209static void 226static void
210lru_put_end(struct svc_cacherep *rp) 227lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
211{ 228{
212 rp->c_timestamp = jiffies; 229 rp->c_timestamp = jiffies;
213 list_move_tail(&rp->c_lru, &lru_head); 230 list_move_tail(&rp->c_lru, &b->lru_head);
214 schedule_delayed_work(&cache_cleaner, RC_EXPIRE); 231 schedule_delayed_work(&cache_cleaner, RC_EXPIRE);
215} 232}
216 233
217/*
218 * Move a cache entry from one hash list to another
219 */
220static void
221hash_refile(struct svc_cacherep *rp)
222{
223 hlist_del_init(&rp->c_hash);
224 /*
225 * No point in byte swapping c_xid since we're just using it to pick
226 * a hash bucket.
227 */
228 hlist_add_head(&rp->c_hash, cache_hash +
229 hash_32((__force u32)rp->c_xid, maskbits));
230}
231
232/*
233 * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
234 * Also prune the oldest ones when the total exceeds the max number of entries.
235 */
236static long 234static long
237prune_cache_entries(void) 235prune_bucket(struct nfsd_drc_bucket *b)
238{ 236{
239 struct svc_cacherep *rp, *tmp; 237 struct svc_cacherep *rp, *tmp;
240 long freed = 0; 238 long freed = 0;
241 239
242 list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { 240 list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
243 /* 241 /*
244 * Don't free entries attached to calls that are still 242 * Don't free entries attached to calls that are still
245 * in-progress, but do keep scanning the list. 243 * in-progress, but do keep scanning the list.
246 */ 244 */
247 if (rp->c_state == RC_INPROG) 245 if (rp->c_state == RC_INPROG)
248 continue; 246 continue;
249 if (num_drc_entries <= max_drc_entries && 247 if (atomic_read(&num_drc_entries) <= max_drc_entries &&
250 time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) 248 time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
251 break; 249 break;
252 nfsd_reply_cache_free_locked(rp); 250 nfsd_reply_cache_free_locked(rp);
253 freed++; 251 freed++;
254 } 252 }
253 return freed;
254}
255
256/*
257 * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
258 * Also prune the oldest ones when the total exceeds the max number of entries.
259 */
260static long
261prune_cache_entries(void)
262{
263 unsigned int i;
264 long freed = 0;
265 bool cancel = true;
266
267 for (i = 0; i < drc_hashsize; i++) {
268 struct nfsd_drc_bucket *b = &drc_hashtbl[i];
269
270 if (list_empty(&b->lru_head))
271 continue;
272 spin_lock(&b->cache_lock);
273 freed += prune_bucket(b);
274 if (!list_empty(&b->lru_head))
275 cancel = false;
276 spin_unlock(&b->cache_lock);
277 }
255 278
256 /* 279 /*
257 * Conditionally rearm the job. If we cleaned out the list, then 280 * Conditionally rearm the job to run in RC_EXPIRE since we just
258 * cancel any pending run (since there won't be any work to do). 281 * ran the pruner.
259 * Otherwise, we rearm the job or modify the existing one to run in
260 * RC_EXPIRE since we just ran the pruner.
261 */ 282 */
262 if (list_empty(&lru_head)) 283 if (!cancel)
263 cancel_delayed_work(&cache_cleaner);
264 else
265 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); 284 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
266 return freed; 285 return freed;
267} 286}
@@ -269,32 +288,19 @@ prune_cache_entries(void)
269static void 288static void
270cache_cleaner_func(struct work_struct *unused) 289cache_cleaner_func(struct work_struct *unused)
271{ 290{
272 spin_lock(&cache_lock);
273 prune_cache_entries(); 291 prune_cache_entries();
274 spin_unlock(&cache_lock);
275} 292}
276 293
277static unsigned long 294static unsigned long
278nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) 295nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
279{ 296{
280 unsigned long num; 297 return atomic_read(&num_drc_entries);
281
282 spin_lock(&cache_lock);
283 num = num_drc_entries;
284 spin_unlock(&cache_lock);
285
286 return num;
287} 298}
288 299
289static unsigned long 300static unsigned long
290nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) 301nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
291{ 302{
292 unsigned long freed; 303 return prune_cache_entries();
293
294 spin_lock(&cache_lock);
295 freed = prune_cache_entries();
296 spin_unlock(&cache_lock);
297 return freed;
298} 304}
299/* 305/*
300 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes 306 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
@@ -332,20 +338,24 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
332static bool 338static bool
333nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) 339nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
334{ 340{
335 /* Check RPC header info first */ 341 /* Check RPC XID first */
336 if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || 342 if (rqstp->rq_xid != rp->c_xid)
337 rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers ||
338 rqstp->rq_arg.len != rp->c_len ||
339 !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
340 rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
341 return false; 343 return false;
342
343 /* compare checksum of NFS data */ 344 /* compare checksum of NFS data */
344 if (csum != rp->c_csum) { 345 if (csum != rp->c_csum) {
345 ++payload_misses; 346 ++payload_misses;
346 return false; 347 return false;
347 } 348 }
348 349
350 /* Other discriminators */
351 if (rqstp->rq_proc != rp->c_proc ||
352 rqstp->rq_prot != rp->c_prot ||
353 rqstp->rq_vers != rp->c_vers ||
354 rqstp->rq_arg.len != rp->c_len ||
355 !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
356 rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
357 return false;
358
349 return true; 359 return true;
350} 360}
351 361
@@ -355,18 +365,14 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
355 * NULL on failure. 365 * NULL on failure.
356 */ 366 */
357static struct svc_cacherep * 367static struct svc_cacherep *
358nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) 368nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp,
369 __wsum csum)
359{ 370{
360 struct svc_cacherep *rp, *ret = NULL; 371 struct svc_cacherep *rp, *ret = NULL;
361 struct hlist_head *rh; 372 struct list_head *rh = &b->lru_head;
362 unsigned int entries = 0; 373 unsigned int entries = 0;
363 374
364 /* 375 list_for_each_entry(rp, rh, c_lru) {
365 * No point in byte swapping rq_xid since we're just using it to pick
366 * a hash bucket.
367 */
368 rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)];
369 hlist_for_each_entry(rp, rh, c_hash) {
370 ++entries; 376 ++entries;
371 if (nfsd_cache_match(rqstp, csum, rp)) { 377 if (nfsd_cache_match(rqstp, csum, rp)) {
372 ret = rp; 378 ret = rp;
@@ -377,11 +383,12 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
377 /* tally hash chain length stats */ 383 /* tally hash chain length stats */
378 if (entries > longest_chain) { 384 if (entries > longest_chain) {
379 longest_chain = entries; 385 longest_chain = entries;
380 longest_chain_cachesize = num_drc_entries; 386 longest_chain_cachesize = atomic_read(&num_drc_entries);
381 } else if (entries == longest_chain) { 387 } else if (entries == longest_chain) {
382 /* prefer to keep the smallest cachesize possible here */ 388 /* prefer to keep the smallest cachesize possible here */
383 longest_chain_cachesize = min(longest_chain_cachesize, 389 longest_chain_cachesize = min_t(unsigned int,
384 num_drc_entries); 390 longest_chain_cachesize,
391 atomic_read(&num_drc_entries));
385 } 392 }
386 393
387 return ret; 394 return ret;
@@ -403,6 +410,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
403 vers = rqstp->rq_vers, 410 vers = rqstp->rq_vers,
404 proc = rqstp->rq_proc; 411 proc = rqstp->rq_proc;
405 __wsum csum; 412 __wsum csum;
413 u32 hash = nfsd_cache_hash(xid);
414 struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
406 unsigned long age; 415 unsigned long age;
407 int type = rqstp->rq_cachetype; 416 int type = rqstp->rq_cachetype;
408 int rtn = RC_DOIT; 417 int rtn = RC_DOIT;
@@ -420,16 +429,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
420 * preallocate an entry. 429 * preallocate an entry.
421 */ 430 */
422 rp = nfsd_reply_cache_alloc(); 431 rp = nfsd_reply_cache_alloc();
423 spin_lock(&cache_lock); 432 spin_lock(&b->cache_lock);
424 if (likely(rp)) { 433 if (likely(rp)) {
425 ++num_drc_entries; 434 atomic_inc(&num_drc_entries);
426 drc_mem_usage += sizeof(*rp); 435 drc_mem_usage += sizeof(*rp);
427 } 436 }
428 437
429 /* go ahead and prune the cache */ 438 /* go ahead and prune the cache */
430 prune_cache_entries(); 439 prune_bucket(b);
431 440
432 found = nfsd_cache_search(rqstp, csum); 441 found = nfsd_cache_search(b, rqstp, csum);
433 if (found) { 442 if (found) {
434 if (likely(rp)) 443 if (likely(rp))
435 nfsd_reply_cache_free_locked(rp); 444 nfsd_reply_cache_free_locked(rp);
@@ -454,8 +463,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
454 rp->c_len = rqstp->rq_arg.len; 463 rp->c_len = rqstp->rq_arg.len;
455 rp->c_csum = csum; 464 rp->c_csum = csum;
456 465
457 hash_refile(rp); 466 lru_put_end(b, rp);
458 lru_put_end(rp);
459 467
460 /* release any buffer */ 468 /* release any buffer */
461 if (rp->c_type == RC_REPLBUFF) { 469 if (rp->c_type == RC_REPLBUFF) {
@@ -465,14 +473,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
465 } 473 }
466 rp->c_type = RC_NOCACHE; 474 rp->c_type = RC_NOCACHE;
467 out: 475 out:
468 spin_unlock(&cache_lock); 476 spin_unlock(&b->cache_lock);
469 return rtn; 477 return rtn;
470 478
471found_entry: 479found_entry:
472 nfsdstats.rchits++; 480 nfsdstats.rchits++;
473 /* We found a matching entry which is either in progress or done. */ 481 /* We found a matching entry which is either in progress or done. */
474 age = jiffies - rp->c_timestamp; 482 age = jiffies - rp->c_timestamp;
475 lru_put_end(rp); 483 lru_put_end(b, rp);
476 484
477 rtn = RC_DROPIT; 485 rtn = RC_DROPIT;
478 /* Request being processed or excessive rexmits */ 486 /* Request being processed or excessive rexmits */
@@ -527,18 +535,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
527{ 535{
528 struct svc_cacherep *rp = rqstp->rq_cacherep; 536 struct svc_cacherep *rp = rqstp->rq_cacherep;
529 struct kvec *resv = &rqstp->rq_res.head[0], *cachv; 537 struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
538 u32 hash;
539 struct nfsd_drc_bucket *b;
530 int len; 540 int len;
531 size_t bufsize = 0; 541 size_t bufsize = 0;
532 542
533 if (!rp) 543 if (!rp)
534 return; 544 return;
535 545
546 hash = nfsd_cache_hash(rp->c_xid);
547 b = &drc_hashtbl[hash];
548
536 len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); 549 len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
537 len >>= 2; 550 len >>= 2;
538 551
539 /* Don't cache excessive amounts of data and XDR failures */ 552 /* Don't cache excessive amounts of data and XDR failures */
540 if (!statp || len > (256 >> 2)) { 553 if (!statp || len > (256 >> 2)) {
541 nfsd_reply_cache_free(rp); 554 nfsd_reply_cache_free(b, rp);
542 return; 555 return;
543 } 556 }
544 557
@@ -553,23 +566,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
553 bufsize = len << 2; 566 bufsize = len << 2;
554 cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); 567 cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
555 if (!cachv->iov_base) { 568 if (!cachv->iov_base) {
556 nfsd_reply_cache_free(rp); 569 nfsd_reply_cache_free(b, rp);
557 return; 570 return;
558 } 571 }
559 cachv->iov_len = bufsize; 572 cachv->iov_len = bufsize;
560 memcpy(cachv->iov_base, statp, bufsize); 573 memcpy(cachv->iov_base, statp, bufsize);
561 break; 574 break;
562 case RC_NOCACHE: 575 case RC_NOCACHE:
563 nfsd_reply_cache_free(rp); 576 nfsd_reply_cache_free(b, rp);
564 return; 577 return;
565 } 578 }
566 spin_lock(&cache_lock); 579 spin_lock(&b->cache_lock);
567 drc_mem_usage += bufsize; 580 drc_mem_usage += bufsize;
568 lru_put_end(rp); 581 lru_put_end(b, rp);
569 rp->c_secure = rqstp->rq_secure; 582 rp->c_secure = rqstp->rq_secure;
570 rp->c_type = cachetype; 583 rp->c_type = cachetype;
571 rp->c_state = RC_DONE; 584 rp->c_state = RC_DONE;
572 spin_unlock(&cache_lock); 585 spin_unlock(&b->cache_lock);
573 return; 586 return;
574} 587}
575 588
@@ -600,9 +613,9 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
600 */ 613 */
601static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) 614static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
602{ 615{
603 spin_lock(&cache_lock);
604 seq_printf(m, "max entries: %u\n", max_drc_entries); 616 seq_printf(m, "max entries: %u\n", max_drc_entries);
605 seq_printf(m, "num entries: %u\n", num_drc_entries); 617 seq_printf(m, "num entries: %u\n",
618 atomic_read(&num_drc_entries));
606 seq_printf(m, "hash buckets: %u\n", 1 << maskbits); 619 seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
607 seq_printf(m, "mem usage: %u\n", drc_mem_usage); 620 seq_printf(m, "mem usage: %u\n", drc_mem_usage);
608 seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); 621 seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
@@ -611,7 +624,6 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
611 seq_printf(m, "payload misses: %u\n", payload_misses); 624 seq_printf(m, "payload misses: %u\n", payload_misses);
612 seq_printf(m, "longest chain len: %u\n", longest_chain); 625 seq_printf(m, "longest chain len: %u\n", longest_chain);
613 seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); 626 seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
614 spin_unlock(&cache_lock);
615 return 0; 627 return 0;
616} 628}
617 629
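
The nfscache.c hunks above replace the single global cache_lock and the hlist-based hash with an array of nfsd_drc_bucket structures, each holding its own LRU list and spinlock, with the bucket chosen by hashing the request XID (nfsd_cache_hash). The standalone sketch below models only that bucket-selection and per-bucket-locking idea in userspace; the multiplicative hash constant and the pthread mutexes are illustrative stand-ins for the kernel's hash_32() and spinlocks, not nfsd code.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define MASKBITS 6                      /* ilog2 of the bucket count */
#define HASHSIZE (1u << MASKBITS)       /* power of two, like nfsd_hashsize() */

struct drc_bucket {
	pthread_mutex_t cache_lock;     /* one lock per bucket */
	unsigned int nentries;          /* per-bucket entry count */
};

static struct drc_bucket drc_hashtbl[HASHSIZE];

/* Fold an XID down to MASKBITS bits with a multiplicative hash; the kernel
 * uses hash_32() on the byte-swapped XID for the same job. */
static uint32_t cache_hash(uint32_t xid)
{
	return (uint32_t)(xid * 2654435761u) >> (32 - MASKBITS);
}

static void cache_insert(uint32_t xid)
{
	struct drc_bucket *b = &drc_hashtbl[cache_hash(xid)];

	pthread_mutex_lock(&b->cache_lock);   /* contends only within one bucket */
	b->nentries++;
	pthread_mutex_unlock(&b->cache_lock);
}

int main(void)
{
	uint32_t xid;
	unsigned int i;

	for (i = 0; i < HASHSIZE; i++)
		pthread_mutex_init(&drc_hashtbl[i].cache_lock, NULL);

	for (xid = 1; xid <= 1024; xid++)
		cache_insert(xid);

	for (i = 0; i < HASHSIZE; i++)
		printf("bucket %2u: %u entries\n", i, drc_hashtbl[i].nentries);
	return 0;
}

With the global lock gone, two requests contend only when their XIDs land in the same bucket, which is what lets the lookup, update, and pruning paths in the patch proceed independently across buckets.
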
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 4e042105fb6e..ca73ca79a0ee 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -49,6 +49,7 @@ enum {
49 NFSD_Leasetime, 49 NFSD_Leasetime,
50 NFSD_Gracetime, 50 NFSD_Gracetime,
51 NFSD_RecoveryDir, 51 NFSD_RecoveryDir,
52 NFSD_V4EndGrace,
52#endif 53#endif
53}; 54};
54 55
@@ -68,6 +69,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
68static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 69static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
69static ssize_t write_gracetime(struct file *file, char *buf, size_t size); 70static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
70static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); 71static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
72static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size);
71#endif 73#endif
72 74
73static ssize_t (*write_op[])(struct file *, char *, size_t) = { 75static ssize_t (*write_op[])(struct file *, char *, size_t) = {
@@ -84,6 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
84 [NFSD_Leasetime] = write_leasetime, 86 [NFSD_Leasetime] = write_leasetime,
85 [NFSD_Gracetime] = write_gracetime, 87 [NFSD_Gracetime] = write_gracetime,
86 [NFSD_RecoveryDir] = write_recoverydir, 88 [NFSD_RecoveryDir] = write_recoverydir,
89 [NFSD_V4EndGrace] = write_v4_end_grace,
87#endif 90#endif
88}; 91};
89 92
@@ -1077,6 +1080,47 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
1077 return rv; 1080 return rv;
1078} 1081}
1079 1082
1083/**
1084 * write_v4_end_grace - release grace period for nfsd's v4.x lock manager
1085 *
1086 * Input:
1087 * buf: ignored
1088 * size: zero
1089 * OR
1090 *
1091 * Input:
1092 * buf: any value
1093 * size: non-zero length of C string in @buf
1094 * Output:
1095 * passed-in buffer filled with "Y" or "N" with a newline
1096 * and NULL-terminated C string. This indicates whether
1097 * the grace period has ended in the current net
1098 * namespace. Return code is the size in bytes of the
1099 * string. Writing a string that starts with 'Y', 'y', or
1100 * '1' to the file will end the grace period for nfsd's v4
1101 * lock manager.
1102 */
1103static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
1104{
1105 struct net *net = file->f_dentry->d_sb->s_fs_info;
1106 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1107
1108 if (size > 0) {
1109 switch(buf[0]) {
1110 case 'Y':
1111 case 'y':
1112 case '1':
1113 nfsd4_end_grace(nn);
1114 break;
1115 default:
1116 return -EINVAL;
1117 }
1118 }
1119
1120 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n",
1121 nn->grace_ended ? 'Y' : 'N');
1122}
1123
1080#endif 1124#endif
1081 1125
1082/*----------------------------------------------------------------------------*/ 1126/*----------------------------------------------------------------------------*/
@@ -1110,6 +1154,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1110 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1154 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
1111 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1155 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
1112 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, 1156 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
1157 [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO},
1113#endif 1158#endif
1114 /* last one */ {""} 1159 /* last one */ {""}
1115 }; 1160 };
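
The new v4_end_grace control behaves like the other nfsd transaction files: reading it reports whether the grace period has ended ("Y" or "N"), and writing a string beginning with 'Y', 'y', or '1' ends the NFSv4 grace period immediately. The userspace sketch below is only an illustration; it assumes the nfsd filesystem is mounted at the conventional /proc/fs/nfsd and that the program runs with enough privilege to open the file read-write.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = "/proc/fs/nfsd/v4_end_grace";   /* assumed mount point */
	char buf[16];
	ssize_t n;
	int fd;

	fd = open(path, O_RDWR);
	if (fd < 0) {
		perror(path);
		return 1;
	}

	if (argc > 1 && strcmp(argv[1], "end") == 0) {
		/* any string starting with 'Y', 'y' or '1' ends the grace period */
		if (write(fd, "Y\n", 2) < 0)
			perror("write");
	}

	/* reading reports "Y\n" once the grace period has ended, "N\n" before */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("grace period ended: %s", buf);
	}

	close(fd);
	return 0;
}
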
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 847daf37e566..747f3b95bd11 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void);
251#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) 251#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
252#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) 252#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP)
253#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) 253#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH)
254#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) 254#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP)
255#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) 255#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED)
256#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) 256#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS)
257#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) 257#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index e883a5868be6..88026fc6a981 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -209,8 +209,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
209 * fix that case easily. 209 * fix that case easily.
210 */ 210 */
211 struct cred *new = prepare_creds(); 211 struct cred *new = prepare_creds();
212 if (!new) 212 if (!new) {
213 return nfserrno(-ENOMEM); 213 error = nfserrno(-ENOMEM);
214 goto out;
215 }
214 new->cap_effective = 216 new->cap_effective =
215 cap_raise_nfsd_set(new->cap_effective, 217 cap_raise_nfsd_set(new->cap_effective,
216 new->cap_permitted); 218 new->cap_permitted);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4a89e00d7461..bf52dc7b15e7 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -62,16 +62,21 @@ typedef struct {
62 (s)->si_generation 62 (s)->si_generation
63 63
64struct nfsd4_callback { 64struct nfsd4_callback {
65 void *cb_op;
66 struct nfs4_client *cb_clp; 65 struct nfs4_client *cb_clp;
67 struct list_head cb_per_client; 66 struct list_head cb_per_client;
68 u32 cb_minorversion; 67 u32 cb_minorversion;
69 struct rpc_message cb_msg; 68 struct rpc_message cb_msg;
70 const struct rpc_call_ops *cb_ops; 69 struct nfsd4_callback_ops *cb_ops;
71 struct work_struct cb_work; 70 struct work_struct cb_work;
72 bool cb_done; 71 bool cb_done;
73}; 72};
74 73
74struct nfsd4_callback_ops {
75 void (*prepare)(struct nfsd4_callback *);
76 int (*done)(struct nfsd4_callback *, struct rpc_task *);
77 void (*release)(struct nfsd4_callback *);
78};
79
75/* 80/*
76 * A core object that represents a "common" stateid. These are generally 81 * A core object that represents a "common" stateid. These are generally
77 * embedded within the different (more specific) stateid objects and contain 82 * embedded within the different (more specific) stateid objects and contain
@@ -306,6 +311,7 @@ struct nfs4_client {
306#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ 311#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */
307#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ 312#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */
308#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ 313#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */
314#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
309#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 315#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
310 1 << NFSD4_CLIENT_CB_KILL) 316 1 << NFSD4_CLIENT_CB_KILL)
311 unsigned long cl_flags; 317 unsigned long cl_flags;
@@ -517,6 +523,13 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
517#define RD_STATE 0x00000010 523#define RD_STATE 0x00000010
518#define WR_STATE 0x00000020 524#define WR_STATE 0x00000020
519 525
526enum nfsd4_cb_op {
527 NFSPROC4_CLNT_CB_NULL = 0,
528 NFSPROC4_CLNT_CB_RECALL,
529 NFSPROC4_CLNT_CB_SEQUENCE,
530};
531
532
520struct nfsd4_compound_state; 533struct nfsd4_compound_state;
521struct nfsd_net; 534struct nfsd_net;
522 535
@@ -531,12 +544,12 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
531extern __be32 nfs4_check_open_reclaim(clientid_t *clid, 544extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
532 struct nfsd4_compound_state *cstate, struct nfsd_net *nn); 545 struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
533extern int set_callback_cred(void); 546extern int set_callback_cred(void);
534void nfsd4_run_cb_null(struct work_struct *w);
535void nfsd4_run_cb_recall(struct work_struct *w);
536extern void nfsd4_probe_callback(struct nfs4_client *clp); 547extern void nfsd4_probe_callback(struct nfs4_client *clp);
537extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); 548extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
538extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 549extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
539extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 550extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
551 struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
552extern void nfsd4_run_cb(struct nfsd4_callback *cb);
540extern int nfsd4_create_callback_queue(void); 553extern int nfsd4_create_callback_queue(void);
541extern void nfsd4_destroy_callback_queue(void); 554extern void nfsd4_destroy_callback_queue(void);
542extern void nfsd4_shutdown_callback(struct nfs4_client *); 555extern void nfsd4_shutdown_callback(struct nfs4_client *);
@@ -545,13 +558,16 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
545 struct nfsd_net *nn); 558 struct nfsd_net *nn);
546extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); 559extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
547 560
561/* grace period management */
562void nfsd4_end_grace(struct nfsd_net *nn);
563
548/* nfs4recover operations */ 564/* nfs4recover operations */
549extern int nfsd4_client_tracking_init(struct net *net); 565extern int nfsd4_client_tracking_init(struct net *net);
550extern void nfsd4_client_tracking_exit(struct net *net); 566extern void nfsd4_client_tracking_exit(struct net *net);
551extern void nfsd4_client_record_create(struct nfs4_client *clp); 567extern void nfsd4_client_record_create(struct nfs4_client *clp);
552extern void nfsd4_client_record_remove(struct nfs4_client *clp); 568extern void nfsd4_client_record_remove(struct nfs4_client *clp);
553extern int nfsd4_client_record_check(struct nfs4_client *clp); 569extern int nfsd4_client_record_check(struct nfs4_client *clp);
554extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); 570extern void nfsd4_record_grace_done(struct nfsd_net *nn);
555 571
556/* nfs fault injection functions */ 572/* nfs fault injection functions */
557#ifdef CONFIG_NFSD_FAULT_INJECTION 573#ifdef CONFIG_NFSD_FAULT_INJECTION
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index f501a9b5c9df..965cffd17a0c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -445,6 +445,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
445 if (err) 445 if (err)
446 goto out; 446 goto out;
447 size_change = 1; 447 size_change = 1;
448
449 /*
450 * RFC5661, Section 18.30.4:
451 * Changing the size of a file with SETATTR indirectly
452 * changes the time_modify and change attributes.
453 *
454 * (and similar for the older RFCs)
455 */
456 if (iap->ia_size != i_size_read(inode))
457 iap->ia_valid |= ATTR_MTIME;
448 } 458 }
449 459
450 iap->ia_valid |= ATTR_CTIME; 460 iap->ia_valid |= ATTR_CTIME;
@@ -649,6 +659,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
649{ 659{
650 struct path path; 660 struct path path;
651 struct inode *inode; 661 struct inode *inode;
662 struct file *file;
652 int flags = O_RDONLY|O_LARGEFILE; 663 int flags = O_RDONLY|O_LARGEFILE;
653 __be32 err; 664 __be32 err;
654 int host_err = 0; 665 int host_err = 0;
@@ -703,19 +714,25 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
703 else 714 else
704 flags = O_WRONLY|O_LARGEFILE; 715 flags = O_WRONLY|O_LARGEFILE;
705 } 716 }
706 *filp = dentry_open(&path, flags, current_cred());
707 if (IS_ERR(*filp)) {
708 host_err = PTR_ERR(*filp);
709 *filp = NULL;
710 } else {
711 host_err = ima_file_check(*filp, may_flags);
712 717
713 if (may_flags & NFSD_MAY_64BIT_COOKIE) 718 file = dentry_open(&path, flags, current_cred());
714 (*filp)->f_mode |= FMODE_64BITHASH; 719 if (IS_ERR(file)) {
715 else 720 host_err = PTR_ERR(file);
716 (*filp)->f_mode |= FMODE_32BITHASH; 721 goto out_nfserr;
717 } 722 }
718 723
724 host_err = ima_file_check(file, may_flags);
725 if (host_err) {
726 nfsd_close(file);
727 goto out_nfserr;
728 }
729
730 if (may_flags & NFSD_MAY_64BIT_COOKIE)
731 file->f_mode |= FMODE_64BITHASH;
732 else
733 file->f_mode |= FMODE_32BITHASH;
734
735 *filp = file;
719out_nfserr: 736out_nfserr:
720 err = nfserrno(host_err); 737 err = nfserrno(host_err);
721out: 738out:
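
The nfsd_setattr hunk makes the RFC 5661, Section 18.30.4 rule explicit: a size-changing SETATTR also updates the file's modification time, so the server now sets ATTR_MTIME itself whenever the size actually changes. The short local sketch below only demonstrates that size-change/mtime coupling on an ordinary file; it illustrates the cited semantics and is not NFS code.

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	const char *path = "size_vs_mtime.tmp";
	struct stat before, after;
	int fd = open(path, O_CREAT | O_RDWR, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (stat(path, &before) != 0) {
		perror("stat");
		return 1;
	}

	sleep(1);                         /* make an mtime change observable */

	if (ftruncate(fd, 4096) != 0) {   /* size-only change, like a SETATTR of size */
		perror("ftruncate");
		return 1;
	}

	if (stat(path, &after) != 0) {
		perror("stat");
		return 1;
	}

	printf("mtime %s\n", after.st_mtime != before.st_mtime ?
	       "updated along with the size change" : "unchanged");

	close(fd);
	unlink(path);
	return 0;
}
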
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 465e7799742a..5720e9457f33 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete {
428 u32 rca_one_fs; 428 u32 rca_one_fs;
429}; 429};
430 430
431struct nfsd4_seek {
432 /* request */
433 stateid_t seek_stateid;
434 loff_t seek_offset;
435 u32 seek_whence;
436
437 /* response */
438 u32 seek_eof;
439 loff_t seek_pos;
440};
441
431struct nfsd4_op { 442struct nfsd4_op {
432 int opnum; 443 int opnum;
433 __be32 status; 444 __be32 status;
@@ -473,6 +484,9 @@ struct nfsd4_op {
473 struct nfsd4_reclaim_complete reclaim_complete; 484 struct nfsd4_reclaim_complete reclaim_complete;
474 struct nfsd4_test_stateid test_stateid; 485 struct nfsd4_test_stateid test_stateid;
475 struct nfsd4_free_stateid free_stateid; 486 struct nfsd4_free_stateid free_stateid;
487
488 /* NFSv4.2 */
489 struct nfsd4_seek seek;
476 } u; 490 } u;
477 struct nfs4_replay * replay; 491 struct nfs4_replay * replay;
478}; 492};
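
The nfsd4_seek structure added to xdr4.h carries the NFSv4.2 SEEK operation, which is the operation behind lseek(..., SEEK_DATA) and lseek(..., SEEK_HOLE) on NFSv4.2 mounts once server support is wired up. The local sketch below shows that user-visible interface on an ordinary file; the default path is only an example, and filesystems without hole support typically report end-of-file for SEEK_HOLE.

#define _GNU_SOURCE                     /* for SEEK_DATA / SEEK_HOLE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/etc/hostname";
	off_t data, hole;
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}

	data = lseek(fd, 0, SEEK_DATA);   /* first data byte at or after offset 0 */
	hole = lseek(fd, 0, SEEK_HOLE);   /* first hole at or after 0 (EOF if none) */

	printf("%s: first data at %lld, first hole at %lld\n",
	       path, (long long)data, (long long)hole);

	close(fd);
	return 0;
}
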