aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/Kconfig6
-rw-r--r--fs/lockd/Makefile3
-rw-r--r--fs/lockd/netns.h1
-rw-r--r--fs/lockd/procfs.c92
-rw-r--r--fs/lockd/procfs.h28
-rw-r--r--fs/lockd/svc.c16
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs_common/Makefile3
-rw-r--r--fs/nfs_common/grace.c (renamed from fs/lockd/grace.c)68
-rw-r--r--fs/nfsd/Kconfig4
-rw-r--r--fs/nfsd/cache.h1
-rw-r--r--fs/nfsd/export.c1
-rw-r--r--fs/nfsd/nfs3proc.c13
-rw-r--r--fs/nfsd/nfs4callback.c144
-rw-r--r--fs/nfsd/nfs4idmap.c20
-rw-r--r--fs/nfsd/nfs4proc.c49
-rw-r--r--fs/nfsd/nfs4recover.c205
-rw-r--r--fs/nfsd/nfs4state.c115
-rw-r--r--fs/nfsd/nfs4xdr.c75
-rw-r--r--fs/nfsd/nfscache.c214
-rw-r--r--fs/nfsd/nfsctl.c45
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfsfh.c6
-rw-r--r--fs/nfsd/state.h31
-rw-r--r--fs/nfsd/vfs.c37
-rw-r--r--fs/nfsd/xdr4.h14
-rw-r--r--include/linux/nfs4.h26
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/sunrpc/svc.h1
-rw-r--r--include/uapi/linux/nfsd/export.h5
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svc_xprt.c81
-rw-r--r--net/sunrpc/svcsock.c25
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h7
35 files changed, 942 insertions, 406 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 312393f32948..db5dc1598716 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -233,9 +233,13 @@ if NETWORK_FILESYSTEMS
233source "fs/nfs/Kconfig" 233source "fs/nfs/Kconfig"
234source "fs/nfsd/Kconfig" 234source "fs/nfsd/Kconfig"
235 235
236config GRACE_PERIOD
237 tristate
238
236config LOCKD 239config LOCKD
237 tristate 240 tristate
238 depends on FILE_LOCKING 241 depends on FILE_LOCKING
242 select GRACE_PERIOD
239 243
240config LOCKD_V4 244config LOCKD_V4
241 bool 245 bool
@@ -249,7 +253,7 @@ config NFS_ACL_SUPPORT
249 253
250config NFS_COMMON 254config NFS_COMMON
251 bool 255 bool
252 depends on NFSD || NFS_FS 256 depends on NFSD || NFS_FS || LOCKD
253 default y 257 default y
254 258
255source "net/sunrpc/Kconfig" 259source "net/sunrpc/Kconfig"
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index ca58d64374ca..9b320cc2a8cf 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -5,6 +5,7 @@
5obj-$(CONFIG_LOCKD) += lockd.o 5obj-$(CONFIG_LOCKD) += lockd.o
6 6
7lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ 7lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
8 svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o 8 svcshare.o svcproc.o svcsubs.o mon.o xdr.o
9lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o 9lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
10lockd-objs-$(CONFIG_PROC_FS) += procfs.o
10lockd-objs := $(lockd-objs-y) 11lockd-objs := $(lockd-objs-y)
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 5010b55628b4..097bfa3adb1c 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -11,7 +11,6 @@ struct lockd_net {
11 11
12 struct delayed_work grace_period_end; 12 struct delayed_work grace_period_end;
13 struct lock_manager lockd_manager; 13 struct lock_manager lockd_manager;
14 struct list_head grace_list;
15 14
16 spinlock_t nsm_clnt_lock; 15 spinlock_t nsm_clnt_lock;
17 unsigned int nsm_users; 16 unsigned int nsm_users;
diff --git a/fs/lockd/procfs.c b/fs/lockd/procfs.c
new file mode 100644
index 000000000000..2a0a98480e39
--- /dev/null
+++ b/fs/lockd/procfs.c
@@ -0,0 +1,92 @@
1/*
2 * Procfs support for lockd
3 *
4 * Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com>
5 */
6
7#include <linux/fs.h>
8#include <linux/proc_fs.h>
9#include <linux/module.h>
10#include <linux/nsproxy.h>
11#include <net/net_namespace.h>
12
13#include "netns.h"
14#include "procfs.h"
15
16/*
17 * We only allow strings that start with 'Y', 'y', or '1'.
18 */
19static ssize_t
20nlm_end_grace_write(struct file *file, const char __user *buf, size_t size,
21 loff_t *pos)
22{
23 char *data;
24 struct lockd_net *ln = net_generic(current->nsproxy->net_ns,
25 lockd_net_id);
26
27 if (size < 1)
28 return -EINVAL;
29
30 data = simple_transaction_get(file, buf, size);
31 if (IS_ERR(data))
32 return PTR_ERR(data);
33
34 switch(data[0]) {
35 case 'Y':
36 case 'y':
37 case '1':
38 locks_end_grace(&ln->lockd_manager);
39 break;
40 default:
41 return -EINVAL;
42 }
43
44 return size;
45}
46
47static ssize_t
48nlm_end_grace_read(struct file *file, char __user *buf, size_t size,
49 loff_t *pos)
50{
51 struct lockd_net *ln = net_generic(current->nsproxy->net_ns,
52 lockd_net_id);
53 char resp[3];
54
55 resp[0] = list_empty(&ln->lockd_manager.list) ? 'Y' : 'N';
56 resp[1] = '\n';
57 resp[2] = '\0';
58
59 return simple_read_from_buffer(buf, size, pos, resp, sizeof(resp));
60}
61
62static const struct file_operations lockd_end_grace_operations = {
63 .write = nlm_end_grace_write,
64 .read = nlm_end_grace_read,
65 .llseek = default_llseek,
66 .release = simple_transaction_release,
67 .owner = THIS_MODULE,
68};
69
70int __init
71lockd_create_procfs(void)
72{
73 struct proc_dir_entry *entry;
74
75 entry = proc_mkdir("fs/lockd", NULL);
76 if (!entry)
77 return -ENOMEM;
78 entry = proc_create("nlm_end_grace", S_IRUGO|S_IWUSR, entry,
79 &lockd_end_grace_operations);
80 if (!entry) {
81 remove_proc_entry("fs/lockd", NULL);
82 return -ENOMEM;
83 }
84 return 0;
85}
86
87void __exit
88lockd_remove_procfs(void)
89{
90 remove_proc_entry("fs/lockd/nlm_end_grace", NULL);
91 remove_proc_entry("fs/lockd", NULL);
92}
diff --git a/fs/lockd/procfs.h b/fs/lockd/procfs.h
new file mode 100644
index 000000000000..2257a1311027
--- /dev/null
+++ b/fs/lockd/procfs.h
@@ -0,0 +1,28 @@
1/*
2 * Procfs support for lockd
3 *
4 * Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com>
5 */
6#ifndef _LOCKD_PROCFS_H
7#define _LOCKD_PROCFS_H
8
9#include <linux/kconfig.h>
10
11#if IS_ENABLED(CONFIG_PROC_FS)
12int lockd_create_procfs(void);
13void lockd_remove_procfs(void);
14#else
15static inline int
16lockd_create_procfs(void)
17{
18 return 0;
19}
20
21static inline void
22lockd_remove_procfs(void)
23{
24 return;
25}
26#endif /* IS_ENABLED(CONFIG_PROC_FS) */
27
28#endif /* _LOCKD_PROCFS_H */
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index ec9e082f9ecd..d1bb7ecfd201 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -36,6 +36,7 @@
36#include <linux/nfs.h> 36#include <linux/nfs.h>
37 37
38#include "netns.h" 38#include "netns.h"
39#include "procfs.h"
39 40
40#define NLMDBG_FACILITY NLMDBG_SVC 41#define NLMDBG_FACILITY NLMDBG_SVC
41#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) 42#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
@@ -304,13 +305,16 @@ static int lockd_start_svc(struct svc_serv *serv)
304 svc_sock_update_bufs(serv); 305 svc_sock_update_bufs(serv);
305 serv->sv_maxconn = nlm_max_connections; 306 serv->sv_maxconn = nlm_max_connections;
306 307
307 nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name); 308 nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name);
308 if (IS_ERR(nlmsvc_task)) { 309 if (IS_ERR(nlmsvc_task)) {
309 error = PTR_ERR(nlmsvc_task); 310 error = PTR_ERR(nlmsvc_task);
310 printk(KERN_WARNING 311 printk(KERN_WARNING
311 "lockd_up: kthread_run failed, error=%d\n", error); 312 "lockd_up: kthread_run failed, error=%d\n", error);
312 goto out_task; 313 goto out_task;
313 } 314 }
315 nlmsvc_rqst->rq_task = nlmsvc_task;
316 wake_up_process(nlmsvc_task);
317
314 dprintk("lockd_up: service started\n"); 318 dprintk("lockd_up: service started\n");
315 return 0; 319 return 0;
316 320
@@ -581,7 +585,7 @@ static int lockd_init_net(struct net *net)
581 struct lockd_net *ln = net_generic(net, lockd_net_id); 585 struct lockd_net *ln = net_generic(net, lockd_net_id);
582 586
583 INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); 587 INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
584 INIT_LIST_HEAD(&ln->grace_list); 588 INIT_LIST_HEAD(&ln->lockd_manager.list);
585 spin_lock_init(&ln->nsm_clnt_lock); 589 spin_lock_init(&ln->nsm_clnt_lock);
586 return 0; 590 return 0;
587} 591}
@@ -615,8 +619,15 @@ static int __init init_nlm(void)
615 err = register_pernet_subsys(&lockd_net_ops); 619 err = register_pernet_subsys(&lockd_net_ops);
616 if (err) 620 if (err)
617 goto err_pernet; 621 goto err_pernet;
622
623 err = lockd_create_procfs();
624 if (err)
625 goto err_procfs;
626
618 return 0; 627 return 0;
619 628
629err_procfs:
630 unregister_pernet_subsys(&lockd_net_ops);
620err_pernet: 631err_pernet:
621#ifdef CONFIG_SYSCTL 632#ifdef CONFIG_SYSCTL
622 unregister_sysctl_table(nlm_sysctl_table); 633 unregister_sysctl_table(nlm_sysctl_table);
@@ -629,6 +640,7 @@ static void __exit exit_nlm(void)
629{ 640{
630 /* FIXME: delete all NLM clients */ 641 /* FIXME: delete all NLM clients */
631 nlm_shutdown_hosts(); 642 nlm_shutdown_hosts();
643 lockd_remove_procfs();
632 unregister_pernet_subsys(&lockd_net_ops); 644 unregister_pernet_subsys(&lockd_net_ops);
633#ifdef CONFIG_SYSCTL 645#ifdef CONFIG_SYSCTL
634 unregister_sysctl_table(nlm_sysctl_table); 646 unregister_sysctl_table(nlm_sysctl_table);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 54de482143cc..b8fb3a4ef649 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -235,7 +235,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
235 235
236 cb_info->serv = serv; 236 cb_info->serv = serv;
237 cb_info->rqst = rqstp; 237 cb_info->rqst = rqstp;
238 cb_info->task = kthread_run(callback_svc, cb_info->rqst, 238 cb_info->task = kthread_create(callback_svc, cb_info->rqst,
239 "nfsv4.%u-svc", minorversion); 239 "nfsv4.%u-svc", minorversion);
240 if (IS_ERR(cb_info->task)) { 240 if (IS_ERR(cb_info->task)) {
241 ret = PTR_ERR(cb_info->task); 241 ret = PTR_ERR(cb_info->task);
@@ -244,6 +244,8 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
244 cb_info->task = NULL; 244 cb_info->task = NULL;
245 return ret; 245 return ret;
246 } 246 }
247 rqstp->rq_task = cb_info->task;
248 wake_up_process(cb_info->task);
247 dprintk("nfs_callback_up: service started\n"); 249 dprintk("nfs_callback_up: service started\n");
248 return 0; 250 return 0;
249} 251}
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile
index f689ed82af3a..d153ca3ea577 100644
--- a/fs/nfs_common/Makefile
+++ b/fs/nfs_common/Makefile
@@ -3,5 +3,6 @@
3# 3#
4 4
5obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o 5obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
6
7nfs_acl-objs := nfsacl.o 6nfs_acl-objs := nfsacl.o
7
8obj-$(CONFIG_GRACE_PERIOD) += grace.o
diff --git a/fs/lockd/grace.c b/fs/nfs_common/grace.c
index 6d1ee7204c88..ae6e58ea4de5 100644
--- a/fs/lockd/grace.c
+++ b/fs/nfs_common/grace.c
@@ -1,17 +1,20 @@
1/* 1/*
2 * Common code for control of lockd and nfsv4 grace periods. 2 * Common code for control of lockd and nfsv4 grace periods.
3 *
4 * Transplanted from lockd code
3 */ 5 */
4 6
5#include <linux/module.h> 7#include <linux/module.h>
6#include <linux/lockd/bind.h>
7#include <net/net_namespace.h> 8#include <net/net_namespace.h>
9#include <net/netns/generic.h>
10#include <linux/fs.h>
8 11
9#include "netns.h" 12static int grace_net_id;
10
11static DEFINE_SPINLOCK(grace_lock); 13static DEFINE_SPINLOCK(grace_lock);
12 14
13/** 15/**
14 * locks_start_grace 16 * locks_start_grace
17 * @net: net namespace that this lock manager belongs to
15 * @lm: who this grace period is for 18 * @lm: who this grace period is for
16 * 19 *
17 * A grace period is a period during which locks should not be given 20 * A grace period is a period during which locks should not be given
@@ -21,18 +24,20 @@ static DEFINE_SPINLOCK(grace_lock);
21 * 24 *
22 * This function is called to start a grace period. 25 * This function is called to start a grace period.
23 */ 26 */
24void locks_start_grace(struct net *net, struct lock_manager *lm) 27void
28locks_start_grace(struct net *net, struct lock_manager *lm)
25{ 29{
26 struct lockd_net *ln = net_generic(net, lockd_net_id); 30 struct list_head *grace_list = net_generic(net, grace_net_id);
27 31
28 spin_lock(&grace_lock); 32 spin_lock(&grace_lock);
29 list_add(&lm->list, &ln->grace_list); 33 list_add(&lm->list, grace_list);
30 spin_unlock(&grace_lock); 34 spin_unlock(&grace_lock);
31} 35}
32EXPORT_SYMBOL_GPL(locks_start_grace); 36EXPORT_SYMBOL_GPL(locks_start_grace);
33 37
34/** 38/**
35 * locks_end_grace 39 * locks_end_grace
40 * @net: net namespace that this lock manager belongs to
36 * @lm: who this grace period is for 41 * @lm: who this grace period is for
37 * 42 *
38 * Call this function to state that the given lock manager is ready to 43 * Call this function to state that the given lock manager is ready to
@@ -41,7 +46,8 @@ EXPORT_SYMBOL_GPL(locks_start_grace);
41 * Note that callers count on it being safe to call this more than once, 46 * Note that callers count on it being safe to call this more than once,
42 * and the second call should be a no-op. 47 * and the second call should be a no-op.
43 */ 48 */
44void locks_end_grace(struct lock_manager *lm) 49void
50locks_end_grace(struct lock_manager *lm)
45{ 51{
46 spin_lock(&grace_lock); 52 spin_lock(&grace_lock);
47 list_del_init(&lm->list); 53 list_del_init(&lm->list);
@@ -56,10 +62,52 @@ EXPORT_SYMBOL_GPL(locks_end_grace);
56 * to answer ordinary lock requests, and when they should accept only 62 * to answer ordinary lock requests, and when they should accept only
57 * lock reclaims. 63 * lock reclaims.
58 */ 64 */
59int locks_in_grace(struct net *net) 65int
66locks_in_grace(struct net *net)
60{ 67{
61 struct lockd_net *ln = net_generic(net, lockd_net_id); 68 struct list_head *grace_list = net_generic(net, grace_net_id);
62 69
63 return !list_empty(&ln->grace_list); 70 return !list_empty(grace_list);
64} 71}
65EXPORT_SYMBOL_GPL(locks_in_grace); 72EXPORT_SYMBOL_GPL(locks_in_grace);
73
74static int __net_init
75grace_init_net(struct net *net)
76{
77 struct list_head *grace_list = net_generic(net, grace_net_id);
78
79 INIT_LIST_HEAD(grace_list);
80 return 0;
81}
82
83static void __net_exit
84grace_exit_net(struct net *net)
85{
86 struct list_head *grace_list = net_generic(net, grace_net_id);
87
88 BUG_ON(!list_empty(grace_list));
89}
90
91static struct pernet_operations grace_net_ops = {
92 .init = grace_init_net,
93 .exit = grace_exit_net,
94 .id = &grace_net_id,
95 .size = sizeof(struct list_head),
96};
97
98static int __init
99init_grace(void)
100{
101 return register_pernet_subsys(&grace_net_ops);
102}
103
104static void __exit
105exit_grace(void)
106{
107 unregister_pernet_subsys(&grace_net_ops);
108}
109
110MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>");
111MODULE_LICENSE("GPL");
112module_init(init_grace)
113module_exit(exit_grace)
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index f994e750e0d1..73395156bdb4 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -71,6 +71,7 @@ config NFSD_V4
71 select FS_POSIX_ACL 71 select FS_POSIX_ACL
72 select SUNRPC_GSS 72 select SUNRPC_GSS
73 select CRYPTO 73 select CRYPTO
74 select GRACE_PERIOD
74 help 75 help
75 This option enables support in your system's NFS server for 76 This option enables support in your system's NFS server for
76 version 4 of the NFS protocol (RFC 3530). 77 version 4 of the NFS protocol (RFC 3530).
@@ -94,9 +95,6 @@ config NFSD_V4_SECURITY_LABEL
94 If you do not wish to enable fine-grained security labels SELinux or 95 If you do not wish to enable fine-grained security labels SELinux or
95 Smack policies on NFSv4 files, say N. 96 Smack policies on NFSv4 files, say N.
96 97
97 WARNING: there is still a chance of backwards-incompatible protocol changes.
98 For now we recommend "Y" only for developers and testers.
99
100config NFSD_FAULT_INJECTION 98config NFSD_FAULT_INJECTION
101 bool "NFS server manual fault injection" 99 bool "NFS server manual fault injection"
102 depends on NFSD_V4 && DEBUG_KERNEL 100 depends on NFSD_V4 && DEBUG_KERNEL
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index b582f9ab6b2a..dd96a3830004 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -18,7 +18,6 @@
18 * is much larger than a sockaddr_in6. 18 * is much larger than a sockaddr_in6.
19 */ 19 */
20struct svc_cacherep { 20struct svc_cacherep {
21 struct hlist_node c_hash;
22 struct list_head c_lru; 21 struct list_head c_lru;
23 22
24 unsigned char c_state, /* unused, inprog, done */ 23 unsigned char c_state, /* unused, inprog, done */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 72ffd7cce3c3..30a739d896ff 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1145,6 +1145,7 @@ static struct flags {
1145 { NFSEXP_ALLSQUASH, {"all_squash", ""}}, 1145 { NFSEXP_ALLSQUASH, {"all_squash", ""}},
1146 { NFSEXP_ASYNC, {"async", "sync"}}, 1146 { NFSEXP_ASYNC, {"async", "sync"}},
1147 { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, 1147 { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
1148 { NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}},
1148 { NFSEXP_NOHIDE, {"nohide", ""}}, 1149 { NFSEXP_NOHIDE, {"nohide", ""}},
1149 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, 1150 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
1150 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, 1151 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index fa2525b2e9d7..12f2aab4f614 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -223,11 +223,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
223 newfhp = fh_init(&resp->fh, NFS3_FHSIZE); 223 newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
224 attr = &argp->attrs; 224 attr = &argp->attrs;
225 225
226 /* Get the directory inode */
227 nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
228 if (nfserr)
229 RETURN_STATUS(nfserr);
230
231 /* Unfudge the mode bits */ 226 /* Unfudge the mode bits */
232 attr->ia_mode &= ~S_IFMT; 227 attr->ia_mode &= ~S_IFMT;
233 if (!(attr->ia_valid & ATTR_MODE)) { 228 if (!(attr->ia_valid & ATTR_MODE)) {
@@ -471,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
471 resp->buflen = resp->count; 466 resp->buflen = resp->count;
472 resp->rqstp = rqstp; 467 resp->rqstp = rqstp;
473 offset = argp->cookie; 468 offset = argp->cookie;
469
470 nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP);
471 if (nfserr)
472 RETURN_STATUS(nfserr);
473
474 if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS)
475 RETURN_STATUS(nfserr_notsupp);
476
474 nfserr = nfsd_readdir(rqstp, &resp->fh, 477 nfserr = nfsd_readdir(rqstp, &resp->fh,
475 &offset, 478 &offset,
476 &resp->common, 479 &resp->common,
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index e0be57b0f79b..ed2b1151b171 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -49,12 +49,6 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
49 49
50/* Index of predefined Linux callback client operations */ 50/* Index of predefined Linux callback client operations */
51 51
52enum {
53 NFSPROC4_CLNT_CB_NULL = 0,
54 NFSPROC4_CLNT_CB_RECALL,
55 NFSPROC4_CLNT_CB_SEQUENCE,
56};
57
58struct nfs4_cb_compound_hdr { 52struct nfs4_cb_compound_hdr {
59 /* args */ 53 /* args */
60 u32 ident; /* minorversion 0 only */ 54 u32 ident; /* minorversion 0 only */
@@ -494,7 +488,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
494static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, 488static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
495 const struct nfsd4_callback *cb) 489 const struct nfsd4_callback *cb)
496{ 490{
497 const struct nfs4_delegation *args = cb->cb_op; 491 const struct nfs4_delegation *dp = cb_to_delegation(cb);
498 struct nfs4_cb_compound_hdr hdr = { 492 struct nfs4_cb_compound_hdr hdr = {
499 .ident = cb->cb_clp->cl_cb_ident, 493 .ident = cb->cb_clp->cl_cb_ident,
500 .minorversion = cb->cb_minorversion, 494 .minorversion = cb->cb_minorversion,
@@ -502,7 +496,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
502 496
503 encode_cb_compound4args(xdr, &hdr); 497 encode_cb_compound4args(xdr, &hdr);
504 encode_cb_sequence4args(xdr, cb, &hdr); 498 encode_cb_sequence4args(xdr, cb, &hdr);
505 encode_cb_recall4args(xdr, args, &hdr); 499 encode_cb_recall4args(xdr, dp, &hdr);
506 encode_cb_nops(&hdr); 500 encode_cb_nops(&hdr);
507} 501}
508 502
@@ -746,27 +740,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
746 740
747static struct workqueue_struct *callback_wq; 741static struct workqueue_struct *callback_wq;
748 742
749static void run_nfsd4_cb(struct nfsd4_callback *cb)
750{
751 queue_work(callback_wq, &cb->cb_work);
752}
753
754static void do_probe_callback(struct nfs4_client *clp)
755{
756 struct nfsd4_callback *cb = &clp->cl_cb_null;
757
758 cb->cb_op = NULL;
759 cb->cb_clp = clp;
760
761 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
762 cb->cb_msg.rpc_argp = NULL;
763 cb->cb_msg.rpc_resp = NULL;
764
765 cb->cb_ops = &nfsd4_cb_probe_ops;
766
767 run_nfsd4_cb(cb);
768}
769
770/* 743/*
771 * Poke the callback thread to process any updates to the callback 744 * Poke the callback thread to process any updates to the callback
772 * parameters, and send a null probe. 745 * parameters, and send a null probe.
@@ -775,7 +748,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp)
775{ 748{
776 clp->cl_cb_state = NFSD4_CB_UNKNOWN; 749 clp->cl_cb_state = NFSD4_CB_UNKNOWN;
777 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); 750 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
778 do_probe_callback(clp); 751 nfsd4_run_cb(&clp->cl_cb_null);
779} 752}
780 753
781void nfsd4_probe_callback_sync(struct nfs4_client *clp) 754void nfsd4_probe_callback_sync(struct nfs4_client *clp)
@@ -847,23 +820,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
847 rpc_wake_up_next(&clp->cl_cb_waitq); 820 rpc_wake_up_next(&clp->cl_cb_waitq);
848 dprintk("%s: freed slot, new seqid=%d\n", __func__, 821 dprintk("%s: freed slot, new seqid=%d\n", __func__,
849 clp->cl_cb_session->se_cb_seq_nr); 822 clp->cl_cb_session->se_cb_seq_nr);
850
851 /* We're done looking into the sequence information */
852 task->tk_msg.rpc_resp = NULL;
853 } 823 }
854}
855
856
857static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
858{
859 struct nfsd4_callback *cb = calldata;
860 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
861 struct nfs4_client *clp = cb->cb_clp;
862 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
863
864 nfsd4_cb_done(task, calldata);
865 824
866 if (current_rpc_client != task->tk_client) { 825 if (clp->cl_cb_client != task->tk_client) {
867 /* We're shutting down or changing cl_cb_client; leave 826 /* We're shutting down or changing cl_cb_client; leave
868 * it to nfsd4_process_cb_update to restart the call if 827 * it to nfsd4_process_cb_update to restart the call if
869 * necessary. */ 828 * necessary. */
@@ -872,47 +831,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
872 831
873 if (cb->cb_done) 832 if (cb->cb_done)
874 return; 833 return;
875 switch (task->tk_status) { 834
835 switch (cb->cb_ops->done(cb, task)) {
876 case 0: 836 case 0:
877 cb->cb_done = true; 837 task->tk_status = 0;
838 rpc_restart_call_prepare(task);
878 return; 839 return;
879 case -EBADHANDLE: 840 case 1:
880 case -NFS4ERR_BAD_STATEID:
881 /* Race: client probably got cb_recall
882 * before open reply granting delegation */
883 break; 841 break;
884 default: 842 case -1:
885 /* Network partition? */ 843 /* Network partition? */
886 nfsd4_mark_cb_down(clp, task->tk_status); 844 nfsd4_mark_cb_down(clp, task->tk_status);
845 break;
846 default:
847 BUG();
887 } 848 }
888 if (dp->dl_retries--) {
889 rpc_delay(task, 2*HZ);
890 task->tk_status = 0;
891 rpc_restart_call_prepare(task);
892 return;
893 }
894 nfsd4_mark_cb_down(clp, task->tk_status);
895 cb->cb_done = true; 849 cb->cb_done = true;
896} 850}
897 851
898static void nfsd4_cb_recall_release(void *calldata) 852static void nfsd4_cb_release(void *calldata)
899{ 853{
900 struct nfsd4_callback *cb = calldata; 854 struct nfsd4_callback *cb = calldata;
901 struct nfs4_client *clp = cb->cb_clp; 855 struct nfs4_client *clp = cb->cb_clp;
902 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
903 856
904 if (cb->cb_done) { 857 if (cb->cb_done) {
905 spin_lock(&clp->cl_lock); 858 spin_lock(&clp->cl_lock);
906 list_del(&cb->cb_per_client); 859 list_del(&cb->cb_per_client);
907 spin_unlock(&clp->cl_lock); 860 spin_unlock(&clp->cl_lock);
908 nfs4_put_stid(&dp->dl_stid); 861
862 cb->cb_ops->release(cb);
909 } 863 }
910} 864}
911 865
912static const struct rpc_call_ops nfsd4_cb_recall_ops = { 866static const struct rpc_call_ops nfsd4_cb_ops = {
913 .rpc_call_prepare = nfsd4_cb_prepare, 867 .rpc_call_prepare = nfsd4_cb_prepare,
914 .rpc_call_done = nfsd4_cb_recall_done, 868 .rpc_call_done = nfsd4_cb_done,
915 .rpc_release = nfsd4_cb_recall_release, 869 .rpc_release = nfsd4_cb_release,
916}; 870};
917 871
918int nfsd4_create_callback_queue(void) 872int nfsd4_create_callback_queue(void)
@@ -937,16 +891,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
937 * instead, nfsd4_run_cb_null() will detect the killed 891 * instead, nfsd4_run_cb_null() will detect the killed
938 * client, destroy the rpc client, and stop: 892 * client, destroy the rpc client, and stop:
939 */ 893 */
940 do_probe_callback(clp); 894 nfsd4_run_cb(&clp->cl_cb_null);
941 flush_workqueue(callback_wq); 895 flush_workqueue(callback_wq);
942} 896}
943 897
944static void nfsd4_release_cb(struct nfsd4_callback *cb)
945{
946 if (cb->cb_ops->rpc_release)
947 cb->cb_ops->rpc_release(cb);
948}
949
950/* requires cl_lock: */ 898/* requires cl_lock: */
951static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) 899static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
952{ 900{
@@ -1009,63 +957,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
1009 } 957 }
1010 /* Yay, the callback channel's back! Restart any callbacks: */ 958 /* Yay, the callback channel's back! Restart any callbacks: */
1011 list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) 959 list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
1012 run_nfsd4_cb(cb); 960 queue_work(callback_wq, &cb->cb_work);
1013} 961}
1014 962
1015static void 963static void
1016nfsd4_run_callback_rpc(struct nfsd4_callback *cb) 964nfsd4_run_cb_work(struct work_struct *work)
1017{ 965{
966 struct nfsd4_callback *cb =
967 container_of(work, struct nfsd4_callback, cb_work);
1018 struct nfs4_client *clp = cb->cb_clp; 968 struct nfs4_client *clp = cb->cb_clp;
1019 struct rpc_clnt *clnt; 969 struct rpc_clnt *clnt;
1020 970
971 if (cb->cb_ops && cb->cb_ops->prepare)
972 cb->cb_ops->prepare(cb);
973
1021 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) 974 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
1022 nfsd4_process_cb_update(cb); 975 nfsd4_process_cb_update(cb);
1023 976
1024 clnt = clp->cl_cb_client; 977 clnt = clp->cl_cb_client;
1025 if (!clnt) { 978 if (!clnt) {
1026 /* Callback channel broken, or client killed; give up: */ 979 /* Callback channel broken, or client killed; give up: */
1027 nfsd4_release_cb(cb); 980 if (cb->cb_ops && cb->cb_ops->release)
981 cb->cb_ops->release(cb);
1028 return; 982 return;
1029 } 983 }
1030 cb->cb_msg.rpc_cred = clp->cl_cb_cred; 984 cb->cb_msg.rpc_cred = clp->cl_cb_cred;
1031 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 985 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
1032 cb->cb_ops, cb); 986 cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
1033} 987}
1034 988
1035void 989void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
1036nfsd4_run_cb_null(struct work_struct *w) 990 struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
1037{ 991{
1038 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
1039 cb_work);
1040 nfsd4_run_callback_rpc(cb);
1041}
1042
1043void
1044nfsd4_run_cb_recall(struct work_struct *w)
1045{
1046 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
1047 cb_work);
1048
1049 nfsd4_prepare_cb_recall(cb->cb_op);
1050 nfsd4_run_callback_rpc(cb);
1051}
1052
1053void nfsd4_cb_recall(struct nfs4_delegation *dp)
1054{
1055 struct nfsd4_callback *cb = &dp->dl_recall;
1056 struct nfs4_client *clp = dp->dl_stid.sc_client;
1057
1058 dp->dl_retries = 1;
1059 cb->cb_op = dp;
1060 cb->cb_clp = clp; 992 cb->cb_clp = clp;
1061 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; 993 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
1062 cb->cb_msg.rpc_argp = cb; 994 cb->cb_msg.rpc_argp = cb;
1063 cb->cb_msg.rpc_resp = cb; 995 cb->cb_msg.rpc_resp = cb;
1064 996 cb->cb_ops = ops;
1065 cb->cb_ops = &nfsd4_cb_recall_ops; 997 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
1066
1067 INIT_LIST_HEAD(&cb->cb_per_client); 998 INIT_LIST_HEAD(&cb->cb_per_client);
1068 cb->cb_done = true; 999 cb->cb_done = true;
1000}
1069 1001
1070 run_nfsd4_cb(&dp->dl_recall); 1002void nfsd4_run_cb(struct nfsd4_callback *cb)
1003{
1004 queue_work(callback_wq, &cb->cb_work);
1071} 1005}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index a0ab0a847d69..e1b3d3d472da 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -215,7 +215,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
215 memset(&ent, 0, sizeof(ent)); 215 memset(&ent, 0, sizeof(ent));
216 216
217 /* Authentication name */ 217 /* Authentication name */
218 if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) 218 len = qword_get(&buf, buf1, PAGE_SIZE);
219 if (len <= 0 || len >= IDMAP_NAMESZ)
219 goto out; 220 goto out;
220 memcpy(ent.authname, buf1, sizeof(ent.authname)); 221 memcpy(ent.authname, buf1, sizeof(ent.authname));
221 222
@@ -245,12 +246,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
245 /* Name */ 246 /* Name */
246 error = -EINVAL; 247 error = -EINVAL;
247 len = qword_get(&buf, buf1, PAGE_SIZE); 248 len = qword_get(&buf, buf1, PAGE_SIZE);
248 if (len < 0) 249 if (len < 0 || len >= IDMAP_NAMESZ)
249 goto out; 250 goto out;
250 if (len == 0) 251 if (len == 0)
251 set_bit(CACHE_NEGATIVE, &ent.h.flags); 252 set_bit(CACHE_NEGATIVE, &ent.h.flags);
252 else if (len >= IDMAP_NAMESZ)
253 goto out;
254 else 253 else
255 memcpy(ent.name, buf1, sizeof(ent.name)); 254 memcpy(ent.name, buf1, sizeof(ent.name));
256 error = -ENOMEM; 255 error = -ENOMEM;
@@ -259,15 +258,12 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
259 goto out; 258 goto out;
260 259
261 cache_put(&res->h, cd); 260 cache_put(&res->h, cd);
262
263 error = 0; 261 error = 0;
264out: 262out:
265 kfree(buf1); 263 kfree(buf1);
266
267 return error; 264 return error;
268} 265}
269 266
270
271static struct ent * 267static struct ent *
272idtoname_lookup(struct cache_detail *cd, struct ent *item) 268idtoname_lookup(struct cache_detail *cd, struct ent *item)
273{ 269{
@@ -368,7 +364,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
368{ 364{
369 struct ent ent, *res; 365 struct ent ent, *res;
370 char *buf1; 366 char *buf1;
371 int error = -EINVAL; 367 int len, error = -EINVAL;
372 368
373 if (buf[buflen - 1] != '\n') 369 if (buf[buflen - 1] != '\n')
374 return (-EINVAL); 370 return (-EINVAL);
@@ -381,7 +377,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
381 memset(&ent, 0, sizeof(ent)); 377 memset(&ent, 0, sizeof(ent));
382 378
383 /* Authentication name */ 379 /* Authentication name */
384 if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) 380 len = qword_get(&buf, buf1, PAGE_SIZE);
381 if (len <= 0 || len >= IDMAP_NAMESZ)
385 goto out; 382 goto out;
386 memcpy(ent.authname, buf1, sizeof(ent.authname)); 383 memcpy(ent.authname, buf1, sizeof(ent.authname));
387 384
@@ -392,8 +389,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
392 IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; 389 IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
393 390
394 /* Name */ 391 /* Name */
395 error = qword_get(&buf, buf1, PAGE_SIZE); 392 len = qword_get(&buf, buf1, PAGE_SIZE);
396 if (error <= 0 || error >= IDMAP_NAMESZ) 393 if (len <= 0 || len >= IDMAP_NAMESZ)
397 goto out; 394 goto out;
398 memcpy(ent.name, buf1, sizeof(ent.name)); 395 memcpy(ent.name, buf1, sizeof(ent.name));
399 396
@@ -421,7 +418,6 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
421 error = 0; 418 error = 0;
422out: 419out:
423 kfree(buf1); 420 kfree(buf1);
424
425 return (error); 421 return (error);
426} 422}
427 423
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5e0dc528a0e8..cdeb3cfd6f32 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1013 return status; 1013 return status;
1014} 1014}
1015 1015
1016static __be32
1017nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1018 struct nfsd4_seek *seek)
1019{
1020 int whence;
1021 __be32 status;
1022 struct file *file;
1023
1024 status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
1025 &seek->seek_stateid,
1026 RD_STATE, &file);
1027 if (status) {
1028 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
1029 return status;
1030 }
1031
1032 switch (seek->seek_whence) {
1033 case NFS4_CONTENT_DATA:
1034 whence = SEEK_DATA;
1035 break;
1036 case NFS4_CONTENT_HOLE:
1037 whence = SEEK_HOLE;
1038 break;
1039 default:
1040 status = nfserr_union_notsupp;
1041 goto out;
1042 }
1043
1044 /*
1045 * Note: This call does change file->f_pos, but nothing in NFSD
1046 * should ever use file->f_pos.
1047 */
1048 seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
1049 if (seek->seek_pos < 0)
1050 status = nfserrno(seek->seek_pos);
1051 else if (seek->seek_pos >= i_size_read(file_inode(file)))
1052 seek->seek_eof = true;
1053
1054out:
1055 fput(file);
1056 return status;
1057}
1058
1016/* This routine never returns NFS_OK! If there are no other errors, it 1059/* This routine never returns NFS_OK! If there are no other errors, it
1017 * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the 1060 * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
1018 * attributes matched. VERIFY is implemented by mapping NFSERR_SAME 1061 * attributes matched. VERIFY is implemented by mapping NFSERR_SAME
@@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
1881 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, 1924 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
1882 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 1925 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1883 }, 1926 },
1927
1928 /* NFSv4.2 operations */
1929 [OP_SEEK] = {
1930 .op_func = (nfsd4op_func)nfsd4_seek,
1931 .op_name = "OP_SEEK",
1932 },
1884}; 1933};
1885 1934
1886int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) 1935int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 9c271f42604a..ea95a2bc21b5 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops {
58 void (*create)(struct nfs4_client *); 58 void (*create)(struct nfs4_client *);
59 void (*remove)(struct nfs4_client *); 59 void (*remove)(struct nfs4_client *);
60 int (*check)(struct nfs4_client *); 60 int (*check)(struct nfs4_client *);
61 void (*grace_done)(struct nfsd_net *, time_t); 61 void (*grace_done)(struct nfsd_net *);
62}; 62};
63 63
64/* Globals */ 64/* Globals */
@@ -188,7 +188,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
188 188
189 status = mnt_want_write_file(nn->rec_file); 189 status = mnt_want_write_file(nn->rec_file);
190 if (status) 190 if (status)
191 return; 191 goto out_creds;
192 192
193 dir = nn->rec_file->f_path.dentry; 193 dir = nn->rec_file->f_path.dentry;
194 /* lock the parent */ 194 /* lock the parent */
@@ -228,6 +228,7 @@ out_unlock:
228 user_recovery_dirname); 228 user_recovery_dirname);
229 } 229 }
230 mnt_drop_write_file(nn->rec_file); 230 mnt_drop_write_file(nn->rec_file);
231out_creds:
231 nfs4_reset_creds(original_cred); 232 nfs4_reset_creds(original_cred);
232} 233}
233 234
@@ -392,7 +393,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
392} 393}
393 394
394static void 395static void
395nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) 396nfsd4_recdir_purge_old(struct nfsd_net *nn)
396{ 397{
397 int status; 398 int status;
398 399
@@ -479,6 +480,16 @@ nfsd4_init_recdir(struct net *net)
479 return status; 480 return status;
480} 481}
481 482
483static void
484nfsd4_shutdown_recdir(struct net *net)
485{
486 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
487
488 if (!nn->rec_file)
489 return;
490 fput(nn->rec_file);
491 nn->rec_file = NULL;
492}
482 493
483static int 494static int
484nfs4_legacy_state_init(struct net *net) 495nfs4_legacy_state_init(struct net *net)
@@ -512,10 +523,13 @@ nfsd4_load_reboot_recovery_data(struct net *net)
512 int status; 523 int status;
513 524
514 status = nfsd4_init_recdir(net); 525 status = nfsd4_init_recdir(net);
515 if (!status)
516 status = nfsd4_recdir_load(net);
517 if (status) 526 if (status)
518 printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); 527 return status;
528
529 status = nfsd4_recdir_load(net);
530 if (status)
531 nfsd4_shutdown_recdir(net);
532
519 return status; 533 return status;
520} 534}
521 535
@@ -546,21 +560,12 @@ err:
546} 560}
547 561
548static void 562static void
549nfsd4_shutdown_recdir(struct nfsd_net *nn)
550{
551 if (!nn->rec_file)
552 return;
553 fput(nn->rec_file);
554 nn->rec_file = NULL;
555}
556
557static void
558nfsd4_legacy_tracking_exit(struct net *net) 563nfsd4_legacy_tracking_exit(struct net *net)
559{ 564{
560 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 565 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
561 566
562 nfs4_release_reclaim(nn); 567 nfs4_release_reclaim(nn);
563 nfsd4_shutdown_recdir(nn); 568 nfsd4_shutdown_recdir(net);
564 nfs4_legacy_state_shutdown(net); 569 nfs4_legacy_state_shutdown(net);
565} 570}
566 571
@@ -1016,7 +1021,7 @@ nfsd4_cld_check(struct nfs4_client *clp)
1016} 1021}
1017 1022
1018static void 1023static void
1019nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) 1024nfsd4_cld_grace_done(struct nfsd_net *nn)
1020{ 1025{
1021 int ret; 1026 int ret;
1022 struct cld_upcall *cup; 1027 struct cld_upcall *cup;
@@ -1029,7 +1034,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time)
1029 } 1034 }
1030 1035
1031 cup->cu_msg.cm_cmd = Cld_GraceDone; 1036 cup->cu_msg.cm_cmd = Cld_GraceDone;
1032 cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; 1037 cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
1033 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1038 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1034 if (!ret) 1039 if (!ret)
1035 ret = cup->cu_msg.cm_status; 1040 ret = cup->cu_msg.cm_status;
@@ -1062,6 +1067,8 @@ MODULE_PARM_DESC(cltrack_legacy_disable,
1062 1067
1063#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" 1068#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
1064#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" 1069#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
1070#define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION="
1071#define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START="
1065 1072
1066static char * 1073static char *
1067nfsd4_cltrack_legacy_topdir(void) 1074nfsd4_cltrack_legacy_topdir(void)
@@ -1126,10 +1133,60 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
1126 return result; 1133 return result;
1127} 1134}
1128 1135
1136static char *
1137nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
1138{
1139 int copied;
1140 size_t len;
1141 char *result;
1142
1143 /* prefix + Y/N character + terminating NULL */
1144 len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1;
1145
1146 result = kmalloc(len, GFP_KERNEL);
1147 if (!result)
1148 return result;
1149
1150 copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c",
1151 clp->cl_minorversion ? 'Y' : 'N');
1152 if (copied >= len) {
1153 /* just return nothing if output was truncated */
1154 kfree(result);
1155 return NULL;
1156 }
1157
1158 return result;
1159}
1160
1161static char *
1162nfsd4_cltrack_grace_start(time_t grace_start)
1163{
1164 int copied;
1165 size_t len;
1166 char *result;
1167
1168 /* prefix + max width of int64_t string + terminating NULL */
1169 len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1;
1170
1171 result = kmalloc(len, GFP_KERNEL);
1172 if (!result)
1173 return result;
1174
1175 copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
1176 grace_start);
1177 if (copied >= len) {
1178 /* just return nothing if output was truncated */
1179 kfree(result);
1180 return NULL;
1181 }
1182
1183 return result;
1184}
1185
1129static int 1186static int
1130nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) 1187nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
1131{ 1188{
1132 char *envp[2]; 1189 char *envp[3];
1133 char *argv[4]; 1190 char *argv[4];
1134 int ret; 1191 int ret;
1135 1192
@@ -1140,10 +1197,12 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy)
1140 1197
1141 dprintk("%s: cmd: %s\n", __func__, cmd); 1198 dprintk("%s: cmd: %s\n", __func__, cmd);
1142 dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); 1199 dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
1143 dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); 1200 dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)");
1201 dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)");
1144 1202
1145 envp[0] = legacy; 1203 envp[0] = env0;
1146 envp[1] = NULL; 1204 envp[1] = env1;
1205 envp[2] = NULL;
1147 1206
1148 argv[0] = (char *)cltrack_prog; 1207 argv[0] = (char *)cltrack_prog;
1149 argv[1] = cmd; 1208 argv[1] = cmd;
@@ -1187,28 +1246,78 @@ bin_to_hex_dup(const unsigned char *src, int srclen)
1187} 1246}
1188 1247
1189static int 1248static int
1190nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) 1249nfsd4_umh_cltrack_init(struct net *net)
1191{ 1250{
1251 int ret;
1252 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1253 char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
1254
1192 /* XXX: The usermode helper is not working in container yet. */ 1255 /* XXX: The usermode helper is not working in container yet. */
1193 if (net != &init_net) { 1256 if (net != &init_net) {
1194 WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " 1257 WARN(1, KERN_ERR "NFSD: attempt to initialize umh client "
1195 "tracking in a container!\n"); 1258 "tracking in a container!\n");
1196 return -EINVAL; 1259 return -EINVAL;
1197 } 1260 }
1198 return nfsd4_umh_cltrack_upcall("init", NULL, NULL); 1261
1262 ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
1263 kfree(grace_start);
1264 return ret;
1265}
1266
1267static void
1268nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
1269{
1270 wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK,
1271 TASK_UNINTERRUPTIBLE);
1272}
1273
1274static void
1275nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
1276{
1277 smp_mb__before_atomic();
1278 clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
1279 smp_mb__after_atomic();
1280 wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
1199} 1281}
1200 1282
1201static void 1283static void
1202nfsd4_umh_cltrack_create(struct nfs4_client *clp) 1284nfsd4_umh_cltrack_create(struct nfs4_client *clp)
1203{ 1285{
1204 char *hexid; 1286 char *hexid, *has_session, *grace_start;
1287 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1288
1289 /*
1290 * With v4.0 clients, there's little difference in outcome between a
1291 * create and check operation, and we can end up calling into this
1292 * function multiple times per client (once for each openowner). So,
1293 * for v4.0 clients skip upcalling once the client has been recorded
1294 * on stable storage.
1295 *
1296 * For v4.1+ clients, the outcome of the two operations is different,
1297 * so we must ensure that we upcall for the create operation. v4.1+
1298 * clients call this on RECLAIM_COMPLETE though, so we should only end
1299 * up doing a single create upcall per client.
1300 */
1301 if (clp->cl_minorversion == 0 &&
1302 test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1303 return;
1205 1304
1206 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1305 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1207 if (!hexid) { 1306 if (!hexid) {
1208 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1307 dprintk("%s: can't allocate memory for upcall!\n", __func__);
1209 return; 1308 return;
1210 } 1309 }
1211 nfsd4_umh_cltrack_upcall("create", hexid, NULL); 1310
1311 has_session = nfsd4_cltrack_client_has_session(clp);
1312 grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
1313
1314 nfsd4_cltrack_upcall_lock(clp);
1315 if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start))
1316 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1317 nfsd4_cltrack_upcall_unlock(clp);
1318
1319 kfree(has_session);
1320 kfree(grace_start);
1212 kfree(hexid); 1321 kfree(hexid);
1213} 1322}
1214 1323
@@ -1217,12 +1326,21 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
1217{ 1326{
1218 char *hexid; 1327 char *hexid;
1219 1328
1329 if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1330 return;
1331
1220 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1332 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1221 if (!hexid) { 1333 if (!hexid) {
1222 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1334 dprintk("%s: can't allocate memory for upcall!\n", __func__);
1223 return; 1335 return;
1224 } 1336 }
1225 nfsd4_umh_cltrack_upcall("remove", hexid, NULL); 1337
1338 nfsd4_cltrack_upcall_lock(clp);
1339 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) &&
1340 nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0)
1341 clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1342 nfsd4_cltrack_upcall_unlock(clp);
1343
1226 kfree(hexid); 1344 kfree(hexid);
1227} 1345}
1228 1346
@@ -1230,30 +1348,45 @@ static int
1230nfsd4_umh_cltrack_check(struct nfs4_client *clp) 1348nfsd4_umh_cltrack_check(struct nfs4_client *clp)
1231{ 1349{
1232 int ret; 1350 int ret;
1233 char *hexid, *legacy; 1351 char *hexid, *has_session, *legacy;
1352
1353 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1354 return 0;
1234 1355
1235 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1356 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1236 if (!hexid) { 1357 if (!hexid) {
1237 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1358 dprintk("%s: can't allocate memory for upcall!\n", __func__);
1238 return -ENOMEM; 1359 return -ENOMEM;
1239 } 1360 }
1361
1362 has_session = nfsd4_cltrack_client_has_session(clp);
1240 legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); 1363 legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
1241 ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); 1364
1365 nfsd4_cltrack_upcall_lock(clp);
1366 if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) {
1367 ret = 0;
1368 } else {
1369 ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy);
1370 if (ret == 0)
1371 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1372 }
1373 nfsd4_cltrack_upcall_unlock(clp);
1374 kfree(has_session);
1242 kfree(legacy); 1375 kfree(legacy);
1243 kfree(hexid); 1376 kfree(hexid);
1377
1244 return ret; 1378 return ret;
1245} 1379}
1246 1380
1247static void 1381static void
1248nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, 1382nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
1249 time_t boot_time)
1250{ 1383{
1251 char *legacy; 1384 char *legacy;
1252 char timestr[22]; /* FIXME: better way to determine max size? */ 1385 char timestr[22]; /* FIXME: better way to determine max size? */
1253 1386
1254 sprintf(timestr, "%ld", boot_time); 1387 sprintf(timestr, "%ld", nn->boot_time);
1255 legacy = nfsd4_cltrack_legacy_topdir(); 1388 legacy = nfsd4_cltrack_legacy_topdir();
1256 nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); 1389 nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
1257 kfree(legacy); 1390 kfree(legacy);
1258} 1391}
1259 1392
@@ -1356,10 +1489,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
1356} 1489}
1357 1490
1358void 1491void
1359nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) 1492nfsd4_record_grace_done(struct nfsd_net *nn)
1360{ 1493{
1361 if (nn->client_tracking_ops) 1494 if (nn->client_tracking_ops)
1362 nn->client_tracking_ops->grace_done(nn, boot_time); 1495 nn->client_tracking_ops->grace_done(nn);
1363} 1496}
1364 1497
1365static int 1498static int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2e80a59e7e91..5c0cac173068 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -96,6 +96,8 @@ static struct kmem_cache *deleg_slab;
96 96
97static void free_session(struct nfsd4_session *); 97static void free_session(struct nfsd4_session *);
98 98
99static struct nfsd4_callback_ops nfsd4_cb_recall_ops;
100
99static bool is_session_dead(struct nfsd4_session *ses) 101static bool is_session_dead(struct nfsd4_session *ses)
100{ 102{
101 return ses->se_flags & NFS4_SESSION_DEAD; 103 return ses->se_flags & NFS4_SESSION_DEAD;
@@ -645,7 +647,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
645 INIT_LIST_HEAD(&dp->dl_perclnt); 647 INIT_LIST_HEAD(&dp->dl_perclnt);
646 INIT_LIST_HEAD(&dp->dl_recall_lru); 648 INIT_LIST_HEAD(&dp->dl_recall_lru);
647 dp->dl_type = NFS4_OPEN_DELEGATE_READ; 649 dp->dl_type = NFS4_OPEN_DELEGATE_READ;
648 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); 650 dp->dl_retries = 1;
651 nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
652 &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
649 return dp; 653 return dp;
650out_dec: 654out_dec:
651 atomic_long_dec(&num_delegations); 655 atomic_long_dec(&num_delegations);
@@ -673,15 +677,20 @@ nfs4_put_stid(struct nfs4_stid *s)
673 677
674static void nfs4_put_deleg_lease(struct nfs4_file *fp) 678static void nfs4_put_deleg_lease(struct nfs4_file *fp)
675{ 679{
676 lockdep_assert_held(&state_lock); 680 struct file *filp = NULL;
681 struct file_lock *fl;
677 682
678 if (!fp->fi_lease) 683 spin_lock(&fp->fi_lock);
679 return; 684 if (fp->fi_lease && atomic_dec_and_test(&fp->fi_delegees)) {
680 if (atomic_dec_and_test(&fp->fi_delegees)) { 685 swap(filp, fp->fi_deleg_file);
681 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); 686 fl = fp->fi_lease;
682 fp->fi_lease = NULL; 687 fp->fi_lease = NULL;
683 fput(fp->fi_deleg_file); 688 }
684 fp->fi_deleg_file = NULL; 689 spin_unlock(&fp->fi_lock);
690
691 if (filp) {
692 vfs_setlease(filp, F_UNLCK, &fl);
693 fput(filp);
685 } 694 }
686} 695}
687 696
@@ -717,8 +726,6 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
717 list_del_init(&dp->dl_recall_lru); 726 list_del_init(&dp->dl_recall_lru);
718 list_del_init(&dp->dl_perfile); 727 list_del_init(&dp->dl_perfile);
719 spin_unlock(&fp->fi_lock); 728 spin_unlock(&fp->fi_lock);
720 if (fp)
721 nfs4_put_deleg_lease(fp);
722} 729}
723 730
724static void destroy_delegation(struct nfs4_delegation *dp) 731static void destroy_delegation(struct nfs4_delegation *dp)
@@ -726,6 +733,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
726 spin_lock(&state_lock); 733 spin_lock(&state_lock);
727 unhash_delegation_locked(dp); 734 unhash_delegation_locked(dp);
728 spin_unlock(&state_lock); 735 spin_unlock(&state_lock);
736 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
729 nfs4_put_stid(&dp->dl_stid); 737 nfs4_put_stid(&dp->dl_stid);
730} 738}
731 739
@@ -735,6 +743,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
735 743
736 WARN_ON(!list_empty(&dp->dl_recall_lru)); 744 WARN_ON(!list_empty(&dp->dl_recall_lru));
737 745
746 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
747
738 if (clp->cl_minorversion == 0) 748 if (clp->cl_minorversion == 0)
739 nfs4_put_stid(&dp->dl_stid); 749 nfs4_put_stid(&dp->dl_stid);
740 else { 750 else {
@@ -1635,6 +1645,7 @@ __destroy_client(struct nfs4_client *clp)
1635 while (!list_empty(&reaplist)) { 1645 while (!list_empty(&reaplist)) {
1636 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1646 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1637 list_del_init(&dp->dl_recall_lru); 1647 list_del_init(&dp->dl_recall_lru);
1648 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
1638 nfs4_put_stid(&dp->dl_stid); 1649 nfs4_put_stid(&dp->dl_stid);
1639 } 1650 }
1640 while (!list_empty(&clp->cl_revoked)) { 1651 while (!list_empty(&clp->cl_revoked)) {
@@ -1862,7 +1873,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
1862 free_client(clp); 1873 free_client(clp);
1863 return NULL; 1874 return NULL;
1864 } 1875 }
1865 INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); 1876 nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
1866 clp->cl_time = get_seconds(); 1877 clp->cl_time = get_seconds();
1867 clear_bit(0, &clp->cl_cb_slot_busy); 1878 clear_bit(0, &clp->cl_cb_slot_busy);
1868 copy_verf(clp, verf); 1879 copy_verf(clp, verf);
@@ -3349,8 +3360,9 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
3349 return ret; 3360 return ret;
3350} 3361}
3351 3362
3352void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) 3363static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
3353{ 3364{
3365 struct nfs4_delegation *dp = cb_to_delegation(cb);
3354 struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, 3366 struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
3355 nfsd_net_id); 3367 nfsd_net_id);
3356 3368
@@ -3371,6 +3383,43 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
3371 spin_unlock(&state_lock); 3383 spin_unlock(&state_lock);
3372} 3384}
3373 3385
3386static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
3387 struct rpc_task *task)
3388{
3389 struct nfs4_delegation *dp = cb_to_delegation(cb);
3390
3391 switch (task->tk_status) {
3392 case 0:
3393 return 1;
3394 case -EBADHANDLE:
3395 case -NFS4ERR_BAD_STATEID:
3396 /*
3397 * Race: client probably got cb_recall before open reply
3398 * granting delegation.
3399 */
3400 if (dp->dl_retries--) {
3401 rpc_delay(task, 2 * HZ);
3402 return 0;
3403 }
3404 /*FALLTHRU*/
3405 default:
3406 return -1;
3407 }
3408}
3409
3410static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
3411{
3412 struct nfs4_delegation *dp = cb_to_delegation(cb);
3413
3414 nfs4_put_stid(&dp->dl_stid);
3415}
3416
3417static struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
3418 .prepare = nfsd4_cb_recall_prepare,
3419 .done = nfsd4_cb_recall_done,
3420 .release = nfsd4_cb_recall_release,
3421};
3422
3374static void nfsd_break_one_deleg(struct nfs4_delegation *dp) 3423static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
3375{ 3424{
3376 /* 3425 /*
@@ -3381,7 +3430,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
3381 * it's safe to take a reference. 3430 * it's safe to take a reference.
3382 */ 3431 */
3383 atomic_inc(&dp->dl_stid.sc_count); 3432 atomic_inc(&dp->dl_stid.sc_count);
3384 nfsd4_cb_recall(dp); 3433 nfsd4_run_cb(&dp->dl_recall);
3385} 3434}
3386 3435
3387/* Called from break_lease() with i_lock held. */ 3436/* Called from break_lease() with i_lock held. */
@@ -3759,7 +3808,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
3759 fl = locks_alloc_lock(); 3808 fl = locks_alloc_lock();
3760 if (!fl) 3809 if (!fl)
3761 return NULL; 3810 return NULL;
3762 locks_init_lock(fl);
3763 fl->fl_lmops = &nfsd_lease_mng_ops; 3811 fl->fl_lmops = &nfsd_lease_mng_ops;
3764 fl->fl_flags = FL_DELEG; 3812 fl->fl_flags = FL_DELEG;
3765 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; 3813 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
@@ -4107,7 +4155,7 @@ out:
4107 return status; 4155 return status;
4108} 4156}
4109 4157
4110static void 4158void
4111nfsd4_end_grace(struct nfsd_net *nn) 4159nfsd4_end_grace(struct nfsd_net *nn)
4112{ 4160{
4113 /* do nothing if grace period already ended */ 4161 /* do nothing if grace period already ended */
@@ -4116,14 +4164,28 @@ nfsd4_end_grace(struct nfsd_net *nn)
4116 4164
4117 dprintk("NFSD: end of grace period\n"); 4165 dprintk("NFSD: end of grace period\n");
4118 nn->grace_ended = true; 4166 nn->grace_ended = true;
4119 nfsd4_record_grace_done(nn, nn->boot_time); 4167 /*
4168 * If the server goes down again right now, an NFSv4
4169 * client will still be allowed to reclaim after it comes back up,
4170 * even if it hasn't yet had a chance to reclaim state this time.
4171 *
4172 */
4173 nfsd4_record_grace_done(nn);
4174 /*
4175 * At this point, NFSv4 clients can still reclaim. But if the
4176 * server crashes, any that have not yet reclaimed will be out
4177 * of luck on the next boot.
4178 *
4179 * (NFSv4.1+ clients are considered to have reclaimed once they
4180 * call RECLAIM_COMPLETE. NFSv4.0 clients are considered to
4181 * have reclaimed after their first OPEN.)
4182 */
4120 locks_end_grace(&nn->nfsd4_manager); 4183 locks_end_grace(&nn->nfsd4_manager);
4121 /* 4184 /*
4122 * Now that every NFSv4 client has had the chance to recover and 4185 * At this point, and once lockd and/or any other containers
4123 * to see the (possibly new, possibly shorter) lease time, we 4186 * exit their grace period, further reclaims will fail and
4124 * can safely set the next grace time to the current lease time: 4187 * regular locking can resume.
4125 */ 4188 */
4126 nn->nfsd4_grace = nn->nfsd4_lease;
4127} 4189}
4128 4190
4129static time_t 4191static time_t
@@ -5210,7 +5272,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5210 } 5272 }
5211 5273
5212 fp = lock_stp->st_stid.sc_file; 5274 fp = lock_stp->st_stid.sc_file;
5213 locks_init_lock(file_lock);
5214 switch (lock->lk_type) { 5275 switch (lock->lk_type) {
5215 case NFS4_READ_LT: 5276 case NFS4_READ_LT:
5216 case NFS4_READW_LT: 5277 case NFS4_READW_LT:
@@ -5354,7 +5415,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5354 status = nfserr_jukebox; 5415 status = nfserr_jukebox;
5355 goto out; 5416 goto out;
5356 } 5417 }
5357 locks_init_lock(file_lock); 5418
5358 switch (lockt->lt_type) { 5419 switch (lockt->lt_type) {
5359 case NFS4_READ_LT: 5420 case NFS4_READ_LT:
5360 case NFS4_READW_LT: 5421 case NFS4_READW_LT:
@@ -5432,7 +5493,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5432 status = nfserr_jukebox; 5493 status = nfserr_jukebox;
5433 goto fput; 5494 goto fput;
5434 } 5495 }
5435 locks_init_lock(file_lock); 5496
5436 file_lock->fl_type = F_UNLCK; 5497 file_lock->fl_type = F_UNLCK;
5437 file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); 5498 file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
5438 file_lock->fl_pid = current->tgid; 5499 file_lock->fl_pid = current->tgid;
@@ -5645,6 +5706,9 @@ nfs4_check_open_reclaim(clientid_t *clid,
5645 if (status) 5706 if (status)
5646 return nfserr_reclaim_bad; 5707 return nfserr_reclaim_bad;
5647 5708
5709 if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags))
5710 return nfserr_no_grace;
5711
5648 if (nfsd4_client_record_check(cstate->clp)) 5712 if (nfsd4_client_record_check(cstate->clp))
5649 return nfserr_reclaim_bad; 5713 return nfserr_reclaim_bad;
5650 5714
@@ -6342,10 +6406,10 @@ nfs4_state_start_net(struct net *net)
6342 ret = nfs4_state_create_net(net); 6406 ret = nfs4_state_create_net(net);
6343 if (ret) 6407 if (ret)
6344 return ret; 6408 return ret;
6345 nfsd4_client_tracking_init(net);
6346 nn->boot_time = get_seconds(); 6409 nn->boot_time = get_seconds();
6347 locks_start_grace(net, &nn->nfsd4_manager);
6348 nn->grace_ended = false; 6410 nn->grace_ended = false;
6411 locks_start_grace(net, &nn->nfsd4_manager);
6412 nfsd4_client_tracking_init(net);
6349 printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", 6413 printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
6350 nn->nfsd4_grace, net); 6414 nn->nfsd4_grace, net);
6351 queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); 6415 queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
@@ -6402,6 +6466,7 @@ nfs4_state_shutdown_net(struct net *net)
6402 list_for_each_safe(pos, next, &reaplist) { 6466 list_for_each_safe(pos, next, &reaplist) {
6403 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6467 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6404 list_del_init(&dp->dl_recall_lru); 6468 list_del_init(&dp->dl_recall_lru);
6469 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
6405 nfs4_put_stid(&dp->dl_stid); 6470 nfs4_put_stid(&dp->dl_stid);
6406 } 6471 }
6407 6472
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b01f6e100ee8..eeea7a90eb87 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -31,13 +31,6 @@
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 * TODO: Neil Brown made the following observation: We currently
36 * initially reserve NFSD_BUFSIZE space on the transmit queue and
37 * never release any of that until the request is complete.
38 * It would be good to calculate a new maximum response size while
39 * decoding the COMPOUND, and call svc_reserve with this number
40 * at the end of nfs4svc_decode_compoundargs.
41 */ 34 */
42 35
43#include <linux/slab.h> 36#include <linux/slab.h>
@@ -1521,6 +1514,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
1521} 1514}
1522 1515
1523static __be32 1516static __be32
1517nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
1518{
1519 DECODE_HEAD;
1520
1521 status = nfsd4_decode_stateid(argp, &seek->seek_stateid);
1522 if (status)
1523 return status;
1524
1525 READ_BUF(8 + 4);
1526 p = xdr_decode_hyper(p, &seek->seek_offset);
1527 seek->seek_whence = be32_to_cpup(p);
1528
1529 DECODE_TAIL;
1530}
1531
1532static __be32
1524nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) 1533nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
1525{ 1534{
1526 return nfs_ok; 1535 return nfs_ok;
@@ -1593,6 +1602,20 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1593 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1602 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1594 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, 1603 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
1595 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, 1604 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
1605
1606 /* new operations for NFSv4.2 */
1607 [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
1608 [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
1609 [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
1610 [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
1611 [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
1612 [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
1613 [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
1614 [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp,
1615 [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp,
1616 [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
1617 [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
1618 [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
1596}; 1619};
1597 1620
1598static inline bool 1621static inline bool
@@ -1670,6 +1693,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1670 readbytes += nfsd4_max_reply(argp->rqstp, op); 1693 readbytes += nfsd4_max_reply(argp->rqstp, op);
1671 } else 1694 } else
1672 max_reply += nfsd4_max_reply(argp->rqstp, op); 1695 max_reply += nfsd4_max_reply(argp->rqstp, op);
1696 /*
1697 * OP_LOCK may return a conflicting lock. (Special case
1698 * because it will just skip encoding this if it runs
1699 * out of xdr buffer space, and it is the only operation
1700 * that behaves this way.)
1701 */
1702 if (op->opnum == OP_LOCK)
1703 max_reply += NFS4_OPAQUE_LIMIT;
1673 1704
1674 if (op->status) { 1705 if (op->status) {
1675 argp->opcnt = i+1; 1706 argp->opcnt = i+1;
@@ -3764,6 +3795,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
3764} 3795}
3765 3796
3766static __be32 3797static __be32
3798nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
3799 struct nfsd4_seek *seek)
3800{
3801 __be32 *p;
3802
3803 if (nfserr)
3804 return nfserr;
3805
3806 p = xdr_reserve_space(&resp->xdr, 4 + 8);
3807 *p++ = cpu_to_be32(seek->seek_eof);
3808 p = xdr_encode_hyper(p, seek->seek_pos);
3809
3810 return nfserr;
3811}
3812
3813static __be32
3767nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) 3814nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
3768{ 3815{
3769 return nfserr; 3816 return nfserr;
@@ -3835,6 +3882,20 @@ static nfsd4_enc nfsd4_enc_ops[] = {
3835 [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, 3882 [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
3836 [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, 3883 [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
3837 [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, 3884 [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
3885
3886 /* NFSv4.2 operations */
3887 [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
3888 [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop,
3889 [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
3890 [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
3891 [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
3892 [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
3893 [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
3894 [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
3895 [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop,
3896 [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
3897 [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
3898 [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
3838}; 3899};
3839 3900
3840/* 3901/*
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index ff9567633245..122f69185ef5 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -27,8 +27,12 @@
27 */ 27 */
28#define TARGET_BUCKET_SIZE 64 28#define TARGET_BUCKET_SIZE 64
29 29
30static struct hlist_head * cache_hash; 30struct nfsd_drc_bucket {
31static struct list_head lru_head; 31 struct list_head lru_head;
32 spinlock_t cache_lock;
33};
34
35static struct nfsd_drc_bucket *drc_hashtbl;
32static struct kmem_cache *drc_slab; 36static struct kmem_cache *drc_slab;
33 37
34/* max number of entries allowed in the cache */ 38/* max number of entries allowed in the cache */
@@ -36,6 +40,7 @@ static unsigned int max_drc_entries;
36 40
37/* number of significant bits in the hash value */ 41/* number of significant bits in the hash value */
38static unsigned int maskbits; 42static unsigned int maskbits;
43static unsigned int drc_hashsize;
39 44
40/* 45/*
41 * Stats and other tracking of on the duplicate reply cache. All of these and 46 * Stats and other tracking of on the duplicate reply cache. All of these and
@@ -43,7 +48,7 @@ static unsigned int maskbits;
43 */ 48 */
44 49
45/* total number of entries */ 50/* total number of entries */
46static unsigned int num_drc_entries; 51static atomic_t num_drc_entries;
47 52
48/* cache misses due only to checksum comparison failures */ 53/* cache misses due only to checksum comparison failures */
49static unsigned int payload_misses; 54static unsigned int payload_misses;
@@ -75,7 +80,6 @@ static struct shrinker nfsd_reply_cache_shrinker = {
75 * A cache entry is "single use" if c_state == RC_INPROG 80 * A cache entry is "single use" if c_state == RC_INPROG
76 * Otherwise, it when accessing _prev or _next, the lock must be held. 81 * Otherwise, it when accessing _prev or _next, the lock must be held.
77 */ 82 */
78static DEFINE_SPINLOCK(cache_lock);
79static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); 83static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func);
80 84
81/* 85/*
@@ -116,6 +120,12 @@ nfsd_hashsize(unsigned int limit)
116 return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); 120 return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
117} 121}
118 122
123static u32
124nfsd_cache_hash(__be32 xid)
125{
126 return hash_32(be32_to_cpu(xid), maskbits);
127}
128
119static struct svc_cacherep * 129static struct svc_cacherep *
120nfsd_reply_cache_alloc(void) 130nfsd_reply_cache_alloc(void)
121{ 131{
@@ -126,7 +136,6 @@ nfsd_reply_cache_alloc(void)
126 rp->c_state = RC_UNUSED; 136 rp->c_state = RC_UNUSED;
127 rp->c_type = RC_NOCACHE; 137 rp->c_type = RC_NOCACHE;
128 INIT_LIST_HEAD(&rp->c_lru); 138 INIT_LIST_HEAD(&rp->c_lru);
129 INIT_HLIST_NODE(&rp->c_hash);
130 } 139 }
131 return rp; 140 return rp;
132} 141}
@@ -138,29 +147,27 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
138 drc_mem_usage -= rp->c_replvec.iov_len; 147 drc_mem_usage -= rp->c_replvec.iov_len;
139 kfree(rp->c_replvec.iov_base); 148 kfree(rp->c_replvec.iov_base);
140 } 149 }
141 if (!hlist_unhashed(&rp->c_hash))
142 hlist_del(&rp->c_hash);
143 list_del(&rp->c_lru); 150 list_del(&rp->c_lru);
144 --num_drc_entries; 151 atomic_dec(&num_drc_entries);
145 drc_mem_usage -= sizeof(*rp); 152 drc_mem_usage -= sizeof(*rp);
146 kmem_cache_free(drc_slab, rp); 153 kmem_cache_free(drc_slab, rp);
147} 154}
148 155
149static void 156static void
150nfsd_reply_cache_free(struct svc_cacherep *rp) 157nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
151{ 158{
152 spin_lock(&cache_lock); 159 spin_lock(&b->cache_lock);
153 nfsd_reply_cache_free_locked(rp); 160 nfsd_reply_cache_free_locked(rp);
154 spin_unlock(&cache_lock); 161 spin_unlock(&b->cache_lock);
155} 162}
156 163
157int nfsd_reply_cache_init(void) 164int nfsd_reply_cache_init(void)
158{ 165{
159 unsigned int hashsize; 166 unsigned int hashsize;
167 unsigned int i;
160 168
161 INIT_LIST_HEAD(&lru_head);
162 max_drc_entries = nfsd_cache_size_limit(); 169 max_drc_entries = nfsd_cache_size_limit();
163 num_drc_entries = 0; 170 atomic_set(&num_drc_entries, 0);
164 hashsize = nfsd_hashsize(max_drc_entries); 171 hashsize = nfsd_hashsize(max_drc_entries);
165 maskbits = ilog2(hashsize); 172 maskbits = ilog2(hashsize);
166 173
@@ -170,9 +177,14 @@ int nfsd_reply_cache_init(void)
170 if (!drc_slab) 177 if (!drc_slab)
171 goto out_nomem; 178 goto out_nomem;
172 179
173 cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); 180 drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
174 if (!cache_hash) 181 if (!drc_hashtbl)
175 goto out_nomem; 182 goto out_nomem;
183 for (i = 0; i < hashsize; i++) {
184 INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
185 spin_lock_init(&drc_hashtbl[i].cache_lock);
186 }
187 drc_hashsize = hashsize;
176 188
177 return 0; 189 return 0;
178out_nomem: 190out_nomem:
@@ -184,17 +196,22 @@ out_nomem:
184void nfsd_reply_cache_shutdown(void) 196void nfsd_reply_cache_shutdown(void)
185{ 197{
186 struct svc_cacherep *rp; 198 struct svc_cacherep *rp;
199 unsigned int i;
187 200
188 unregister_shrinker(&nfsd_reply_cache_shrinker); 201 unregister_shrinker(&nfsd_reply_cache_shrinker);
189 cancel_delayed_work_sync(&cache_cleaner); 202 cancel_delayed_work_sync(&cache_cleaner);
190 203
191 while (!list_empty(&lru_head)) { 204 for (i = 0; i < drc_hashsize; i++) {
192 rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); 205 struct list_head *head = &drc_hashtbl[i].lru_head;
193 nfsd_reply_cache_free_locked(rp); 206 while (!list_empty(head)) {
207 rp = list_first_entry(head, struct svc_cacherep, c_lru);
208 nfsd_reply_cache_free_locked(rp);
209 }
194 } 210 }
195 211
196 kfree (cache_hash); 212 kfree (drc_hashtbl);
197 cache_hash = NULL; 213 drc_hashtbl = NULL;
214 drc_hashsize = 0;
198 215
199 if (drc_slab) { 216 if (drc_slab) {
200 kmem_cache_destroy(drc_slab); 217 kmem_cache_destroy(drc_slab);
@@ -207,61 +224,63 @@ void nfsd_reply_cache_shutdown(void)
207 * not already scheduled. 224 * not already scheduled.
208 */ 225 */
209static void 226static void
210lru_put_end(struct svc_cacherep *rp) 227lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
211{ 228{
212 rp->c_timestamp = jiffies; 229 rp->c_timestamp = jiffies;
213 list_move_tail(&rp->c_lru, &lru_head); 230 list_move_tail(&rp->c_lru, &b->lru_head);
214 schedule_delayed_work(&cache_cleaner, RC_EXPIRE); 231 schedule_delayed_work(&cache_cleaner, RC_EXPIRE);
215} 232}
216 233
217/*
218 * Move a cache entry from one hash list to another
219 */
220static void
221hash_refile(struct svc_cacherep *rp)
222{
223 hlist_del_init(&rp->c_hash);
224 /*
225 * No point in byte swapping c_xid since we're just using it to pick
226 * a hash bucket.
227 */
228 hlist_add_head(&rp->c_hash, cache_hash +
229 hash_32((__force u32)rp->c_xid, maskbits));
230}
231
232/*
233 * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
234 * Also prune the oldest ones when the total exceeds the max number of entries.
235 */
236static long 234static long
237prune_cache_entries(void) 235prune_bucket(struct nfsd_drc_bucket *b)
238{ 236{
239 struct svc_cacherep *rp, *tmp; 237 struct svc_cacherep *rp, *tmp;
240 long freed = 0; 238 long freed = 0;
241 239
242 list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { 240 list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
243 /* 241 /*
244 * Don't free entries attached to calls that are still 242 * Don't free entries attached to calls that are still
245 * in-progress, but do keep scanning the list. 243 * in-progress, but do keep scanning the list.
246 */ 244 */
247 if (rp->c_state == RC_INPROG) 245 if (rp->c_state == RC_INPROG)
248 continue; 246 continue;
249 if (num_drc_entries <= max_drc_entries && 247 if (atomic_read(&num_drc_entries) <= max_drc_entries &&
250 time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) 248 time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
251 break; 249 break;
252 nfsd_reply_cache_free_locked(rp); 250 nfsd_reply_cache_free_locked(rp);
253 freed++; 251 freed++;
254 } 252 }
253 return freed;
254}
255
256/*
257 * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
258 * Also prune the oldest ones when the total exceeds the max number of entries.
259 */
260static long
261prune_cache_entries(void)
262{
263 unsigned int i;
264 long freed = 0;
265 bool cancel = true;
266
267 for (i = 0; i < drc_hashsize; i++) {
268 struct nfsd_drc_bucket *b = &drc_hashtbl[i];
269
270 if (list_empty(&b->lru_head))
271 continue;
272 spin_lock(&b->cache_lock);
273 freed += prune_bucket(b);
274 if (!list_empty(&b->lru_head))
275 cancel = false;
276 spin_unlock(&b->cache_lock);
277 }
255 278
256 /* 279 /*
257 * Conditionally rearm the job. If we cleaned out the list, then 280 * Conditionally rearm the job to run in RC_EXPIRE since we just
258 * cancel any pending run (since there won't be any work to do). 281 * ran the pruner.
259 * Otherwise, we rearm the job or modify the existing one to run in
260 * RC_EXPIRE since we just ran the pruner.
261 */ 282 */
262 if (list_empty(&lru_head)) 283 if (!cancel)
263 cancel_delayed_work(&cache_cleaner);
264 else
265 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); 284 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
266 return freed; 285 return freed;
267} 286}
@@ -269,32 +288,19 @@ prune_cache_entries(void)
269static void 288static void
270cache_cleaner_func(struct work_struct *unused) 289cache_cleaner_func(struct work_struct *unused)
271{ 290{
272 spin_lock(&cache_lock);
273 prune_cache_entries(); 291 prune_cache_entries();
274 spin_unlock(&cache_lock);
275} 292}
276 293
277static unsigned long 294static unsigned long
278nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) 295nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
279{ 296{
280 unsigned long num; 297 return atomic_read(&num_drc_entries);
281
282 spin_lock(&cache_lock);
283 num = num_drc_entries;
284 spin_unlock(&cache_lock);
285
286 return num;
287} 298}
288 299
289static unsigned long 300static unsigned long
290nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) 301nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
291{ 302{
292 unsigned long freed; 303 return prune_cache_entries();
293
294 spin_lock(&cache_lock);
295 freed = prune_cache_entries();
296 spin_unlock(&cache_lock);
297 return freed;
298} 304}
299/* 305/*
300 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes 306 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
@@ -332,20 +338,24 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
332static bool 338static bool
333nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) 339nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
334{ 340{
335 /* Check RPC header info first */ 341 /* Check RPC XID first */
336 if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || 342 if (rqstp->rq_xid != rp->c_xid)
337 rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers ||
338 rqstp->rq_arg.len != rp->c_len ||
339 !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
340 rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
341 return false; 343 return false;
342
343 /* compare checksum of NFS data */ 344 /* compare checksum of NFS data */
344 if (csum != rp->c_csum) { 345 if (csum != rp->c_csum) {
345 ++payload_misses; 346 ++payload_misses;
346 return false; 347 return false;
347 } 348 }
348 349
350 /* Other discriminators */
351 if (rqstp->rq_proc != rp->c_proc ||
352 rqstp->rq_prot != rp->c_prot ||
353 rqstp->rq_vers != rp->c_vers ||
354 rqstp->rq_arg.len != rp->c_len ||
355 !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
356 rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
357 return false;
358
349 return true; 359 return true;
350} 360}
351 361
@@ -355,18 +365,14 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
355 * NULL on failure. 365 * NULL on failure.
356 */ 366 */
357static struct svc_cacherep * 367static struct svc_cacherep *
358nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) 368nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp,
369 __wsum csum)
359{ 370{
360 struct svc_cacherep *rp, *ret = NULL; 371 struct svc_cacherep *rp, *ret = NULL;
361 struct hlist_head *rh; 372 struct list_head *rh = &b->lru_head;
362 unsigned int entries = 0; 373 unsigned int entries = 0;
363 374
364 /* 375 list_for_each_entry(rp, rh, c_lru) {
365 * No point in byte swapping rq_xid since we're just using it to pick
366 * a hash bucket.
367 */
368 rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)];
369 hlist_for_each_entry(rp, rh, c_hash) {
370 ++entries; 376 ++entries;
371 if (nfsd_cache_match(rqstp, csum, rp)) { 377 if (nfsd_cache_match(rqstp, csum, rp)) {
372 ret = rp; 378 ret = rp;
@@ -377,11 +383,12 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
377 /* tally hash chain length stats */ 383 /* tally hash chain length stats */
378 if (entries > longest_chain) { 384 if (entries > longest_chain) {
379 longest_chain = entries; 385 longest_chain = entries;
380 longest_chain_cachesize = num_drc_entries; 386 longest_chain_cachesize = atomic_read(&num_drc_entries);
381 } else if (entries == longest_chain) { 387 } else if (entries == longest_chain) {
382 /* prefer to keep the smallest cachesize possible here */ 388 /* prefer to keep the smallest cachesize possible here */
383 longest_chain_cachesize = min(longest_chain_cachesize, 389 longest_chain_cachesize = min_t(unsigned int,
384 num_drc_entries); 390 longest_chain_cachesize,
391 atomic_read(&num_drc_entries));
385 } 392 }
386 393
387 return ret; 394 return ret;
@@ -403,6 +410,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
403 vers = rqstp->rq_vers, 410 vers = rqstp->rq_vers,
404 proc = rqstp->rq_proc; 411 proc = rqstp->rq_proc;
405 __wsum csum; 412 __wsum csum;
413 u32 hash = nfsd_cache_hash(xid);
414 struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
406 unsigned long age; 415 unsigned long age;
407 int type = rqstp->rq_cachetype; 416 int type = rqstp->rq_cachetype;
408 int rtn = RC_DOIT; 417 int rtn = RC_DOIT;
@@ -420,16 +429,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
420 * preallocate an entry. 429 * preallocate an entry.
421 */ 430 */
422 rp = nfsd_reply_cache_alloc(); 431 rp = nfsd_reply_cache_alloc();
423 spin_lock(&cache_lock); 432 spin_lock(&b->cache_lock);
424 if (likely(rp)) { 433 if (likely(rp)) {
425 ++num_drc_entries; 434 atomic_inc(&num_drc_entries);
426 drc_mem_usage += sizeof(*rp); 435 drc_mem_usage += sizeof(*rp);
427 } 436 }
428 437
429 /* go ahead and prune the cache */ 438 /* go ahead and prune the cache */
430 prune_cache_entries(); 439 prune_bucket(b);
431 440
432 found = nfsd_cache_search(rqstp, csum); 441 found = nfsd_cache_search(b, rqstp, csum);
433 if (found) { 442 if (found) {
434 if (likely(rp)) 443 if (likely(rp))
435 nfsd_reply_cache_free_locked(rp); 444 nfsd_reply_cache_free_locked(rp);
@@ -454,8 +463,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
454 rp->c_len = rqstp->rq_arg.len; 463 rp->c_len = rqstp->rq_arg.len;
455 rp->c_csum = csum; 464 rp->c_csum = csum;
456 465
457 hash_refile(rp); 466 lru_put_end(b, rp);
458 lru_put_end(rp);
459 467
460 /* release any buffer */ 468 /* release any buffer */
461 if (rp->c_type == RC_REPLBUFF) { 469 if (rp->c_type == RC_REPLBUFF) {
@@ -465,14 +473,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
465 } 473 }
466 rp->c_type = RC_NOCACHE; 474 rp->c_type = RC_NOCACHE;
467 out: 475 out:
468 spin_unlock(&cache_lock); 476 spin_unlock(&b->cache_lock);
469 return rtn; 477 return rtn;
470 478
471found_entry: 479found_entry:
472 nfsdstats.rchits++; 480 nfsdstats.rchits++;
473 /* We found a matching entry which is either in progress or done. */ 481 /* We found a matching entry which is either in progress or done. */
474 age = jiffies - rp->c_timestamp; 482 age = jiffies - rp->c_timestamp;
475 lru_put_end(rp); 483 lru_put_end(b, rp);
476 484
477 rtn = RC_DROPIT; 485 rtn = RC_DROPIT;
478 /* Request being processed or excessive rexmits */ 486 /* Request being processed or excessive rexmits */
@@ -527,18 +535,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
527{ 535{
528 struct svc_cacherep *rp = rqstp->rq_cacherep; 536 struct svc_cacherep *rp = rqstp->rq_cacherep;
529 struct kvec *resv = &rqstp->rq_res.head[0], *cachv; 537 struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
538 u32 hash;
539 struct nfsd_drc_bucket *b;
530 int len; 540 int len;
531 size_t bufsize = 0; 541 size_t bufsize = 0;
532 542
533 if (!rp) 543 if (!rp)
534 return; 544 return;
535 545
546 hash = nfsd_cache_hash(rp->c_xid);
547 b = &drc_hashtbl[hash];
548
536 len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); 549 len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
537 len >>= 2; 550 len >>= 2;
538 551
539 /* Don't cache excessive amounts of data and XDR failures */ 552 /* Don't cache excessive amounts of data and XDR failures */
540 if (!statp || len > (256 >> 2)) { 553 if (!statp || len > (256 >> 2)) {
541 nfsd_reply_cache_free(rp); 554 nfsd_reply_cache_free(b, rp);
542 return; 555 return;
543 } 556 }
544 557
@@ -553,23 +566,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
553 bufsize = len << 2; 566 bufsize = len << 2;
554 cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); 567 cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
555 if (!cachv->iov_base) { 568 if (!cachv->iov_base) {
556 nfsd_reply_cache_free(rp); 569 nfsd_reply_cache_free(b, rp);
557 return; 570 return;
558 } 571 }
559 cachv->iov_len = bufsize; 572 cachv->iov_len = bufsize;
560 memcpy(cachv->iov_base, statp, bufsize); 573 memcpy(cachv->iov_base, statp, bufsize);
561 break; 574 break;
562 case RC_NOCACHE: 575 case RC_NOCACHE:
563 nfsd_reply_cache_free(rp); 576 nfsd_reply_cache_free(b, rp);
564 return; 577 return;
565 } 578 }
566 spin_lock(&cache_lock); 579 spin_lock(&b->cache_lock);
567 drc_mem_usage += bufsize; 580 drc_mem_usage += bufsize;
568 lru_put_end(rp); 581 lru_put_end(b, rp);
569 rp->c_secure = rqstp->rq_secure; 582 rp->c_secure = rqstp->rq_secure;
570 rp->c_type = cachetype; 583 rp->c_type = cachetype;
571 rp->c_state = RC_DONE; 584 rp->c_state = RC_DONE;
572 spin_unlock(&cache_lock); 585 spin_unlock(&b->cache_lock);
573 return; 586 return;
574} 587}
575 588
@@ -600,9 +613,9 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
600 */ 613 */
601static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) 614static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
602{ 615{
603 spin_lock(&cache_lock);
604 seq_printf(m, "max entries: %u\n", max_drc_entries); 616 seq_printf(m, "max entries: %u\n", max_drc_entries);
605 seq_printf(m, "num entries: %u\n", num_drc_entries); 617 seq_printf(m, "num entries: %u\n",
618 atomic_read(&num_drc_entries));
606 seq_printf(m, "hash buckets: %u\n", 1 << maskbits); 619 seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
607 seq_printf(m, "mem usage: %u\n", drc_mem_usage); 620 seq_printf(m, "mem usage: %u\n", drc_mem_usage);
608 seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); 621 seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
@@ -611,7 +624,6 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
611 seq_printf(m, "payload misses: %u\n", payload_misses); 624 seq_printf(m, "payload misses: %u\n", payload_misses);
612 seq_printf(m, "longest chain len: %u\n", longest_chain); 625 seq_printf(m, "longest chain len: %u\n", longest_chain);
613 seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); 626 seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
614 spin_unlock(&cache_lock);
615 return 0; 627 return 0;
616} 628}
617 629
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 4e042105fb6e..ca73ca79a0ee 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -49,6 +49,7 @@ enum {
49 NFSD_Leasetime, 49 NFSD_Leasetime,
50 NFSD_Gracetime, 50 NFSD_Gracetime,
51 NFSD_RecoveryDir, 51 NFSD_RecoveryDir,
52 NFSD_V4EndGrace,
52#endif 53#endif
53}; 54};
54 55
@@ -68,6 +69,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
68static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 69static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
69static ssize_t write_gracetime(struct file *file, char *buf, size_t size); 70static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
70static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); 71static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
72static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size);
71#endif 73#endif
72 74
73static ssize_t (*write_op[])(struct file *, char *, size_t) = { 75static ssize_t (*write_op[])(struct file *, char *, size_t) = {
@@ -84,6 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
84 [NFSD_Leasetime] = write_leasetime, 86 [NFSD_Leasetime] = write_leasetime,
85 [NFSD_Gracetime] = write_gracetime, 87 [NFSD_Gracetime] = write_gracetime,
86 [NFSD_RecoveryDir] = write_recoverydir, 88 [NFSD_RecoveryDir] = write_recoverydir,
89 [NFSD_V4EndGrace] = write_v4_end_grace,
87#endif 90#endif
88}; 91};
89 92
@@ -1077,6 +1080,47 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
1077 return rv; 1080 return rv;
1078} 1081}
1079 1082
1083/**
1084 * write_v4_end_grace - release grace period for nfsd's v4.x lock manager
1085 *
1086 * Input:
1087 * buf: ignored
1088 * size: zero
1089 * OR
1090 *
1091 * Input:
1092 * buf: any value
1093 * size: non-zero length of C string in @buf
1094 * Output:
1095 * passed-in buffer filled with "Y" or "N" with a newline
1096 * and NULL-terminated C string. This indicates whether
1097 * the grace period has ended in the current net
1098 * namespace. Return code is the size in bytes of the
1099 * string. Writing a string that starts with 'Y', 'y', or
1100 * '1' to the file will end the grace period for nfsd's v4
1101 * lock manager.
1102 */
1103static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
1104{
1105 struct net *net = file->f_dentry->d_sb->s_fs_info;
1106 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1107
1108 if (size > 0) {
1109 switch(buf[0]) {
1110 case 'Y':
1111 case 'y':
1112 case '1':
1113 nfsd4_end_grace(nn);
1114 break;
1115 default:
1116 return -EINVAL;
1117 }
1118 }
1119
1120 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n",
1121 nn->grace_ended ? 'Y' : 'N');
1122}
1123
1080#endif 1124#endif
1081 1125
1082/*----------------------------------------------------------------------------*/ 1126/*----------------------------------------------------------------------------*/
@@ -1110,6 +1154,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1110 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1154 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
1111 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1155 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
1112 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, 1156 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
1157 [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO},
1113#endif 1158#endif
1114 /* last one */ {""} 1159 /* last one */ {""}
1115 }; 1160 };
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 847daf37e566..747f3b95bd11 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void);
251#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) 251#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
252#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) 252#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP)
253#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) 253#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH)
254#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) 254#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP)
255#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) 255#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED)
256#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) 256#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS)
257#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) 257#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index e883a5868be6..88026fc6a981 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -209,8 +209,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
209 * fix that case easily. 209 * fix that case easily.
210 */ 210 */
211 struct cred *new = prepare_creds(); 211 struct cred *new = prepare_creds();
212 if (!new) 212 if (!new) {
213 return nfserrno(-ENOMEM); 213 error = nfserrno(-ENOMEM);
214 goto out;
215 }
214 new->cap_effective = 216 new->cap_effective =
215 cap_raise_nfsd_set(new->cap_effective, 217 cap_raise_nfsd_set(new->cap_effective,
216 new->cap_permitted); 218 new->cap_permitted);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4a89e00d7461..0a47c6a6b301 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -62,16 +62,21 @@ typedef struct {
62 (s)->si_generation 62 (s)->si_generation
63 63
64struct nfsd4_callback { 64struct nfsd4_callback {
65 void *cb_op;
66 struct nfs4_client *cb_clp; 65 struct nfs4_client *cb_clp;
67 struct list_head cb_per_client; 66 struct list_head cb_per_client;
68 u32 cb_minorversion; 67 u32 cb_minorversion;
69 struct rpc_message cb_msg; 68 struct rpc_message cb_msg;
70 const struct rpc_call_ops *cb_ops; 69 struct nfsd4_callback_ops *cb_ops;
71 struct work_struct cb_work; 70 struct work_struct cb_work;
72 bool cb_done; 71 bool cb_done;
73}; 72};
74 73
74struct nfsd4_callback_ops {
75 void (*prepare)(struct nfsd4_callback *);
76 int (*done)(struct nfsd4_callback *, struct rpc_task *);
77 void (*release)(struct nfsd4_callback *);
78};
79
75/* 80/*
76 * A core object that represents a "common" stateid. These are generally 81 * A core object that represents a "common" stateid. These are generally
77 * embedded within the different (more specific) stateid objects and contain 82 * embedded within the different (more specific) stateid objects and contain
@@ -127,6 +132,9 @@ struct nfs4_delegation {
127 struct nfsd4_callback dl_recall; 132 struct nfsd4_callback dl_recall;
128}; 133};
129 134
135#define cb_to_delegation(cb) \
136 container_of(cb, struct nfs4_delegation, dl_recall)
137
130/* client delegation callback info */ 138/* client delegation callback info */
131struct nfs4_cb_conn { 139struct nfs4_cb_conn {
132 /* SETCLIENTID info */ 140 /* SETCLIENTID info */
@@ -306,6 +314,7 @@ struct nfs4_client {
306#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ 314#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */
307#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ 315#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */
308#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ 316#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */
317#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
309#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 318#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
310 1 << NFSD4_CLIENT_CB_KILL) 319 1 << NFSD4_CLIENT_CB_KILL)
311 unsigned long cl_flags; 320 unsigned long cl_flags;
@@ -517,6 +526,13 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
517#define RD_STATE 0x00000010 526#define RD_STATE 0x00000010
518#define WR_STATE 0x00000020 527#define WR_STATE 0x00000020
519 528
529enum nfsd4_cb_op {
530 NFSPROC4_CLNT_CB_NULL = 0,
531 NFSPROC4_CLNT_CB_RECALL,
532 NFSPROC4_CLNT_CB_SEQUENCE,
533};
534
535
520struct nfsd4_compound_state; 536struct nfsd4_compound_state;
521struct nfsd_net; 537struct nfsd_net;
522 538
@@ -531,12 +547,12 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
531extern __be32 nfs4_check_open_reclaim(clientid_t *clid, 547extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
532 struct nfsd4_compound_state *cstate, struct nfsd_net *nn); 548 struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
533extern int set_callback_cred(void); 549extern int set_callback_cred(void);
534void nfsd4_run_cb_null(struct work_struct *w);
535void nfsd4_run_cb_recall(struct work_struct *w);
536extern void nfsd4_probe_callback(struct nfs4_client *clp); 550extern void nfsd4_probe_callback(struct nfs4_client *clp);
537extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); 551extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
538extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 552extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
539extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 553extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
554 struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
555extern void nfsd4_run_cb(struct nfsd4_callback *cb);
540extern int nfsd4_create_callback_queue(void); 556extern int nfsd4_create_callback_queue(void);
541extern void nfsd4_destroy_callback_queue(void); 557extern void nfsd4_destroy_callback_queue(void);
542extern void nfsd4_shutdown_callback(struct nfs4_client *); 558extern void nfsd4_shutdown_callback(struct nfs4_client *);
@@ -545,13 +561,16 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
545 struct nfsd_net *nn); 561 struct nfsd_net *nn);
546extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); 562extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
547 563
564/* grace period management */
565void nfsd4_end_grace(struct nfsd_net *nn);
566
548/* nfs4recover operations */ 567/* nfs4recover operations */
549extern int nfsd4_client_tracking_init(struct net *net); 568extern int nfsd4_client_tracking_init(struct net *net);
550extern void nfsd4_client_tracking_exit(struct net *net); 569extern void nfsd4_client_tracking_exit(struct net *net);
551extern void nfsd4_client_record_create(struct nfs4_client *clp); 570extern void nfsd4_client_record_create(struct nfs4_client *clp);
552extern void nfsd4_client_record_remove(struct nfs4_client *clp); 571extern void nfsd4_client_record_remove(struct nfs4_client *clp);
553extern int nfsd4_client_record_check(struct nfs4_client *clp); 572extern int nfsd4_client_record_check(struct nfs4_client *clp);
554extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); 573extern void nfsd4_record_grace_done(struct nfsd_net *nn);
555 574
556/* nfs fault injection functions */ 575/* nfs fault injection functions */
557#ifdef CONFIG_NFSD_FAULT_INJECTION 576#ifdef CONFIG_NFSD_FAULT_INJECTION
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index f501a9b5c9df..965cffd17a0c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -445,6 +445,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
445 if (err) 445 if (err)
446 goto out; 446 goto out;
447 size_change = 1; 447 size_change = 1;
448
449 /*
450 * RFC5661, Section 18.30.4:
451 * Changing the size of a file with SETATTR indirectly
452 * changes the time_modify and change attributes.
453 *
454 * (and similar for the older RFCs)
455 */
456 if (iap->ia_size != i_size_read(inode))
457 iap->ia_valid |= ATTR_MTIME;
448 } 458 }
449 459
450 iap->ia_valid |= ATTR_CTIME; 460 iap->ia_valid |= ATTR_CTIME;
@@ -649,6 +659,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
649{ 659{
650 struct path path; 660 struct path path;
651 struct inode *inode; 661 struct inode *inode;
662 struct file *file;
652 int flags = O_RDONLY|O_LARGEFILE; 663 int flags = O_RDONLY|O_LARGEFILE;
653 __be32 err; 664 __be32 err;
654 int host_err = 0; 665 int host_err = 0;
@@ -703,19 +714,25 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
703 else 714 else
704 flags = O_WRONLY|O_LARGEFILE; 715 flags = O_WRONLY|O_LARGEFILE;
705 } 716 }
706 *filp = dentry_open(&path, flags, current_cred());
707 if (IS_ERR(*filp)) {
708 host_err = PTR_ERR(*filp);
709 *filp = NULL;
710 } else {
711 host_err = ima_file_check(*filp, may_flags);
712 717
713 if (may_flags & NFSD_MAY_64BIT_COOKIE) 718 file = dentry_open(&path, flags, current_cred());
714 (*filp)->f_mode |= FMODE_64BITHASH; 719 if (IS_ERR(file)) {
715 else 720 host_err = PTR_ERR(file);
716 (*filp)->f_mode |= FMODE_32BITHASH; 721 goto out_nfserr;
717 } 722 }
718 723
724 host_err = ima_file_check(file, may_flags);
725 if (host_err) {
726 nfsd_close(file);
727 goto out_nfserr;
728 }
729
730 if (may_flags & NFSD_MAY_64BIT_COOKIE)
731 file->f_mode |= FMODE_64BITHASH;
732 else
733 file->f_mode |= FMODE_32BITHASH;
734
735 *filp = file;
719out_nfserr: 736out_nfserr:
720 err = nfserrno(host_err); 737 err = nfserrno(host_err);
721out: 738out:
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 465e7799742a..5720e9457f33 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete {
428 u32 rca_one_fs; 428 u32 rca_one_fs;
429}; 429};
430 430
431struct nfsd4_seek {
432 /* request */
433 stateid_t seek_stateid;
434 loff_t seek_offset;
435 u32 seek_whence;
436
437 /* response */
438 u32 seek_eof;
439 loff_t seek_pos;
440};
441
431struct nfsd4_op { 442struct nfsd4_op {
432 int opnum; 443 int opnum;
433 __be32 status; 444 __be32 status;
@@ -473,6 +484,9 @@ struct nfsd4_op {
473 struct nfsd4_reclaim_complete reclaim_complete; 484 struct nfsd4_reclaim_complete reclaim_complete;
474 struct nfsd4_test_stateid test_stateid; 485 struct nfsd4_test_stateid test_stateid;
475 struct nfsd4_free_stateid free_stateid; 486 struct nfsd4_free_stateid free_stateid;
487
488 /* NFSv4.2 */
489 struct nfsd4_seek seek;
476 } u; 490 } u;
477 struct nfs4_replay * replay; 491 struct nfs4_replay * replay;
478}; 492};
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index a1e3064a8d99..026b0c042c40 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -110,6 +110,20 @@ enum nfs_opnum4 {
110 OP_DESTROY_CLIENTID = 57, 110 OP_DESTROY_CLIENTID = 57,
111 OP_RECLAIM_COMPLETE = 58, 111 OP_RECLAIM_COMPLETE = 58,
112 112
113 /* nfs42 */
114 OP_ALLOCATE = 59,
115 OP_COPY = 60,
116 OP_COPY_NOTIFY = 61,
117 OP_DEALLOCATE = 62,
118 OP_IO_ADVISE = 63,
119 OP_LAYOUTERROR = 64,
120 OP_LAYOUTSTATS = 65,
121 OP_OFFLOAD_CANCEL = 66,
122 OP_OFFLOAD_STATUS = 67,
123 OP_READ_PLUS = 68,
124 OP_SEEK = 69,
125 OP_WRITE_SAME = 70,
126
113 OP_ILLEGAL = 10044, 127 OP_ILLEGAL = 10044,
114}; 128};
115 129
@@ -117,10 +131,10 @@ enum nfs_opnum4 {
117Needs to be updated if more operations are defined in future.*/ 131Needs to be updated if more operations are defined in future.*/
118 132
119#define FIRST_NFS4_OP OP_ACCESS 133#define FIRST_NFS4_OP OP_ACCESS
120#define LAST_NFS4_OP OP_RECLAIM_COMPLETE 134#define LAST_NFS4_OP OP_WRITE_SAME
121#define LAST_NFS40_OP OP_RELEASE_LOCKOWNER 135#define LAST_NFS40_OP OP_RELEASE_LOCKOWNER
122#define LAST_NFS41_OP OP_RECLAIM_COMPLETE 136#define LAST_NFS41_OP OP_RECLAIM_COMPLETE
123#define LAST_NFS42_OP OP_RECLAIM_COMPLETE 137#define LAST_NFS42_OP OP_WRITE_SAME
124 138
125enum nfsstat4 { 139enum nfsstat4 {
126 NFS4_OK = 0, 140 NFS4_OK = 0,
@@ -235,10 +249,11 @@ enum nfsstat4 {
235 /* nfs42 */ 249 /* nfs42 */
236 NFS4ERR_PARTNER_NOTSUPP = 10088, 250 NFS4ERR_PARTNER_NOTSUPP = 10088,
237 NFS4ERR_PARTNER_NO_AUTH = 10089, 251 NFS4ERR_PARTNER_NO_AUTH = 10089,
238 NFS4ERR_METADATA_NOTSUPP = 10090, 252 NFS4ERR_UNION_NOTSUPP = 10090,
239 NFS4ERR_OFFLOAD_DENIED = 10091, 253 NFS4ERR_OFFLOAD_DENIED = 10091,
240 NFS4ERR_WRONG_LFS = 10092, 254 NFS4ERR_WRONG_LFS = 10092,
241 NFS4ERR_BADLABEL = 10093, 255 NFS4ERR_BADLABEL = 10093,
256 NFS4ERR_OFFLOAD_NO_REQS = 10094,
242}; 257};
243 258
244static inline bool seqid_mutating_err(u32 err) 259static inline bool seqid_mutating_err(u32 err)
@@ -535,4 +550,9 @@ struct nfs4_deviceid {
535 char data[NFS4_DEVICEID4_SIZE]; 550 char data[NFS4_DEVICEID4_SIZE];
536}; 551};
537 552
553enum data_content4 {
554 NFS4_CONTENT_DATA = 0,
555 NFS4_CONTENT_HOLE = 1,
556};
557
538#endif 558#endif
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 9d117f61d976..b97bf2ef996e 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -74,6 +74,8 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p
74 74
75#endif /* CONFIG_PROC_FS */ 75#endif /* CONFIG_PROC_FS */
76 76
77struct net;
78
77static inline struct proc_dir_entry *proc_net_mkdir( 79static inline struct proc_dir_entry *proc_net_mkdir(
78 struct net *net, const char *name, struct proc_dir_entry *parent) 80 struct net *net, const char *name, struct proc_dir_entry *parent)
79{ 81{
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index cf61ecd148e0..21678464883a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -280,7 +280,6 @@ struct svc_rqst {
280 bool rq_splice_ok; /* turned off in gss privacy 280 bool rq_splice_ok; /* turned off in gss privacy
281 * to prevent encrypting page 281 * to prevent encrypting page
282 * cache pages */ 282 * cache pages */
283 wait_queue_head_t rq_wait; /* synchronization */
284 struct task_struct *rq_task; /* service thread */ 283 struct task_struct *rq_task; /* service thread */
285}; 284};
286 285
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index cf47c313794e..584b6ef3a5e8 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -28,7 +28,8 @@
28#define NFSEXP_ALLSQUASH 0x0008 28#define NFSEXP_ALLSQUASH 0x0008
29#define NFSEXP_ASYNC 0x0010 29#define NFSEXP_ASYNC 0x0010
30#define NFSEXP_GATHERED_WRITES 0x0020 30#define NFSEXP_GATHERED_WRITES 0x0020
31/* 40 80 100 currently unused */ 31#define NFSEXP_NOREADDIRPLUS 0x0040
32/* 80 100 currently unused */
32#define NFSEXP_NOHIDE 0x0200 33#define NFSEXP_NOHIDE 0x0200
33#define NFSEXP_NOSUBTREECHECK 0x0400 34#define NFSEXP_NOSUBTREECHECK 0x0400
34#define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */ 35#define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */
@@ -47,7 +48,7 @@
47 */ 48 */
48#define NFSEXP_V4ROOT 0x10000 49#define NFSEXP_V4ROOT 0x10000
49/* All flags that we claim to support. (Note we don't support NOACL.) */ 50/* All flags that we claim to support. (Note we don't support NOACL.) */
50#define NFSEXP_ALLFLAGS 0x17E3F 51#define NFSEXP_ALLFLAGS 0x1FE7F
51 52
52/* The flags that may vary depending on security flavor: */ 53/* The flags that may vary depending on security flavor: */
53#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ 54#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 1db5007ddbce..ca8a7958f4e6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
612 if (!rqstp) 612 if (!rqstp)
613 goto out_enomem; 613 goto out_enomem;
614 614
615 init_waitqueue_head(&rqstp->rq_wait);
616
617 serv->sv_nrthreads++; 615 serv->sv_nrthreads++;
618 spin_lock_bh(&pool->sp_lock); 616 spin_lock_bh(&pool->sp_lock);
619 pool->sp_nrthreads++; 617 pool->sp_nrthreads++;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 6666c6745858..c179ca2a5aa4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -346,20 +346,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
346 if (!svc_xprt_has_something_to_do(xprt)) 346 if (!svc_xprt_has_something_to_do(xprt))
347 return; 347 return;
348 348
349 cpu = get_cpu();
350 pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
351 put_cpu();
352
353 spin_lock_bh(&pool->sp_lock);
354
355 if (!list_empty(&pool->sp_threads) &&
356 !list_empty(&pool->sp_sockets))
357 printk(KERN_ERR
358 "svc_xprt_enqueue: "
359 "threads and transports both waiting??\n");
360
361 pool->sp_stats.packets++;
362
363 /* Mark transport as busy. It will remain in this state until 349 /* Mark transport as busy. It will remain in this state until
364 * the provider calls svc_xprt_received. We update XPT_BUSY 350 * the provider calls svc_xprt_received. We update XPT_BUSY
365 * atomically because it also guards against trying to enqueue 351 * atomically because it also guards against trying to enqueue
@@ -368,9 +354,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
368 if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { 354 if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
369 /* Don't enqueue transport while already enqueued */ 355 /* Don't enqueue transport while already enqueued */
370 dprintk("svc: transport %p busy, not enqueued\n", xprt); 356 dprintk("svc: transport %p busy, not enqueued\n", xprt);
371 goto out_unlock; 357 return;
372 } 358 }
373 359
360 cpu = get_cpu();
361 pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
362 spin_lock_bh(&pool->sp_lock);
363
364 pool->sp_stats.packets++;
365
374 if (!list_empty(&pool->sp_threads)) { 366 if (!list_empty(&pool->sp_threads)) {
375 rqstp = list_entry(pool->sp_threads.next, 367 rqstp = list_entry(pool->sp_threads.next,
376 struct svc_rqst, 368 struct svc_rqst,
@@ -382,18 +374,23 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
382 printk(KERN_ERR 374 printk(KERN_ERR
383 "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", 375 "svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
384 rqstp, rqstp->rq_xprt); 376 rqstp, rqstp->rq_xprt);
385 rqstp->rq_xprt = xprt; 377 /* Note the order of the following 3 lines:
378 * We want to assign xprt to rqstp->rq_xprt only _after_
379 * we've woken up the process, so that we don't race with
380 * the lockless check in svc_get_next_xprt().
381 */
386 svc_xprt_get(xprt); 382 svc_xprt_get(xprt);
383 wake_up_process(rqstp->rq_task);
384 rqstp->rq_xprt = xprt;
387 pool->sp_stats.threads_woken++; 385 pool->sp_stats.threads_woken++;
388 wake_up(&rqstp->rq_wait);
389 } else { 386 } else {
390 dprintk("svc: transport %p put into queue\n", xprt); 387 dprintk("svc: transport %p put into queue\n", xprt);
391 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 388 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
392 pool->sp_stats.sockets_queued++; 389 pool->sp_stats.sockets_queued++;
393 } 390 }
394 391
395out_unlock:
396 spin_unlock_bh(&pool->sp_lock); 392 spin_unlock_bh(&pool->sp_lock);
393 put_cpu();
397} 394}
398 395
399/* 396/*
@@ -509,7 +506,7 @@ void svc_wake_up(struct svc_serv *serv)
509 svc_thread_dequeue(pool, rqstp); 506 svc_thread_dequeue(pool, rqstp);
510 rqstp->rq_xprt = NULL; 507 rqstp->rq_xprt = NULL;
511 */ 508 */
512 wake_up(&rqstp->rq_wait); 509 wake_up_process(rqstp->rq_task);
513 } else 510 } else
514 pool->sp_task_pending = 1; 511 pool->sp_task_pending = 1;
515 spin_unlock_bh(&pool->sp_lock); 512 spin_unlock_bh(&pool->sp_lock);
@@ -628,8 +625,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
628{ 625{
629 struct svc_xprt *xprt; 626 struct svc_xprt *xprt;
630 struct svc_pool *pool = rqstp->rq_pool; 627 struct svc_pool *pool = rqstp->rq_pool;
631 DECLARE_WAITQUEUE(wait, current); 628 long time_left = 0;
632 long time_left;
633 629
634 /* Normally we will wait up to 5 seconds for any required 630 /* Normally we will wait up to 5 seconds for any required
635 * cache information to be provided. 631 * cache information to be provided.
@@ -651,40 +647,32 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
651 } else { 647 } else {
652 if (pool->sp_task_pending) { 648 if (pool->sp_task_pending) {
653 pool->sp_task_pending = 0; 649 pool->sp_task_pending = 0;
654 spin_unlock_bh(&pool->sp_lock); 650 xprt = ERR_PTR(-EAGAIN);
655 return ERR_PTR(-EAGAIN); 651 goto out;
656 } 652 }
657 /* No data pending. Go to sleep */
658 svc_thread_enqueue(pool, rqstp);
659
660 /* 653 /*
661 * We have to be able to interrupt this wait 654 * We have to be able to interrupt this wait
662 * to bring down the daemons ... 655 * to bring down the daemons ...
663 */ 656 */
664 set_current_state(TASK_INTERRUPTIBLE); 657 set_current_state(TASK_INTERRUPTIBLE);
665 658
666 /* 659 /* No data pending. Go to sleep */
667 * checking kthread_should_stop() here allows us to avoid 660 svc_thread_enqueue(pool, rqstp);
668 * locking and signalling when stopping kthreads that call
669 * svc_recv. If the thread has already been woken up, then
670 * we can exit here without sleeping. If not, then it
671 * it'll be woken up quickly during the schedule_timeout
672 */
673 if (kthread_should_stop()) {
674 set_current_state(TASK_RUNNING);
675 spin_unlock_bh(&pool->sp_lock);
676 return ERR_PTR(-EINTR);
677 }
678
679 add_wait_queue(&rqstp->rq_wait, &wait);
680 spin_unlock_bh(&pool->sp_lock); 661 spin_unlock_bh(&pool->sp_lock);
681 662
682 time_left = schedule_timeout(timeout); 663 if (!(signalled() || kthread_should_stop())) {
664 time_left = schedule_timeout(timeout);
665 __set_current_state(TASK_RUNNING);
683 666
684 try_to_freeze(); 667 try_to_freeze();
668
669 xprt = rqstp->rq_xprt;
670 if (xprt != NULL)
671 return xprt;
672 } else
673 __set_current_state(TASK_RUNNING);
685 674
686 spin_lock_bh(&pool->sp_lock); 675 spin_lock_bh(&pool->sp_lock);
687 remove_wait_queue(&rqstp->rq_wait, &wait);
688 if (!time_left) 676 if (!time_left)
689 pool->sp_stats.threads_timedout++; 677 pool->sp_stats.threads_timedout++;
690 678
@@ -699,6 +687,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
699 return ERR_PTR(-EAGAIN); 687 return ERR_PTR(-EAGAIN);
700 } 688 }
701 } 689 }
690out:
702 spin_unlock_bh(&pool->sp_lock); 691 spin_unlock_bh(&pool->sp_lock);
703 return xprt; 692 return xprt;
704} 693}
@@ -744,7 +733,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
744 svc_add_new_temp_xprt(serv, newxpt); 733 svc_add_new_temp_xprt(serv, newxpt);
745 else 734 else
746 module_put(xprt->xpt_class->xcl_owner); 735 module_put(xprt->xpt_class->xcl_owner);
747 } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { 736 } else {
748 /* XPT_DATA|XPT_DEFERRED case: */ 737 /* XPT_DATA|XPT_DEFERRED case: */
749 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", 738 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
750 rqstp, rqstp->rq_pool->sp_id, xprt, 739 rqstp, rqstp->rq_pool->sp_id, xprt,
@@ -781,10 +770,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
781 printk(KERN_ERR 770 printk(KERN_ERR
782 "svc_recv: service %p, transport not NULL!\n", 771 "svc_recv: service %p, transport not NULL!\n",
783 rqstp); 772 rqstp);
784 if (waitqueue_active(&rqstp->rq_wait))
785 printk(KERN_ERR
786 "svc_recv: service %p, wait queue active!\n",
787 rqstp);
788 773
789 err = svc_alloc_arg(rqstp); 774 err = svc_alloc_arg(rqstp);
790 if (err) 775 if (err)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c24a8ff33f8f..3f959c681885 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -312,19 +312,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
312} 312}
313 313
314/* 314/*
315 * Check input queue length
316 */
317static int svc_recv_available(struct svc_sock *svsk)
318{
319 struct socket *sock = svsk->sk_sock;
320 int avail, err;
321
322 err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
323
324 return (err >= 0)? avail : err;
325}
326
327/*
328 * Generic recvfrom routine. 315 * Generic recvfrom routine.
329 */ 316 */
330static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, 317static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
@@ -339,8 +326,14 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
339 326
340 rqstp->rq_xprt_hlen = 0; 327 rqstp->rq_xprt_hlen = 0;
341 328
329 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
342 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, 330 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
343 msg.msg_flags); 331 msg.msg_flags);
332 /* If we read a full record, then assume there may be more
333 * data to read (stream based sockets only!)
334 */
335 if (len == buflen)
336 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
344 337
345 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", 338 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
346 svsk, iov[0].iov_base, iov[0].iov_len, len); 339 svsk, iov[0].iov_base, iov[0].iov_len, len);
@@ -980,8 +973,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
980 unsigned int want; 973 unsigned int want;
981 int len; 974 int len;
982 975
983 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
984
985 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 976 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
986 struct kvec iov; 977 struct kvec iov;
987 978
@@ -1036,7 +1027,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
1036 "%s: Got unrecognized reply: " 1027 "%s: Got unrecognized reply: "
1037 "calldir 0x%x xpt_bc_xprt %p xid %08x\n", 1028 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1038 __func__, ntohl(calldir), 1029 __func__, ntohl(calldir),
1039 bc_xprt, xid); 1030 bc_xprt, ntohl(xid));
1040 return -EAGAIN; 1031 return -EAGAIN;
1041 } 1032 }
1042 1033
@@ -1073,8 +1064,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
1073static void svc_tcp_fragment_received(struct svc_sock *svsk) 1064static void svc_tcp_fragment_received(struct svc_sock *svsk)
1074{ 1065{
1075 /* If we have more data, signal svc_xprt_enqueue() to try again */ 1066 /* If we have more data, signal svc_xprt_enqueue() to try again */
1076 if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
1077 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1078 dprintk("svc: TCP %s record (%d bytes)\n", 1067 dprintk("svc: TCP %s record (%d bytes)\n",
1079 svc_sock_final_rec(svsk) ? "final" : "nonfinal", 1068 svc_sock_final_rec(svsk) ? "final" : "nonfinal",
1080 svc_sock_reclen(svsk)); 1069 svc_sock_reclen(svsk));
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 374feb44afea..4e618808bc98 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
91 .xcl_name = "rdma", 91 .xcl_name = "rdma",
92 .xcl_owner = THIS_MODULE, 92 .xcl_owner = THIS_MODULE,
93 .xcl_ops = &svc_rdma_ops, 93 .xcl_ops = &svc_rdma_ops,
94 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, 94 .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
95 .xcl_ident = XPRT_TRANSPORT_RDMA, 95 .xcl_ident = XPRT_TRANSPORT_RDMA,
96}; 96};
97 97
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c419498b8f46..ac7fc9a31342 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,7 @@
51#include <linux/sunrpc/clnt.h> /* rpc_xprt */ 51#include <linux/sunrpc/clnt.h> /* rpc_xprt */
52#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ 52#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
53#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ 53#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
54#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */
54 55
55#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ 56#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
56#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ 57#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
@@ -392,4 +393,10 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
392/* Workqueue created in svc_rdma.c */ 393/* Workqueue created in svc_rdma.c */
393extern struct workqueue_struct *svc_rdma_wq; 394extern struct workqueue_struct *svc_rdma_wq;
394 395
396#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
397#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
398#else
399#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
400#endif
401
395#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 402#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */