path: root/fs
Diffstat (limited to 'fs')
-rw-r--r--  fs/Makefile                  |    5
-rw-r--r--  fs/compat.c                  |    2
-rw-r--r--  fs/lockd/host.c              |    1
-rw-r--r--  fs/lockd/mon.c               |    1
-rw-r--r--  fs/lockd/svc.c               |    2
-rw-r--r--  fs/lockd/svc4proc.c          |    2
-rw-r--r--  fs/lockd/svclock.c           |   31
-rw-r--r--  fs/lockd/svcproc.c           |    2
-rw-r--r--  fs/nfs/Kconfig               |    8
-rw-r--r--  fs/nfs/Makefile              |    4
-rw-r--r--  fs/nfs/callback.c            |    4
-rw-r--r--  fs/nfs/callback_proc.c       |    8
-rw-r--r--  fs/nfs/client.c              |   11
-rw-r--r--  fs/nfs/dns_resolve.c         |    6
-rw-r--r--  fs/nfs/file.c                |    5
-rw-r--r--  fs/nfs/inode.c               |    3
-rw-r--r--  fs/nfs/mount_clnt.c          |    2
-rw-r--r--  fs/nfs/nfs4filelayout.c      |  280
-rw-r--r--  fs/nfs/nfs4filelayout.h      |   94
-rw-r--r--  fs/nfs/nfs4filelayoutdev.c   |  448
-rw-r--r--  fs/nfs/nfs4proc.c            |  218
-rw-r--r--  fs/nfs/nfs4state.c           |    2
-rw-r--r--  fs/nfs/nfs4xdr.c             |  360
-rw-r--r--  fs/nfs/pnfs.c                |  783
-rw-r--r--  fs/nfs/pnfs.h                |  189
-rw-r--r--  fs/nfs/read.c                |    3
-rw-r--r--  fs/nfsd/Kconfig              |   12
-rw-r--r--  fs/nfsd/export.c             |   73
-rw-r--r--  fs/nfsd/nfs4callback.c       |  245
-rw-r--r--  fs/nfsd/nfs4idmap.c          |  105
-rw-r--r--  fs/nfsd/nfs4proc.c           |    7
-rw-r--r--  fs/nfsd/nfs4state.c          |  493
-rw-r--r--  fs/nfsd/nfs4xdr.c            |   18
-rw-r--r--  fs/nfsd/nfsctl.c             |   26
-rw-r--r--  fs/nfsd/nfsd.h               |    2
-rw-r--r--  fs/nfsd/nfssvc.c             |    5
-rw-r--r--  fs/nfsd/state.h              |   52
37 files changed, 3009 insertions(+), 503 deletions(-)
diff --git a/fs/Makefile b/fs/Makefile
index e6ec1d309b1d..26956fcec917 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,10 +29,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
 obj-$(CONFIG_AIO)               += aio.o
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_COMPAT)            += compat.o compat_ioctl.o
-
-nfsd-$(CONFIG_NFSD)             := nfsctl.o
-obj-y                           += $(nfsd-y) $(nfsd-m)
-
+obj-$(CONFIG_NFSD_DEPRECATED)   += nfsctl.o
 obj-$(CONFIG_BINFMT_AOUT)       += binfmt_aout.o
 obj-$(CONFIG_BINFMT_EM86)       += binfmt_em86.o
 obj-$(CONFIG_BINFMT_MISC)       += binfmt_misc.o
diff --git a/fs/compat.c b/fs/compat.c
index 0644a154672b..f03abdadc401 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1963,7 +1963,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 }
 #endif /* HAVE_SET_RESTORE_SIGMASK */
 
-#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
+#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED)
 /* Stuff for NFS server syscalls... */
 struct compat_nfsctl_svc {
 	u16			svc32_port;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index bb464d12104c..25e21e4023b2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -353,6 +353,7 @@ nlm_bind_host(struct nlm_host *host)
 		.to_retries	= 5U,
 	};
 	struct rpc_create_args args = {
+		.net		= &init_net,
 		.protocol	= host->h_proto,
 		.address	= nlm_addr(host),
 		.addrsize	= host->h_addrlen,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e3015464fbab..e0c918949644 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -69,6 +69,7 @@ static struct rpc_clnt *nsm_create(void)
 		.sin_addr.s_addr	= htonl(INADDR_LOOPBACK),
 	};
 	struct rpc_create_args args = {
+		.net			= &init_net,
 		.protocol		= XPRT_TRANSPORT_UDP,
 		.address		= (struct sockaddr *)&sin,
 		.addrsize		= sizeof(sin),
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f1bacf1a0391..b13aabc12298 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -206,7 +206,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
 
 	xprt = svc_find_xprt(serv, name, family, 0);
 	if (xprt == NULL)
-		return svc_create_xprt(serv, name, family, port,
+		return svc_create_xprt(serv, name, &init_net, family, port,
 					SVC_SOCK_DEFAULTS);
 	svc_xprt_put(xprt);
 	return 0;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 031c6569a134..a336e832475d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -230,9 +230,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 
 static void nlm4svc_callback_release(void *data)
 {
-	lock_kernel();
 	nlm_release_call(data);
-	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlm4svc_callback_ops = {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 84055d31bfc5..6f1ef000975a 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -52,12 +52,13 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
  * The list of blocked locks to retry
  */
 static LIST_HEAD(nlm_blocked);
+static DEFINE_SPINLOCK(nlm_blocked_lock);
 
 /*
  * Insert a blocked lock into the global list
  */
 static void
-nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
+nlmsvc_insert_block_locked(struct nlm_block *block, unsigned long when)
 {
 	struct nlm_block *b;
 	struct list_head *pos;
@@ -87,6 +88,13 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
 	block->b_when = when;
 }
 
+static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
+{
+	spin_lock(&nlm_blocked_lock);
+	nlmsvc_insert_block_locked(block, when);
+	spin_unlock(&nlm_blocked_lock);
+}
+
 /*
  * Remove a block from the global list
  */
@@ -94,7 +102,9 @@ static inline void
 nlmsvc_remove_block(struct nlm_block *block)
 {
 	if (!list_empty(&block->b_list)) {
+		spin_lock(&nlm_blocked_lock);
 		list_del_init(&block->b_list);
+		spin_unlock(&nlm_blocked_lock);
 		nlmsvc_release_block(block);
 	}
 }
@@ -651,7 +661,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
 	struct nlm_block *block;
 	int rc = -ENOENT;
 
-	lock_kernel();
+	spin_lock(&nlm_blocked_lock);
 	list_for_each_entry(block, &nlm_blocked, b_list) {
 		if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
 			dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
@@ -665,13 +675,13 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
 			} else if (result == 0)
 				block->b_granted = 1;
 
-			nlmsvc_insert_block(block, 0);
+			nlmsvc_insert_block_locked(block, 0);
 			svc_wake_up(block->b_daemon);
 			rc = 0;
 			break;
 		}
 	}
-	unlock_kernel();
+	spin_unlock(&nlm_blocked_lock);
 	if (rc == -ENOENT)
 		printk(KERN_WARNING "lockd: grant for unknown block\n");
 	return rc;
@@ -803,7 +813,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
 
-	lock_kernel();
+	spin_lock(&nlm_blocked_lock);
 	/* if the block is not on a list at this point then it has
 	 * been invalidated. Don't try to requeue it.
 	 *
@@ -825,19 +835,20 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 		/* Call was successful, now wait for client callback */
 		timeout = 60 * HZ;
 	}
-	nlmsvc_insert_block(block, timeout);
+	nlmsvc_insert_block_locked(block, timeout);
 	svc_wake_up(block->b_daemon);
 out:
-	unlock_kernel();
+	spin_unlock(&nlm_blocked_lock);
 }
 
+/*
+ * FIXME: nlmsvc_release_block() grabs a mutex. This is not allowed for an
+ * .rpc_release rpc_call_op
+ */
 static void nlmsvc_grant_release(void *data)
 {
 	struct nlm_rqst *call = data;
-
-	lock_kernel();
 	nlmsvc_release_block(call->a_block);
-	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
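(Aside: the svclock.c hunks above drop the big kernel lock in favour of a dedicated nlm_blocked_lock spinlock, and split nlmsvc_insert_block() into a _locked helper, for callers that already hold the lock while walking the list, plus a thin wrapper that takes and drops the lock itself. Below is a minimal userspace C analogue of that pattern, not the kernel code: a pthread mutex stands in for the spinlock and all names are invented for illustration.)

```c
/* Userspace sketch of the "locked helper + locking wrapper" pattern. */
#include <pthread.h>
#include <stdio.h>

struct block {
	struct block *next;
	unsigned long when;
};

static struct block *blocked;			/* plays the role of nlm_blocked */
static pthread_mutex_t blocked_lock = PTHREAD_MUTEX_INITIALIZER;

static void insert_block_locked(struct block *b, unsigned long when)
{
	/* caller must hold blocked_lock */
	b->when = when;
	b->next = blocked;
	blocked = b;
}

static void insert_block(struct block *b, unsigned long when)
{
	pthread_mutex_lock(&blocked_lock);
	insert_block_locked(b, when);
	pthread_mutex_unlock(&blocked_lock);
}

int main(void)
{
	struct block a = { 0 }, b = { 0 };

	insert_block(&a, 0);			/* ordinary caller */
	pthread_mutex_lock(&blocked_lock);	/* e.g. while already walking the list */
	insert_block_locked(&b, 60);
	pthread_mutex_unlock(&blocked_lock);
	for (struct block *p = blocked; p; p = p->next)
		printf("block %p when=%lu\n", (void *)p, p->when);
	return 0;
}
```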
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0f2ab741ae7c..c3069f38d602 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -260,9 +260,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 
 static void nlmsvc_callback_release(void *data)
 {
-	lock_kernel();
 	nlm_release_call(data);
-	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlmsvc_callback_ops = {
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 5c55c26af165..fd667652c502 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,13 +77,17 @@ config NFS_V4
 
 config NFS_V4_1
 	bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
-	depends on NFS_V4 && EXPERIMENTAL
+	depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+	select PNFS_FILE_LAYOUT
 	help
 	  This option enables support for minor version 1 of the NFSv4 protocol
-	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
+	  (RFC 5661) in the kernel's NFS client.
 
 	  If unsure, say N.
 
+config PNFS_FILE_LAYOUT
+	tristate
+
 config ROOT_NFS
 	bool "Root file system on NFS"
 	depends on NFS_FS=y && IP_PNP
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index da7fda639eac..4776ff9e3814 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o \
 			   nfs4namespace.o
+nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
 nfs-$(CONFIG_SYSCTL)	+= sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE)	+= fscache.o fscache-index.o
+
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e17b49e2eabd..aeec017fe814 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv)
 {
 	int ret;
 
-	ret = svc_create_xprt(serv, "tcp", PF_INET,
+	ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
 				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
 	if (ret <= 0)
 		goto out_err;
@@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv)
 	dprintk("NFS: Callback listener port = %u (af %u)\n",
 			nfs_callback_tcpport, PF_INET);
 
-	ret = svc_create_xprt(serv, "tcp", PF_INET6,
+	ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
 				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
 	if (ret > 0) {
 		nfs_callback_tcpport6 = ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 930d10fecdaf..2950fca0c61b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
 	if (delegation == NULL)
 		return 0;
 
-	/* seqid is 4-bytes long */
-	if (((u32 *) &stateid->data)[0] != 0)
+	if (stateid->stateid.seqid != 0)
 		return 0;
-	if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
-				sizeof(stateid->data)-4))
+	if (memcmp(&delegation->stateid.stateid.other,
+		   &stateid->stateid.other,
+		   NFS4_STATEID_OTHER_SIZE))
 		return 0;
 
 	return 1;
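(Aside: the hunk above switches the delegation check to structured stateid fields: the callback's 4-byte seqid must be zero and only the 12-byte "other" field is compared. Below is a standalone sketch of that rule, not the kernel code; the struct and constant are local stand-ins for the kernel's nfs4_stateid.)

```c
/* Delegation stateid check: seqid must be 0, compare only the "other" bytes. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define STATEID_OTHER_SIZE 12		/* 16-byte stateid = 4-byte seqid + 12-byte other */

struct stateid {
	uint32_t seqid;
	uint8_t  other[STATEID_OTHER_SIZE];
};

static int delegation_stateid_matches(const struct stateid *held,
				      const struct stateid *cb)
{
	if (cb->seqid != 0)		/* delegation stateids carry seqid 0 */
		return 0;
	return memcmp(held->other, cb->other, STATEID_OTHER_SIZE) == 0;
}

int main(void)
{
	struct stateid held = { .seqid = 0, .other = "abcdefghijk" };
	struct stateid cb = held;

	printf("match: %d\n", delegation_stateid_matches(&held, &cb));
	cb.seqid = 1;
	printf("match after seqid bump: %d\n", delegation_stateid_matches(&held, &cb));
	return 0;
}
```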
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a882785eba41..0870d0d4efc0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
 #include "iostat.h"
 #include "internal.h"
 #include "fscache.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_CLIENT
 
@@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	cred = rpc_lookup_machine_cred();
 	if (!IS_ERR(cred))
 		clp->cl_machine_cred = cred;
-
+#if defined(CONFIG_NFS_V4_1)
+	INIT_LIST_HEAD(&clp->cl_layouts);
+#endif
 	nfs_fscache_get_client_cookie(clp);
 
 	return clp;
@@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp)
 		nfs_free_client(clp);
 	}
 }
+EXPORT_SYMBOL_GPL(nfs_put_client);
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 /*
@@ -601,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
 {
 	struct rpc_clnt *clnt = NULL;
 	struct rpc_create_args args = {
+		.net		= &init_net,
 		.protocol	= clp->cl_proto,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
@@ -900,6 +905,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
 		server->wsize = NFS_MAX_FILE_IO_SIZE;
 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	set_pnfs_layoutdriver(server, fsinfo->layouttype);
+
 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
 
 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
@@ -939,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
 	}
 
 	fsinfo.fattr = fattr;
+	fsinfo.layouttype = 0;
 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
 	if (error < 0)
 		goto out_error;
@@ -1021,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server)
 {
 	dprintk("--> nfs_free_server()\n");
 
+	unset_pnfs_layoutdriver(server);
 	spin_lock(&nfs_client_lock);
 	list_del(&server->client_link);
 	list_del(&server->master_link);
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index dba50a5625db..a6e711ad130f 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
 		return 0;
 	}
 	item = container_of(h, struct nfs_dns_ent, h);
-	ttl = (long)item->h.expiry_time - (long)get_seconds();
+	ttl = item->h.expiry_time - seconds_since_boot();
 	if (ttl < 0)
 		ttl = 0;
 
@@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
 	ttl = get_expiry(&buf);
 	if (ttl == 0)
 		goto out;
-	key.h.expiry_time = ttl + get_seconds();
+	key.h.expiry_time = ttl + seconds_since_boot();
 
 	ret = -ENOMEM;
 	item = nfs_dns_lookup(cd, &key);
@@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd,
 		goto out_err;
 	ret = -ETIMEDOUT;
 	if (!test_bit(CACHE_VALID, &(*item)->h.flags)
-			|| (*item)->h.expiry_time < get_seconds()
+			|| (*item)->h.expiry_time < seconds_since_boot()
 			|| cd->flush_time > (*item)->h.last_refresh)
 		goto out_put;
 	ret = -ENOENT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e18c31e08a28..e756075637b0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -36,6 +36,7 @@
 #include "internal.h"
 #include "iostat.h"
 #include "fscache.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
@@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
 		file->f_path.dentry->d_name.name,
 		mapping->host->i_ino, len, (long long) pos);
 
+	pnfs_update_layout(mapping->host,
+			   nfs_file_open_context(file),
+			   IOMODE_RW);
+
 start:
 	/*
 	 * Prevent starvation issues if someone is doing a consistency
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6eec28656415..314f57164602 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -48,6 +48,7 @@
 #include "internal.h"
 #include "fscache.h"
 #include "dns_resolve.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
@@ -1410,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
+	pnfs_destroy_layout(NFS_I(inode));
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation_noreclaim(inode);
 	/* First call standard NFS clear_inode() code */
@@ -1447,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 	nfsi->delegation = NULL;
 	nfsi->delegation_state = 0;
 	init_rwsem(&nfsi->rwsem);
+	nfsi->layout = NULL;
 #endif
 }
 
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d610203d95c6..eceafe74f473 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
 		.rpc_resp	= &result,
 	};
 	struct rpc_create_args args = {
+		.net		= &init_net,
 		.protocol	= info->protocol,
 		.address	= info->sap,
 		.addrsize	= info->salen,
@@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
 		.to_retries = 2,
 	};
 	struct rpc_create_args args = {
+		.net		= &init_net,
 		.protocol	= IPPROTO_UDP,
 		.address	= info->sap,
 		.addrsize	= info->salen,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
new file mode 100644
index 000000000000..2e92f0d8d654
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.c
@@ -0,0 +1,280 @@
1/*
2 * Module for the pnfs nfs4 file layout driver.
3 * Defines all I/O and Policy interface operations, plus code
4 * to register itself with the pNFS client.
5 *
6 * Copyright (c) 2002
7 * The Regents of the University of Michigan
8 * All Rights Reserved
9 *
10 * Dean Hildebrand <dhildebz@umich.edu>
11 *
12 * Permission is granted to use, copy, create derivative works, and
13 * redistribute this software and such derivative works for any purpose,
14 * so long as the name of the University of Michigan is not used in
15 * any advertising or publicity pertaining to the use or distribution
16 * of this software without specific, written prior authorization. If
17 * the above copyright notice or any other identification of the
18 * University of Michigan is included in any copy of any portion of
19 * this software, then the disclaimer below must also be included.
20 *
21 * This software is provided as is, without representation or warranty
22 * of any kind either express or implied, including without limitation
23 * the implied warranties of merchantability, fitness for a particular
24 * purpose, or noninfringement. The Regents of the University of
25 * Michigan shall not be liable for any damages, including special,
26 * indirect, incidental, or consequential damages, with respect to any
27 * claim arising out of or in connection with the use of the software,
28 * even if it has been or is hereafter advised of the possibility of
29 * such damages.
30 */
31
32#include <linux/nfs_fs.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42
43static int
44filelayout_set_layoutdriver(struct nfs_server *nfss)
45{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
47 nfs4_fl_free_deviceid_callback);
48 if (status) {
49 printk(KERN_WARNING "%s: deviceid cache could not be "
50 "initialized\n", __func__);
51 return status;
52 }
53 dprintk("%s: deviceid cache has been initialized successfully\n",
54 __func__);
55 return 0;
56}
57
58/* Clear out the layout by destroying its device list */
59static int
60filelayout_clear_layoutdriver(struct nfs_server *nfss)
61{
62 dprintk("--> %s\n", __func__);
63
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0;
67}
68
69/*
70 * filelayout_check_layout()
71 *
72 * Make sure layout segment parameters are sane WRT the device.
73 * At this point no generic layer initialization of the lseg has occurred,
74 * and nothing has been added to the layout_hdr cache.
75 *
76 */
77static int
78filelayout_check_layout(struct pnfs_layout_hdr *lo,
79 struct nfs4_filelayout_segment *fl,
80 struct nfs4_layoutget_res *lgr,
81 struct nfs4_deviceid *id)
82{
83 struct nfs4_file_layout_dsaddr *dsaddr;
84 int status = -EINVAL;
85 struct nfs_server *nfss = NFS_SERVER(lo->inode);
86
87 dprintk("--> %s\n", __func__);
88
89 if (fl->pattern_offset > lgr->range.offset) {
90 dprintk("%s pattern_offset %lld to large\n",
91 __func__, fl->pattern_offset);
92 goto out;
93 }
94
95 if (fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n",
97 __func__, fl->stripe_unit);
98 goto out;
99 }
100
101 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
103 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->inode, id);
105 if (dsaddr == NULL)
106 goto out;
107 }
108 fl->dsaddr = dsaddr;
109
110 if (fl->first_stripe_index < 0 ||
111 fl->first_stripe_index >= dsaddr->stripe_count) {
112 dprintk("%s Bad first_stripe_index %d\n",
113 __func__, fl->first_stripe_index);
114 goto out_put;
115 }
116
117 if ((fl->stripe_type == STRIPE_SPARSE &&
118 fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
119 (fl->stripe_type == STRIPE_DENSE &&
120 fl->num_fh != dsaddr->stripe_count)) {
121 dprintk("%s num_fh %u not valid for given packing\n",
122 __func__, fl->num_fh);
123 goto out_put;
124 }
125
126 if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
127 dprintk("%s Stripe unit (%u) not aligned with rsize %u "
128 "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
129 nfss->wsize);
130 }
131
132 status = 0;
133out:
134 dprintk("--> %s returns %d\n", __func__, status);
135 return status;
136out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
138 goto out;
139}
140
141static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
142{
143 int i;
144
145 for (i = 0; i < fl->num_fh; i++) {
146 if (!fl->fh_array[i])
147 break;
148 kfree(fl->fh_array[i]);
149 }
150 kfree(fl->fh_array);
151 fl->fh_array = NULL;
152}
153
154static void
155_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
156{
157 filelayout_free_fh_array(fl);
158 kfree(fl);
159}
160
161static int
162filelayout_decode_layout(struct pnfs_layout_hdr *flo,
163 struct nfs4_filelayout_segment *fl,
164 struct nfs4_layoutget_res *lgr,
165 struct nfs4_deviceid *id)
166{
167 uint32_t *p = (uint32_t *)lgr->layout.buf;
168 uint32_t nfl_util;
169 int i;
170
171 dprintk("%s: set_layout_map Begin\n", __func__);
172
173 memcpy(id, p, sizeof(*id));
174 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
175 print_deviceid(id);
176
177 nfl_util = be32_to_cpup(p++);
178 if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
179 fl->commit_through_mds = 1;
180 if (nfl_util & NFL4_UFLG_DENSE)
181 fl->stripe_type = STRIPE_DENSE;
182 else
183 fl->stripe_type = STRIPE_SPARSE;
184 fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
185
186 fl->first_stripe_index = be32_to_cpup(p++);
187 p = xdr_decode_hyper(p, &fl->pattern_offset);
188 fl->num_fh = be32_to_cpup(p++);
189
190 dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
191 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
192 fl->pattern_offset);
193
194 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
195 GFP_KERNEL);
196 if (!fl->fh_array)
197 return -ENOMEM;
198
199 for (i = 0; i < fl->num_fh; i++) {
200 /* Do we want to use a mempool here? */
201 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
202 if (!fl->fh_array[i]) {
203 filelayout_free_fh_array(fl);
204 return -ENOMEM;
205 }
206 fl->fh_array[i]->size = be32_to_cpup(p++);
207 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
208 printk(KERN_ERR "Too big fh %d received %d\n",
209 i, fl->fh_array[i]->size);
210 filelayout_free_fh_array(fl);
211 return -EIO;
212 }
213 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
214 p += XDR_QUADLEN(fl->fh_array[i]->size);
215 dprintk("DEBUG: %s: fh len %d\n", __func__,
216 fl->fh_array[i]->size);
217 }
218
219 return 0;
220}
221
222static struct pnfs_layout_segment *
223filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
224 struct nfs4_layoutget_res *lgr)
225{
226 struct nfs4_filelayout_segment *fl;
227 int rc;
228 struct nfs4_deviceid id;
229
230 dprintk("--> %s\n", __func__);
231 fl = kzalloc(sizeof(*fl), GFP_KERNEL);
232 if (!fl)
233 return NULL;
234
235 rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
236 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
237 _filelayout_free_lseg(fl);
238 return NULL;
239 }
240 return &fl->generic_hdr;
241}
242
243static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{
246 struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248
249 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache,
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl);
253}
254
255static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver,
260 .clear_layoutdriver = filelayout_clear_layoutdriver,
261 .alloc_lseg = filelayout_alloc_lseg,
262 .free_lseg = filelayout_free_lseg,
263};
264
265static int __init nfs4filelayout_init(void)
266{
267 printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
268 __func__);
269 return pnfs_register_layoutdriver(&filelayout_type);
270}
271
272static void __exit nfs4filelayout_exit(void)
273{
274 printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
275 __func__);
276 pnfs_unregister_layoutdriver(&filelayout_type);
277}
278
279module_init(nfs4filelayout_init);
280module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
new file mode 100644
index 000000000000..bbf60dd2ab9d
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.h
@@ -0,0 +1,94 @@
1/*
2 * NFSv4 file layout driver data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_NFS4FILELAYOUT_H
31#define FS_NFS_NFS4FILELAYOUT_H
32
33#include "pnfs.h"
34
35/*
36 * Field testing shows we need to support upto 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures.
40 */
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43
44enum stripetype4 {
45 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2
47};
48
49/* Individual ip address */
50struct nfs4_pnfs_ds {
51 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
52 u32 ds_ip_addr;
53 u32 ds_port;
54 struct nfs_client *ds_clp;
55 atomic_t ds_count;
56};
57
58struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid;
60 u32 stripe_count;
61 u8 *stripe_indices;
62 u32 ds_num;
63 struct nfs4_pnfs_ds *ds_list[1];
64};
65
66struct nfs4_filelayout_segment {
67 struct pnfs_layout_segment generic_hdr;
68 u32 stripe_type;
69 u32 commit_through_mds;
70 u32 stripe_unit;
71 u32 first_stripe_index;
72 u64 pattern_offset;
73 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
74 unsigned int num_fh;
75 struct nfs_fh **fh_array;
76};
77
78static inline struct nfs4_filelayout_segment *
79FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
80{
81 return container_of(lseg,
82 struct nfs4_filelayout_segment,
83 generic_hdr);
84}
85
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
87extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id);
89extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
91struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93
94#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
new file mode 100644
index 000000000000..51fe64ace55a
--- /dev/null
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -0,0 +1,448 @@
1/*
2 * Device operations for the pnfs nfs4 file layout driver.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 * Garth Goodson <Garth.Goodson@netapp.com>
10 *
11 * Permission is granted to use, copy, create derivative works, and
12 * redistribute this software and such derivative works for any purpose,
13 * so long as the name of the University of Michigan is not used in
14 * any advertising or publicity pertaining to the use or distribution
15 * of this software without specific, written prior authorization. If
16 * the above copyright notice or any other identification of the
17 * University of Michigan is included in any copy of any portion of
18 * this software, then the disclaimer below must also be included.
19 *
20 * This software is provided as is, without representation or warranty
21 * of any kind either express or implied, including without limitation
22 * the implied warranties of merchantability, fitness for a particular
23 * purpose, or noninfringement. The Regents of the University of
24 * Michigan shall not be liable for any damages, including special,
25 * indirect, incidental, or consequential damages, with respect to any
26 * claim arising out of or in connection with the use of the software,
27 * even if it has been or is hereafter advised of the possibility of
28 * such damages.
29 */
30
31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39/*
40 * Data server cache
41 *
42 * Data servers can be mapped to different device ids.
43 * nfs4_pnfs_ds reference counting
44 * - set to 1 on allocation
45 * - incremented when a device id maps a data server already in the cache.
46 * - decremented when deviceid is removed from the cache.
47 */
48DEFINE_SPINLOCK(nfs4_ds_cache_lock);
49static LIST_HEAD(nfs4_data_server_cache);
50
51/* Debug routines */
52void
53print_ds(struct nfs4_pnfs_ds *ds)
54{
55 if (ds == NULL) {
56 printk("%s NULL device\n", __func__);
57 return;
58 }
59 printk(" ip_addr %x port %hu\n"
60 " ref count %d\n"
61 " client %p\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66}
67
68void
69print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
70{
71 int i;
72
73 ifdebug(FACILITY) {
74 printk("%s dsaddr->ds_num %d\n", __func__,
75 dsaddr->ds_num);
76 for (i = 0; i < dsaddr->ds_num; i++)
77 print_ds(dsaddr->ds_list[i]);
78 }
79}
80
81void print_deviceid(struct nfs4_deviceid *id)
82{
83 u32 *p = (u32 *)id;
84
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__,
86 p[0], p[1], p[2], p[3]);
87}
88
89/* nfs4_ds_cache_lock is held */
90static struct nfs4_pnfs_ds *
91_data_server_lookup_locked(u32 ip_addr, u32 port)
92{
93 struct nfs4_pnfs_ds *ds;
94
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr), ntohs(port));
97
98 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
99 if (ds->ds_ip_addr == ip_addr &&
100 ds->ds_port == port) {
101 return ds;
102 }
103 }
104 return NULL;
105}
106
107static void
108destroy_ds(struct nfs4_pnfs_ds *ds)
109{
110 dprintk("--> %s\n", __func__);
111 ifdebug(FACILITY)
112 print_ds(ds);
113
114 if (ds->ds_clp)
115 nfs_put_client(ds->ds_clp);
116 kfree(ds);
117}
118
119static void
120nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
121{
122 struct nfs4_pnfs_ds *ds;
123 int i;
124
125 print_deviceid(&dsaddr->deviceid.de_id);
126
127 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i];
129 if (ds != NULL) {
130 if (atomic_dec_and_lock(&ds->ds_count,
131 &nfs4_ds_cache_lock)) {
132 list_del_init(&ds->ds_node);
133 spin_unlock(&nfs4_ds_cache_lock);
134 destroy_ds(ds);
135 }
136 }
137 }
138 kfree(dsaddr->stripe_indices);
139 kfree(dsaddr);
140}
141
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{
154 struct nfs4_pnfs_ds *tmp_ds, *ds;
155
156 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
157 if (!ds)
158 goto out;
159
160 spin_lock(&nfs4_ds_cache_lock);
161 tmp_ds = _data_server_lookup_locked(ip_addr, port);
162 if (tmp_ds == NULL) {
163 ds->ds_ip_addr = ip_addr;
164 ds->ds_port = port;
165 atomic_set(&ds->ds_count, 1);
166 INIT_LIST_HEAD(&ds->ds_node);
167 ds->ds_clp = NULL;
168 list_add(&ds->ds_node, &nfs4_data_server_cache);
169 dprintk("%s add new data server ip 0x%x\n", __func__,
170 ds->ds_ip_addr);
171 } else {
172 kfree(ds);
173 atomic_inc(&tmp_ds->ds_count);
174 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
175 __func__, tmp_ds->ds_ip_addr,
176 atomic_read(&tmp_ds->ds_count));
177 ds = tmp_ds;
178 }
179 spin_unlock(&nfs4_ds_cache_lock);
180out:
181 return ds;
182}
183
184/*
185 * Currently only support ipv4, and one multi-path address.
186 */
187static struct nfs4_pnfs_ds *
188decode_and_add_ds(__be32 **pp, struct inode *inode)
189{
190 struct nfs4_pnfs_ds *ds = NULL;
191 char *buf;
192 const char *ipend, *pstr;
193 u32 ip_addr, port;
194 int nlen, rlen, i;
195 int tmp[2];
196 __be32 *r_netid, *r_addr, *p = *pp;
197
198 /* r_netid */
199 nlen = be32_to_cpup(p++);
200 r_netid = p;
201 p += XDR_QUADLEN(nlen);
202
203 /* r_addr */
204 rlen = be32_to_cpup(p++);
205 r_addr = p;
206 p += XDR_QUADLEN(rlen);
207 *pp = p;
208
209 /* Check that netid is "tcp" */
210 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
212 goto out_err;
213 }
214
215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s Invalid address, length %d\n", __func__,
218 rlen);
219 goto out_err;
220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen);
224
225 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i = 0; i < 2; i++) {
227 char *res = strrchr(buf, '.');
228 *res = '-';
229 }
230
231 /* Currently only support ipv4 address */
232 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
233 dprintk("%s: Only ipv4 addresses supported\n", __func__);
234 goto out_free;
235 }
236
237 /* port */
238 pstr = ipend;
239 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
240 port = htons((tmp[0] << 8) | (tmp[1]));
241
242 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
243 dprintk("%s Decoded address and port %s\n", __func__, buf);
244out_free:
245 kfree(buf);
246out_err:
247 return ds;
248}
249
250/* Decode opaque device data and return the result */
251static struct nfs4_file_layout_dsaddr*
252decode_device(struct inode *ino, struct pnfs_device *pdev)
253{
254 int i, dummy;
255 u32 cnt, num;
256 u8 *indexp;
257 __be32 *p = (__be32 *)pdev->area, *indicesp;
258 struct nfs4_file_layout_dsaddr *dsaddr;
259
260 /* Get the stripe count (number of stripe index) */
261 cnt = be32_to_cpup(p++);
262 dprintk("%s stripe count %d\n", __func__, cnt);
263 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
264 printk(KERN_WARNING "%s: stripe count %d greater than "
265 "supported maximum %d\n", __func__,
266 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
267 goto out_err;
268 }
269
270 /* Check the multipath list count */
271 indicesp = p;
272 p += XDR_QUADLEN(cnt << 2);
273 num = be32_to_cpup(p++);
274 dprintk("%s ds_num %u\n", __func__, num);
275 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
276 printk(KERN_WARNING "%s: multipath count %d greater than "
277 "supported maximum %d\n", __func__,
278 num, NFS4_PNFS_MAX_MULTI_CNT);
279 goto out_err;
280 }
281 dsaddr = kzalloc(sizeof(*dsaddr) +
282 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
283 GFP_KERNEL);
284 if (!dsaddr)
285 goto out_err;
286
287 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
288 if (!dsaddr->stripe_indices)
289 goto out_err_free;
290
291 dsaddr->stripe_count = cnt;
292 dsaddr->ds_num = num;
293
294 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
295
296 /* Go back an read stripe indices */
297 p = indicesp;
298 indexp = &dsaddr->stripe_indices[0];
299 for (i = 0; i < dsaddr->stripe_count; i++) {
300 *indexp = be32_to_cpup(p++);
301 if (*indexp >= num)
302 goto out_err_free;
303 indexp++;
304 }
305 /* Skip already read multipath list count */
306 p++;
307
308 for (i = 0; i < dsaddr->ds_num; i++) {
309 int j;
310
311 dummy = be32_to_cpup(p++); /* multipath count */
312 if (dummy > 1) {
313 printk(KERN_WARNING
314 "%s: Multipath count %d not supported, "
315 "skipping all greater than 1\n", __func__,
316 dummy);
317 }
318 for (j = 0; j < dummy; j++) {
319 if (j == 0) {
320 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
321 if (dsaddr->ds_list[i] == NULL)
322 goto out_err_free;
323 } else {
324 u32 len;
325 /* skip extra multipath */
326 len = be32_to_cpup(p++);
327 p += XDR_QUADLEN(len);
328 len = be32_to_cpup(p++);
329 p += XDR_QUADLEN(len);
330 continue;
331 }
332 }
333 }
334 return dsaddr;
335
336out_err_free:
337 nfs4_fl_free_deviceid(dsaddr);
338out_err:
339 dprintk("%s ERROR: returning NULL\n", __func__);
340 return NULL;
341}
342
343/*
344 * Decode the opaque device specified in 'dev'
345 * and add it to the list of available devices.
346 * If the deviceid is already cached, nfs4_add_deviceid will return
347 * a pointer to the cached struct and throw away the new.
348 */
349static struct nfs4_file_layout_dsaddr*
350decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
351{
352 struct nfs4_file_layout_dsaddr *dsaddr;
353 struct pnfs_deviceid_node *d;
354
355 dsaddr = decode_device(inode, dev);
356 if (!dsaddr) {
357 printk(KERN_WARNING "%s: Could not decode or add device\n",
358 __func__);
359 return NULL;
360 }
361
362 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
363 &dsaddr->deviceid);
364
365 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
366}
367
368/*
369 * Retrieve the information for dev_id, add it to the list
370 * of available devices, and return it.
371 */
372struct nfs4_file_layout_dsaddr *
373get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
374{
375 struct pnfs_device *pdev = NULL;
376 u32 max_resp_sz;
377 int max_pages;
378 struct page **pages = NULL;
379 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
380 int rc, i;
381 struct nfs_server *server = NFS_SERVER(inode);
382
383 /*
384 * Use the session max response size as the basis for setting
385 * GETDEVICEINFO's maxcount
386 */
387 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
388 max_pages = max_resp_sz >> PAGE_SHIFT;
389 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
390 __func__, inode, max_resp_sz, max_pages);
391
392 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
393 if (pdev == NULL)
394 return NULL;
395
396 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
397 if (pages == NULL) {
398 kfree(pdev);
399 return NULL;
400 }
401 for (i = 0; i < max_pages; i++) {
402 pages[i] = alloc_page(GFP_KERNEL);
403 if (!pages[i])
404 goto out_free;
405 }
406
407 /* set pdev->area */
408 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
409 if (!pdev->area)
410 goto out_free;
411
412 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
413 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
414 pdev->pages = pages;
415 pdev->pgbase = 0;
416 pdev->pglen = PAGE_SIZE * max_pages;
417 pdev->mincount = 0;
418
419 rc = nfs4_proc_getdeviceinfo(server, pdev);
420 dprintk("%s getdevice info returns %d\n", __func__, rc);
421 if (rc)
422 goto out_free;
423
424 /*
425 * Found new device, need to decode it and then add it to the
426 * list of known devices for this mountpoint.
427 */
428 dsaddr = decode_and_add_device(inode, pdev);
429out_free:
430 if (pdev->area != NULL)
431 vunmap(pdev->area);
432 for (i = 0; i < max_pages; i++)
433 __free_page(pages[i]);
434 kfree(pages);
435 kfree(pdev);
436 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
437 return dsaddr;
438}
439
440struct nfs4_file_layout_dsaddr *
441nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
442{
443 struct pnfs_deviceid_node *d;
444
445 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
446 return (d == NULL) ? NULL :
447 container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
448}
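(Aside: decode_and_add_ds() above converts the GETDEVICEINFO r_addr string, an RFC 5661 universal address of the form "h1.h2.h3.h4.p1.p2", into a binary IPv4 address and a port, where the port is split across the last two fields (port = p1 * 256 + p2); the kernel version does this with in4_pton() after rewriting the final two dots. Below is a small userspace sketch of that conversion only, with an invented helper name and simplified error handling, not the kernel implementation.)

```c
/* Parse an RFC 5661 IPv4 universal address "h1.h2.h3.h4.p1.p2". */
#include <stdio.h>

static int parse_netaddr(const char *uaddr, unsigned char ip[4], unsigned short *port)
{
	unsigned int h[4], p[2];

	if (sscanf(uaddr, "%u.%u.%u.%u.%u.%u",
		   &h[0], &h[1], &h[2], &h[3], &p[0], &p[1]) != 6)
		return -1;
	for (int i = 0; i < 4; i++) {
		if (h[i] > 255)
			return -1;
		ip[i] = (unsigned char)h[i];
	}
	if (p[0] > 255 || p[1] > 255)
		return -1;
	*port = (unsigned short)((p[0] << 8) | p[1]);	/* high byte first */
	return 0;
}

int main(void)
{
	unsigned char ip[4];
	unsigned short port;

	/* "8.1" encodes port 8 * 256 + 1 = 2049, the standard NFS port */
	if (parse_netaddr("192.168.1.10.8.1", ip, &port) == 0)
		printf("%u.%u.%u.%u port %u\n", ip[0], ip[1], ip[2], ip[3], port);
	return 0;
}
```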
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e87fe612ca18..32c8758c99fd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
 #include "internal.h"
 #include "iostat.h"
 #include "callback.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -130,6 +131,7 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
 			| FATTR4_WORD0_MAXWRITE
 			| FATTR4_WORD0_LEASE_TIME,
 			FATTR4_WORD1_TIME_DELTA
+			| FATTR4_WORD1_FS_LAYOUT_TYPES
 };
 
 const u32 nfs4_fs_locations_bitmap[2] = {
@@ -4840,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
 			     args->bc_attrs.max_reqs);
 }
 
-static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd)
+static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
 {
-	if (rcvd <= sent)
-		return 0;
-	printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. "
-		"sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd);
-	return -EINVAL;
+	struct nfs4_channel_attrs *sent = &args->fc_attrs;
+	struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
+
+	if (rcvd->headerpadsz > sent->headerpadsz)
+		return -EINVAL;
+	if (rcvd->max_resp_sz > sent->max_resp_sz)
+		return -EINVAL;
+	/*
+	 * Our requested max_ops is the minimum we need; we're not
+	 * prepared to break up compounds into smaller pieces than that.
+	 * So, no point even trying to continue if the server won't
+	 * cooperate:
+	 */
+	if (rcvd->max_ops < sent->max_ops)
+		return -EINVAL;
+	if (rcvd->max_reqs == 0)
+		return -EINVAL;
+	return 0;
 }
 
-#define _verify_fore_channel_attr(_name_) \
-	_verify_channel_attr("fore", #_name_, \
-			args->fc_attrs._name_, \
-			session->fc_attrs._name_)
+static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
+{
+	struct nfs4_channel_attrs *sent = &args->bc_attrs;
+	struct nfs4_channel_attrs *rcvd = &session->bc_attrs;
 
-#define _verify_back_channel_attr(_name_) \
-	_verify_channel_attr("back", #_name_, \
-			args->bc_attrs._name_, \
-			session->bc_attrs._name_)
+	if (rcvd->max_rqst_sz > sent->max_rqst_sz)
+		return -EINVAL;
+	if (rcvd->max_resp_sz < sent->max_resp_sz)
+		return -EINVAL;
+	if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
+		return -EINVAL;
+	/* These would render the backchannel useless: */
+	if (rcvd->max_ops == 0)
+		return -EINVAL;
+	if (rcvd->max_reqs == 0)
+		return -EINVAL;
+	return 0;
+}
 
-/*
- * The server is not allowed to increase the fore channel header pad size,
- * maximum response size, or maximum number of operations.
- *
- * The back channel attributes are only negotiatied down: We send what the
- * (back channel) server insists upon.
- */
 static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
 				     struct nfs4_session *session)
 {
-	int ret = 0;
-
-	ret |= _verify_fore_channel_attr(headerpadsz);
-	ret |= _verify_fore_channel_attr(max_resp_sz);
-	ret |= _verify_fore_channel_attr(max_ops);
-
-	ret |= _verify_back_channel_attr(headerpadsz);
-	ret |= _verify_back_channel_attr(max_rqst_sz);
-	ret |= _verify_back_channel_attr(max_resp_sz);
-	ret |= _verify_back_channel_attr(max_resp_sz_cached);
-	ret |= _verify_back_channel_attr(max_ops);
-	ret |= _verify_back_channel_attr(max_reqs);
+	int ret;
 
-	return ret;
+	ret = nfs4_verify_fore_channel_attrs(args, session);
+	if (ret)
+		return ret;
+	return nfs4_verify_back_channel_attrs(args, session);
 }
 
 static int _nfs4_proc_create_session(struct nfs_client *clp)
@@ -5255,6 +5264,147 @@ out:
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
 }
+
+static void
+nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+	struct inode *ino = lgp->args.inode;
+	struct nfs_server *server = NFS_SERVER(ino);
+
+	dprintk("--> %s\n", __func__);
+	if (nfs4_setup_sequence(server, &lgp->args.seq_args,
+				&lgp->res.seq_res, 0, task))
+		return;
+	rpc_call_start(task);
+}
+
+static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+
+	dprintk("--> %s\n", __func__);
+
+	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
+		return;
+
+	switch (task->tk_status) {
+	case 0:
+		break;
+	case -NFS4ERR_LAYOUTTRYLATER:
+	case -NFS4ERR_RECALLCONFLICT:
+		task->tk_status = -NFS4ERR_DELAY;
+		/* Fall through */
+	default:
+		if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+			rpc_restart_call_prepare(task);
+			return;
+		}
+	}
+	lgp->status = task->tk_status;
+	dprintk("<-- %s\n", __func__);
+}
+
+static void nfs4_layoutget_release(void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+
+	dprintk("--> %s\n", __func__);
+	put_layout_hdr(lgp->args.inode);
+	if (lgp->res.layout.buf != NULL)
+		free_page((unsigned long) lgp->res.layout.buf);
+	put_nfs_open_context(lgp->args.ctx);
+	kfree(calldata);
+	dprintk("<-- %s\n", __func__);
+}
+
+static const struct rpc_call_ops nfs4_layoutget_call_ops = {
+	.rpc_call_prepare = nfs4_layoutget_prepare,
+	.rpc_call_done = nfs4_layoutget_done,
+	.rpc_release = nfs4_layoutget_release,
+};
+
+int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
+{
+	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
+		.rpc_argp = &lgp->args,
+		.rpc_resp = &lgp->res,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = server->client,
+		.rpc_message = &msg,
+		.callback_ops = &nfs4_layoutget_call_ops,
+		.callback_data = lgp,
+		.flags = RPC_TASK_ASYNC,
+	};
+	int status = 0;
+
+	dprintk("--> %s\n", __func__);
+
+	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
+	if (lgp->res.layout.buf == NULL) {
+		nfs4_layoutget_release(lgp);
+		return -ENOMEM;
+	}
+
+	lgp->res.seq_res.sr_slot = NULL;
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	status = nfs4_wait_for_completion_rpc_task(task);
+	if (status != 0)
+		goto out;
+	status = lgp->status;
+	if (status != 0)
+		goto out;
+	status = pnfs_layout_process(lgp);
+out:
+	rpc_put_task(task);
+	dprintk("<-- %s status=%d\n", __func__, status);
+	return status;
+}
+
+static int
+_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
+{
+	struct nfs4_getdeviceinfo_args args = {
+		.pdev = pdev,
+	};
+	struct nfs4_getdeviceinfo_res res = {
+		.pdev = pdev,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	int status;
+
+	dprintk("--> %s\n", __func__);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+	dprintk("<-- %s status=%d\n", __func__, status);
+
+	return status;
+}
+
+int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = nfs4_handle_exception(server,
+					_nfs4_proc_getdeviceinfo(server, pdev),
+					&exception);
+	} while (exception.retry);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index aa0b02a610c4..f575a3126737 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -54,6 +54,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "pnfs.h"
 
 #define OPENOWNER_POOL_SIZE	8
 
@@ -1475,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 		}
 		clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
 		set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+		pnfs_destroy_all_layouts(clp);
 	}
 
 	if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index bd2101d918c8..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
 #include <linux/nfs_idmap.h>
 #include "nfs4_fs.h"
 #include "internal.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int);
 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
 #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
 #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
+#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
+				XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
+#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
+				1 /* layout type */ + \
+				1 /* opaque devaddr4 length */ + \
+				  /* devaddr4 payload is read into page */ \
+				1 /* notification bitmap length */ + \
+				1 /* notification bitmap */)
+#define encode_layoutget_maxsz	(op_encode_hdr_maxsz + 10 + \
+				encode_stateid_maxsz)
+#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
+				decode_stateid_maxsz + \
+				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz	0
 #define decode_sequence_maxsz	0
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
 					decode_sequence_maxsz + \
 					decode_reclaim_complete_maxsz)
+#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz +    \
+				encode_sequence_maxsz +\
+				encode_getdeviceinfo_maxsz)
+#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz +    \
+				decode_sequence_maxsz + \
+				decode_getdeviceinfo_maxsz)
+#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_putfh_maxsz +        \
+				encode_layoutget_maxsz)
+#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz +        \
+				decode_layoutget_maxsz)
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
 				     compound_encode_hdr_maxsz +
@@ -1737,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
 #endif /* CONFIG_NFS_V4_1 */
 }
 
+#ifdef CONFIG_NFS_V4_1
+static void
+encode_getdeviceinfo(struct xdr_stream *xdr,
+		     const struct nfs4_getdeviceinfo_args *args,
+		     struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
+	*p++ = cpu_to_be32(OP_GETDEVICEINFO);
+	p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
+				    NFS4_DEVICEID4_SIZE);
+	*p++ = cpu_to_be32(args->pdev->layout_type);
+	*p++ = cpu_to_be32(args->pdev->pglen);		/* gdia_maxcount */
+	*p++ = cpu_to_be32(0);				/* bitmap length 0 */
+	hdr->nops++;
+	hdr->replen += decode_getdeviceinfo_maxsz;
+}
+
+static void
+encode_layoutget(struct xdr_stream *xdr,
+		 const struct nfs4_layoutget_args *args,
+		 struct compound_hdr *hdr)
+{
+	nfs4_stateid stateid;
+	__be32 *p;
+
+	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
+	*p++ = cpu_to_be32(OP_LAYOUTGET);
+	*p++ = cpu_to_be32(0);     /* Signal layout available */
+	*p++ = cpu_to_be32(args->type);
+	*p++ = cpu_to_be32(args->range.iomode);
+	p = xdr_encode_hyper(p, args->range.offset);
+	p = xdr_encode_hyper(p, args->range.length);
+	p = xdr_encode_hyper(p, args->minlength);
+	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
+				args->ctx->state);
+	p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
+	*p = cpu_to_be32(args->maxcount);
+
+	dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
+		__func__,
+		args->type,
+		args->range.iomode,
1812 (unsigned long)args->range.offset,
1813 (unsigned long)args->range.length,
1814 args->maxcount);
1815 hdr->nops++;
1816 hdr->replen += decode_layoutget_maxsz;
1817}
1818#endif /* CONFIG_NFS_V4_1 */
1819
1740/* 1820/*
1741 * END OF "GENERIC" ENCODE ROUTINES. 1821 * END OF "GENERIC" ENCODE ROUTINES.
1742 */ 1822 */
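For reference, my reading of the space reserved by encode_layoutget() above (field names follow the LAYOUTGET4args layout in RFC 5661; this breakdown is not text from the patch):

/* reserve_space(xdr, 44 + NFS4_STATEID_SIZE) breaks down as:
 *    4  OP_LAYOUTGET opcode
 *    4  loga_signal_layout_avail (always 0 here)
 *    4  loga_layout_type
 *    4  loga_iomode
 *    8  loga_offset
 *    8  loga_length
 *    8  loga_minlength
 *   16  loga_stateid (NFS4_STATEID_SIZE)
 *    4  loga_maxcount
 *  = 44 + NFS4_STATEID_SIZE bytes
 */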
@@ -2554,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
2554 return 0; 2634 return 0;
2555} 2635}
2556 2636
2637/*
2638 * Encode GETDEVICEINFO request
2639 */
2640static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
2641 struct nfs4_getdeviceinfo_args *args)
2642{
2643 struct xdr_stream xdr;
2644 struct compound_hdr hdr = {
2645 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2646 };
2647
2648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2649 encode_compound_hdr(&xdr, req, &hdr);
2650 encode_sequence(&xdr, &args->seq_args, &hdr);
2651 encode_getdeviceinfo(&xdr, args, &hdr);
2652
2653 /* Set up the reply kvec. Subtract the notification bitmap max size (2)
2654 * so that the notification bitmap lands in the xdr_buf tail. */
2655 xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
2656 args->pdev->pages, args->pdev->pgbase,
2657 args->pdev->pglen);
2658
2659 encode_nops(&hdr);
2660 return 0;
2661}
2662
2663/*
2664 * Encode LAYOUTGET request
2665 */
2666static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
2667 struct nfs4_layoutget_args *args)
2668{
2669 struct xdr_stream xdr;
2670 struct compound_hdr hdr = {
2671 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2672 };
2673
2674 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2675 encode_compound_hdr(&xdr, req, &hdr);
2676 encode_sequence(&xdr, &args->seq_args, &hdr);
2677 encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
2678 encode_layoutget(&xdr, args, &hdr);
2679 encode_nops(&hdr);
2680 return 0;
2681}
2557#endif /* CONFIG_NFS_V4_1 */ 2682#endif /* CONFIG_NFS_V4_1 */
2558 2683
2559static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2684static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -3978,6 +4103,61 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3978 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); 4103 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
3979} 4104}
3980 4105
4106/*
4107 * Decode potentially multiple layout types. Currently we only support
4108 * one layout driver per file system.
4109 */
4110static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
4111 uint32_t *layouttype)
4112{
4113 uint32_t *p;
4114 int num;
4115
4116 p = xdr_inline_decode(xdr, 4);
4117 if (unlikely(!p))
4118 goto out_overflow;
4119 num = be32_to_cpup(p);
4120
4121 /* pNFS is not supported by the underlying file system */
4122 if (num == 0) {
4123 *layouttype = 0;
4124 return 0;
4125 }
4126 if (num > 1)
4127 printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
4128 "per filesystem not supported\n", __func__);
4129
4130 /* Decode and set first layout type, move xdr->p past unused types */
4131 p = xdr_inline_decode(xdr, num * 4);
4132 if (unlikely(!p))
4133 goto out_overflow;
4134 *layouttype = be32_to_cpup(p);
4135 return 0;
4136out_overflow:
4137 print_overflow_msg(__func__, xdr);
4138 return -EIO;
4139}
4140
4141/*
4142 * The type of file system exported.
4143 * Note we must ensure that layouttype is set in any non-error case.
4144 */
4145static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
4146 uint32_t *layouttype)
4147{
4148 int status = 0;
4149
4150 dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
4151 if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
4152 return -EIO;
4153 if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
4154 status = decode_first_pnfs_layout_type(xdr, layouttype);
4155 bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
4156 } else
4157 *layouttype = 0;
4158 return status;
4159}
4160
3981static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) 4161static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3982{ 4162{
3983 __be32 *savep; 4163 __be32 *savep;
@@ -4006,6 +4186,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
4006 status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); 4186 status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
4007 if (status != 0) 4187 if (status != 0)
4008 goto xdr_error; 4188 goto xdr_error;
4189 status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
4190 if (status != 0)
4191 goto xdr_error;
4009 4192
4010 status = verify_attr_len(xdr, savep, attrlen); 4193 status = verify_attr_len(xdr, savep, attrlen);
4011xdr_error: 4194xdr_error:
@@ -4772,6 +4955,134 @@ out_overflow:
4772#endif /* CONFIG_NFS_V4_1 */ 4955#endif /* CONFIG_NFS_V4_1 */
4773} 4956}
4774 4957
4958#if defined(CONFIG_NFS_V4_1)
4959
4960static int decode_getdeviceinfo(struct xdr_stream *xdr,
4961 struct pnfs_device *pdev)
4962{
4963 __be32 *p;
4964 uint32_t len, type;
4965 int status;
4966
4967 status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
4968 if (status) {
4969 if (status == -ETOOSMALL) {
4970 p = xdr_inline_decode(xdr, 4);
4971 if (unlikely(!p))
4972 goto out_overflow;
4973 pdev->mincount = be32_to_cpup(p);
4974 dprintk("%s: Min count too small. mincnt = %u\n",
4975 __func__, pdev->mincount);
4976 }
4977 return status;
4978 }
4979
4980 p = xdr_inline_decode(xdr, 8);
4981 if (unlikely(!p))
4982 goto out_overflow;
4983 type = be32_to_cpup(p++);
4984 if (type != pdev->layout_type) {
4985 dprintk("%s: layout mismatch req: %u pdev: %u\n",
4986 __func__, pdev->layout_type, type);
4987 return -EINVAL;
4988 }
4989 /*
4990 * Get the length of the opaque device_addr4. xdr_read_pages places
4991 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
4992 * and places the remaining xdr data in xdr_buf->tail
4993 */
4994 pdev->mincount = be32_to_cpup(p);
4995 xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
4996
4997 /* Parse notification bitmap, verifying that it is zero. */
4998 p = xdr_inline_decode(xdr, 4);
4999 if (unlikely(!p))
5000 goto out_overflow;
5001 len = be32_to_cpup(p);
5002 if (len) {
5003 int i;
5004
5005 p = xdr_inline_decode(xdr, 4 * len);
5006 if (unlikely(!p))
5007 goto out_overflow;
5008 for (i = 0; i < len; i++, p++) {
5009 if (be32_to_cpup(p)) {
5010 dprintk("%s: notifications not supported\n",
5011 __func__);
5012 return -EIO;
5013 }
5014 }
5015 }
5016 return 0;
5017out_overflow:
5018 print_overflow_msg(__func__, xdr);
5019 return -EIO;
5020}
5021
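The -ETOOSMALL branch above only records the server's preferred size in pdev->mincount; acting on it is left to the caller. A sketch of that retry, with example_resize_pdev() as an assumed helper that re-allocates pdev->pages/pdev->pglen to hold at least the requested number of bytes:

static int example_getdeviceinfo(struct nfs_server *server,
				 struct pnfs_device *pdev)
{
	int err;

	err = nfs4_proc_getdeviceinfo(server, pdev);
	if (err == -ETOOSMALL && pdev->mincount > pdev->pglen) {
		/* Server wants a bigger reply buffer; grow and retry once. */
		err = example_resize_pdev(pdev, pdev->mincount);
		if (!err)
			err = nfs4_proc_getdeviceinfo(server, pdev);
	}
	return err;
}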
5022static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5023 struct nfs4_layoutget_res *res)
5024{
5025 __be32 *p;
5026 int status;
5027 u32 layout_count;
5028
5029 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5030 if (status)
5031 return status;
5032 p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
5033 if (unlikely(!p))
5034 goto out_overflow;
5035 res->return_on_close = be32_to_cpup(p++);
5036 p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
5037 layout_count = be32_to_cpup(p);
5038 if (!layout_count) {
5039 dprintk("%s: server responded with empty layout array\n",
5040 __func__);
5041 return -EINVAL;
5042 }
5043
5044 p = xdr_inline_decode(xdr, 24);
5045 if (unlikely(!p))
5046 goto out_overflow;
5047 p = xdr_decode_hyper(p, &res->range.offset);
5048 p = xdr_decode_hyper(p, &res->range.length);
5049 res->range.iomode = be32_to_cpup(p++);
5050 res->type = be32_to_cpup(p++);
5051
5052 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
5053 if (unlikely(status))
5054 return status;
5055
5056 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
5057 __func__,
5058 (unsigned long)res->range.offset,
5059 (unsigned long)res->range.length,
5060 res->range.iomode,
5061 res->type,
5062 res->layout.len);
5063
5064 /* nfs4_proc_layoutget allocated a single page */
5065 if (res->layout.len > PAGE_SIZE)
5066 return -ENOMEM;
5067 memcpy(res->layout.buf, p, res->layout.len);
5068
5069 if (layout_count > 1) {
5070 /* We only handle a length one array at the moment. Any
5071 * further entries are just ignored. Note that this means
5072 * the client may see a response that is less than the
5073 * minimum it requested.
5074 */
5075 dprintk("%s: server responded with %d layouts, dropping tail\n",
5076 __func__, layout_count);
5077 }
5078
5079 return 0;
5080out_overflow:
5081 print_overflow_msg(__func__, xdr);
5082 return -EIO;
5083}
5084#endif /* CONFIG_NFS_V4_1 */
5085
4775/* 5086/*
4776 * END OF "GENERIC" DECODE ROUTINES. 5087 * END OF "GENERIC" DECODE ROUTINES.
4777 */ 5088 */
@@ -5799,6 +6110,53 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
5799 status = decode_reclaim_complete(&xdr, (void *)NULL); 6110 status = decode_reclaim_complete(&xdr, (void *)NULL);
5800 return status; 6111 return status;
5801} 6112}
6113
6114/*
6115 * Decode GETDEVICEINFO response
6116 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
6118 struct nfs4_getdeviceinfo_res *res)
6119{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr;
6122 int status;
6123
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0)
6127 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6129 if (status != 0)
6130 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev);
6132out:
6133 return status;
6134}
6135
6136/*
6137 * Decode LAYOUTGET response
6138 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
6140 struct nfs4_layoutget_res *res)
6141{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr;
6144 int status;
6145
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status)
6149 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6151 if (status)
6152 goto out;
6153 status = decode_putfh(&xdr);
6154 if (status)
6155 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res);
6157out:
6158 return status;
6159}
5802#endif /* CONFIG_NFS_V4_1 */ 6160#endif /* CONFIG_NFS_V4_1 */
5803 6161
5804__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, 6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
@@ -5990,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
5990 PROC(SEQUENCE, enc_sequence, dec_sequence), 6348 PROC(SEQUENCE, enc_sequence, dec_sequence),
5991 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6349 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
5992 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6350 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6351 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6352 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
5993#endif /* CONFIG_NFS_V4_1 */ 6353#endif /* CONFIG_NFS_V4_1 */
5994}; 6354};
5995 6355
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
new file mode 100644
index 000000000000..db773428f95f
--- /dev/null
+++ b/fs/nfs/pnfs.c
@@ -0,0 +1,783 @@
1/*
2 * pNFS functions to call and manage layout drivers.
3 *
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#include <linux/nfs_fs.h>
31#include "internal.h"
32#include "pnfs.h"
33
34#define NFSDBG_FACILITY NFSDBG_PNFS
35
36/* Locking:
37 *
38 * pnfs_spinlock:
39 * protects pnfs_modules_tbl.
40 */
41static DEFINE_SPINLOCK(pnfs_spinlock);
42
43/*
44 * pnfs_modules_tbl holds all pnfs modules
45 */
46static LIST_HEAD(pnfs_modules_tbl);
47
48/* Return the registered pnfs layout driver module matching given id */
49static struct pnfs_layoutdriver_type *
50find_pnfs_driver_locked(u32 id)
51{
52 struct pnfs_layoutdriver_type *local;
53
54 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
55 if (local->id == id)
56 goto out;
57 local = NULL;
58out:
59 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
60 return local;
61}
62
63static struct pnfs_layoutdriver_type *
64find_pnfs_driver(u32 id)
65{
66 struct pnfs_layoutdriver_type *local;
67
68 spin_lock(&pnfs_spinlock);
69 local = find_pnfs_driver_locked(id);
70 spin_unlock(&pnfs_spinlock);
71 return local;
72}
73
74void
75unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{
77 if (nfss->pnfs_curr_ld) {
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL;
82}
83
84/*
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
87 *
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
89 */
90void
91set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
92{
93 struct pnfs_layoutdriver_type *ld_type = NULL;
94
95 if (id == 0)
96 goto out_no_driver;
97 if (!(server->nfs_client->cl_exchange_flags &
98 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
99 printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
100 id, server->nfs_client->cl_exchange_flags);
101 goto out_no_driver;
102 }
103 ld_type = find_pnfs_driver(id);
104 if (!ld_type) {
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
106 ld_type = find_pnfs_driver(id);
107 if (!ld_type) {
108 dprintk("%s: No pNFS module found for %u.\n",
109 __func__, id);
110 goto out_no_driver;
111 }
112 }
113 if (!try_module_get(ld_type->owner)) {
114 dprintk("%s: Could not grab reference on module\n", __func__);
115 goto out_no_driver;
116 }
117 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) {
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return;
127
128out_no_driver:
129 dprintk("%s: Using NFSv4 I/O\n", __func__);
130 server->pnfs_curr_ld = NULL;
131}
132
133int
134pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
135{
136 int status = -EINVAL;
137 struct pnfs_layoutdriver_type *tmp;
138
139 if (ld_type->id == 0) {
140 printk(KERN_ERR "%s id 0 is reserved\n", __func__);
141 return status;
142 }
143 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
144 printk(KERN_ERR "%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__);
146 return status;
147 }
148
149 spin_lock(&pnfs_spinlock);
150 tmp = find_pnfs_driver_locked(ld_type->id);
151 if (!tmp) {
152 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
153 status = 0;
154 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
155 ld_type->name);
156 } else {
157 printk(KERN_ERR "%s Module with id %d already loaded!\n",
158 __func__, ld_type->id);
159 }
160 spin_unlock(&pnfs_spinlock);
161
162 return status;
163}
164EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
165
166void
167pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
168{
169 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
170 spin_lock(&pnfs_spinlock);
171 list_del(&ld_type->pnfs_tblid);
172 spin_unlock(&pnfs_spinlock);
173}
174EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
175
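As a usage sketch (not part of this patch), a layout driver would register against this table from its module init/exit hooks. Everything prefixed example_ is assumed to be defined elsewhere in the driver with the prototypes required by struct pnfs_layoutdriver_type; the id value and MODULE_ALIAS shown assume the files layout type (1), matching the request_module() pattern in set_pnfs_layoutdriver() above.

static struct pnfs_layoutdriver_type example_layout_type = {
	.id			= LAYOUT_NFSV4_1_FILES,
	.name			= "example-layoutdriver",
	.owner			= THIS_MODULE,
	.set_layoutdriver	= example_set_layoutdriver,
	.clear_layoutdriver	= example_clear_layoutdriver,
	.alloc_lseg		= example_alloc_lseg,
	.free_lseg		= example_free_lseg,
};

static int __init example_layout_init(void)
{
	return pnfs_register_layoutdriver(&example_layout_type);
}

static void __exit example_layout_exit(void)
{
	pnfs_unregister_layoutdriver(&example_layout_type);
}

/* "nfs-layouttype4-1" is what request_module() asks for when a server
 * advertises layout type 1. */
MODULE_ALIAS("nfs-layouttype4-1");
module_init(example_layout_init);
module_exit(example_layout_exit);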
176/*
177 * pNFS client layout cache
178 */
179
180static void
181get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
182{
183 assert_spin_locked(&lo->inode->i_lock);
184 lo->refcount++;
185}
186
187static void
188put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
189{
190 assert_spin_locked(&lo->inode->i_lock);
191 BUG_ON(lo->refcount == 0);
192
193 lo->refcount--;
194 if (!lo->refcount) {
195 dprintk("%s: freeing layout cache %p\n", __func__, lo);
196 BUG_ON(!list_empty(&lo->layouts));
197 NFS_I(lo->inode)->layout = NULL;
198 kfree(lo);
199 }
200}
201
202void
203put_layout_hdr(struct inode *inode)
204{
205 spin_lock(&inode->i_lock);
206 put_layout_hdr_locked(NFS_I(inode)->layout);
207 spin_unlock(&inode->i_lock);
208}
209
210static void
211init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
212{
213 INIT_LIST_HEAD(&lseg->fi_list);
214 kref_init(&lseg->kref);
215 lseg->layout = lo;
216}
217
218/* Called without i_lock held, as the free_lseg call may sleep */
219static void
220destroy_lseg(struct kref *kref)
221{
222 struct pnfs_layout_segment *lseg =
223 container_of(kref, struct pnfs_layout_segment, kref);
224 struct inode *ino = lseg->layout->inode;
225
226 dprintk("--> %s\n", __func__);
227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
229 put_layout_hdr(ino);
230}
231
232static void
233put_lseg(struct pnfs_layout_segment *lseg)
234{
235 if (!lseg)
236 return;
237
238 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
239 atomic_read(&lseg->kref.refcount));
240 kref_put(&lseg->kref, destroy_lseg);
241}
242
243static void
244pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
245{
246 struct pnfs_layout_segment *lseg, *next;
247 struct nfs_client *clp;
248
249 dprintk("%s:Begin lo %p\n", __func__, lo);
250
251 assert_spin_locked(&lo->inode->i_lock);
252 list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
253 dprintk("%s: freeing lseg %p\n", __func__, lseg);
254 list_move(&lseg->fi_list, tmp_list);
255 }
256 clp = NFS_SERVER(lo->inode)->nfs_client;
257 spin_lock(&clp->cl_lock);
258 /* List does not take a reference, so no need for put here */
259 list_del_init(&lo->layouts);
260 spin_unlock(&clp->cl_lock);
261 write_seqlock(&lo->seqlock);
262 clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
263 write_sequnlock(&lo->seqlock);
264
265 dprintk("%s:Return\n", __func__);
266}
267
268static void
269pnfs_free_lseg_list(struct list_head *tmp_list)
270{
271 struct pnfs_layout_segment *lseg;
272
273 while (!list_empty(tmp_list)) {
274 lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
275 fi_list);
276 dprintk("%s calling put_lseg on %p\n", __func__, lseg);
277 list_del(&lseg->fi_list);
278 put_lseg(lseg);
279 }
280}
281
282void
283pnfs_destroy_layout(struct nfs_inode *nfsi)
284{
285 struct pnfs_layout_hdr *lo;
286 LIST_HEAD(tmp_list);
287
288 spin_lock(&nfsi->vfs_inode.i_lock);
289 lo = nfsi->layout;
290 if (lo) {
291 pnfs_clear_lseg_list(lo, &tmp_list);
292 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
293 put_layout_hdr_locked(lo);
294 }
295 spin_unlock(&nfsi->vfs_inode.i_lock);
296 pnfs_free_lseg_list(&tmp_list);
297}
298
299/*
300 * Called by the state manager to remove all layouts established under an
301 * expired lease.
302 */
303void
304pnfs_destroy_all_layouts(struct nfs_client *clp)
305{
306 struct pnfs_layout_hdr *lo;
307 LIST_HEAD(tmp_list);
308
309 spin_lock(&clp->cl_lock);
310 list_splice_init(&clp->cl_layouts, &tmp_list);
311 spin_unlock(&clp->cl_lock);
312
313 while (!list_empty(&tmp_list)) {
314 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
315 layouts);
316 dprintk("%s freeing layout for inode %lu\n", __func__,
317 lo->inode->i_ino);
318 pnfs_destroy_layout(NFS_I(lo->inode));
319 }
320}
321
322/* Update lo->stateid with the new stateid if it is more recent.
323 *
324 * lo->stateid could be the open stateid, in which case we just use what is given.
325 */
326static void
327pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
328 const nfs4_stateid *new)
329{
330 nfs4_stateid *old = &lo->stateid;
331 bool overwrite = false;
332
333 write_seqlock(&lo->seqlock);
334 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
335 memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
336 overwrite = true;
337 else {
338 u32 oldseq, newseq;
339
340 oldseq = be32_to_cpu(old->stateid.seqid);
341 newseq = be32_to_cpu(new->stateid.seqid);
342 if ((int)(newseq - oldseq) > 0)
343 overwrite = true;
344 }
345 if (overwrite)
346 memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
347 write_sequnlock(&lo->seqlock);
348}
349
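The signed subtraction above is the usual wraparound-safe sequence comparison; a couple of worked values (illustration only, not from the patch):

/* (int)(newseq - oldseq) > 0 treats the 32-bit seqid as a circular counter:
 *   oldseq = 5           newseq = 7  ->  2 > 0   newer, overwrite
 *   oldseq = 7           newseq = 5  -> -2       older, keep
 *   oldseq = 0xffffffff  newseq = 1  ->  2 > 0   newer despite the wrap
 */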
350static void
351pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
352 struct nfs4_state *state)
353{
354 int seq;
355
356 dprintk("--> %s\n", __func__);
357 write_seqlock(&lo->seqlock);
358 do {
359 seq = read_seqbegin(&state->seqlock);
360 memcpy(lo->stateid.data, state->stateid.data,
361 sizeof(state->stateid.data));
362 } while (read_seqretry(&state->seqlock, seq));
363 set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
364 write_sequnlock(&lo->seqlock);
365 dprintk("<-- %s\n", __func__);
366}
367
368void
369pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
370 struct nfs4_state *open_state)
371{
372 int seq;
373
374 dprintk("--> %s\n", __func__);
375 do {
376 seq = read_seqbegin(&lo->seqlock);
377 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
378 /* This will trigger a retry of the seqlock read */
379 pnfs_layout_from_open_stateid(lo, open_state);
380 } else
381 memcpy(dst->data, lo->stateid.data,
382 sizeof(lo->stateid.data));
383 } while (read_seqretry(&lo->seqlock, seq));
384 dprintk("<-- %s\n", __func__);
385}
386
387/*
388 * Get layout from server.
389 * For now, assume that whole-file layouts are requested.
390 * arg->offset: 0
391 * arg->length: all ones
392 */
393static struct pnfs_layout_segment *
394send_layoutget(struct pnfs_layout_hdr *lo,
395 struct nfs_open_context *ctx,
396 u32 iomode)
397{
398 struct inode *ino = lo->inode;
399 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL;
402
403 dprintk("--> %s\n", __func__);
404
405 BUG_ON(ctx == NULL);
406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) {
408 put_layout_hdr(lo->inode);
409 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode;
414 lgp->args.range.offset = 0;
415 lgp->args.range.length = NFS4_MAX_UINT64;
416 lgp->args.type = server->pnfs_curr_ld->id;
417 lgp->args.inode = ino;
418 lgp->args.ctx = get_nfs_open_context(ctx);
419 lgp->lsegpp = &lseg;
420
421 /* Synchronously retrieve layout information from server and
422 * store in lseg.
423 */
424 nfs4_proc_layoutget(lgp);
425 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */
427 set_bit(lo_fail_bit(iomode), &lo->state);
428 }
429 return lseg;
430}
431
432/*
433 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those
435 * are seen first.
436 */
437static s64
438cmp_layout(u32 iomode1, u32 iomode2)
439{
440 /* read > read/write */
441 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
442}
443
444static void
445pnfs_insert_layout(struct pnfs_layout_hdr *lo,
446 struct pnfs_layout_segment *lseg)
447{
448 struct pnfs_layout_segment *lp;
449 int found = 0;
450
451 dprintk("%s:Begin\n", __func__);
452
453 assert_spin_locked(&lo->inode->i_lock);
454 if (list_empty(&lo->segs)) {
455 struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
456
457 spin_lock(&clp->cl_lock);
458 BUG_ON(!list_empty(&lo->layouts));
459 list_add_tail(&lo->layouts, &clp->cl_layouts);
460 spin_unlock(&clp->cl_lock);
461 }
462 list_for_each_entry(lp, &lo->segs, fi_list) {
463 if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
464 continue;
465 list_add_tail(&lseg->fi_list, &lp->fi_list);
466 dprintk("%s: inserted lseg %p "
467 "iomode %d offset %llu length %llu before "
468 "lp %p iomode %d offset %llu length %llu\n",
469 __func__, lseg, lseg->range.iomode,
470 lseg->range.offset, lseg->range.length,
471 lp, lp->range.iomode, lp->range.offset,
472 lp->range.length);
473 found = 1;
474 break;
475 }
476 if (!found) {
477 list_add_tail(&lseg->fi_list, &lo->segs);
478 dprintk("%s: inserted lseg %p "
479 "iomode %d offset %llu length %llu at tail\n",
480 __func__, lseg, lseg->range.iomode,
481 lseg->range.offset, lseg->range.length);
482 }
483 get_layout_hdr_locked(lo);
484
485 dprintk("%s:Return\n", __func__);
486}
487
488static struct pnfs_layout_hdr *
489alloc_init_layout_hdr(struct inode *ino)
490{
491 struct pnfs_layout_hdr *lo;
492
493 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
494 if (!lo)
495 return NULL;
496 lo->refcount = 1;
497 INIT_LIST_HEAD(&lo->layouts);
498 INIT_LIST_HEAD(&lo->segs);
499 seqlock_init(&lo->seqlock);
500 lo->inode = ino;
501 return lo;
502}
503
504static struct pnfs_layout_hdr *
505pnfs_find_alloc_layout(struct inode *ino)
506{
507 struct nfs_inode *nfsi = NFS_I(ino);
508 struct pnfs_layout_hdr *new = NULL;
509
510 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
511
512 assert_spin_locked(&ino->i_lock);
513 if (nfsi->layout)
514 return nfsi->layout;
515
516 spin_unlock(&ino->i_lock);
517 new = alloc_init_layout_hdr(ino);
518 spin_lock(&ino->i_lock);
519
520 if (likely(nfsi->layout == NULL)) /* Won the race? */
521 nfsi->layout = new;
522 else
523 kfree(new);
524 return nfsi->layout;
525}
526
527/*
528 * iomode matching rules:
529 * iomode lseg match
530 * ----- ----- -----
531 * ANY READ true
532 * ANY RW true
533 * RW READ false
534 * RW RW true
535 * READ READ true
536 * READ RW true
537 */
538static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
542}
543
544/*
545 * lookup range in layout
546 */
547static struct pnfs_layout_segment *
548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
549{
550 struct pnfs_layout_segment *lseg, *ret = NULL;
551
552 dprintk("%s:Begin\n", __func__);
553
554 assert_spin_locked(&lo->inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) {
556 if (is_matching_lseg(lseg, iomode)) {
557 ret = lseg;
558 break;
559 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0)
561 break;
562 }
563
564 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
566 return ret;
567}
568
569/*
570 * The layout segment is retrieved from the server if not cached.
571 * The appropriate layout segment is referenced and returned to the caller.
572 */
573struct pnfs_layout_segment *
574pnfs_update_layout(struct inode *ino,
575 struct nfs_open_context *ctx,
576 enum pnfs_iomode iomode)
577{
578 struct nfs_inode *nfsi = NFS_I(ino);
579 struct pnfs_layout_hdr *lo;
580 struct pnfs_layout_segment *lseg = NULL;
581
582 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
583 return NULL;
584 spin_lock(&ino->i_lock);
585 lo = pnfs_find_alloc_layout(ino);
586 if (lo == NULL) {
587 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
588 goto out_unlock;
589 }
590
591 /* Check to see if the layout for the given range already exists */
592 lseg = pnfs_has_layout(lo, iomode);
593 if (lseg) {
594 dprintk("%s: Using cached lseg %p for iomode %d)\n",
595 __func__, lseg, iomode);
596 goto out_unlock;
597 }
598
599 /* if LAYOUTGET already failed once we don't try again */
600 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
601 goto out_unlock;
602
603 get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
604 spin_unlock(&ino->i_lock);
605
606 lseg = send_layoutget(lo, ctx, iomode);
607out:
608 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
609 nfsi->layout->state, lseg);
610 return lseg;
611out_unlock:
612 spin_unlock(&ino->i_lock);
613 goto out;
614}
615
616int
617pnfs_layout_process(struct nfs4_layoutget *lgp)
618{
619 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
620 struct nfs4_layoutget_res *res = &lgp->res;
621 struct pnfs_layout_segment *lseg;
622 struct inode *ino = lo->inode;
623 int status = 0;
624
625 /* Inject layout blob into I/O device driver */
626 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
627 if (!lseg || IS_ERR(lseg)) {
628 if (!lseg)
629 status = -ENOMEM;
630 else
631 status = PTR_ERR(lseg);
632 dprintk("%s: Could not allocate layout: error %d\n",
633 __func__, status);
634 goto out;
635 }
636
637 spin_lock(&ino->i_lock);
638 init_lseg(lo, lseg);
639 lseg->range = res->range;
640 *lgp->lsegpp = lseg;
641 pnfs_insert_layout(lo, lseg);
642
643 /* Done processing layoutget. Set the layout stateid */
644 pnfs_set_layout_stateid(lo, &res->stateid);
645 spin_unlock(&ino->i_lock);
646out:
647 return status;
648}
649
650/*
651 * Device ID cache. Currently supports one layout type per struct nfs_client.
652 * Add the layout type to the lookup key to support multiple types.
653 */
654int
655pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
656 void (*free_callback)(struct pnfs_deviceid_node *))
657{
658 struct pnfs_deviceid_cache *c;
659
660 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
661 if (!c)
662 return -ENOMEM;
663 spin_lock(&clp->cl_lock);
664 if (clp->cl_devid_cache != NULL) {
665 atomic_inc(&clp->cl_devid_cache->dc_ref);
666 dprintk("%s [kref [%d]]\n", __func__,
667 atomic_read(&clp->cl_devid_cache->dc_ref));
668 kfree(c);
669 } else {
670 /* kzalloc initializes hlists */
671 spin_lock_init(&c->dc_lock);
672 atomic_set(&c->dc_ref, 1);
673 c->dc_free_callback = free_callback;
674 clp->cl_devid_cache = c;
675 dprintk("%s [new]\n", __func__);
676 }
677 spin_unlock(&clp->cl_lock);
678 return 0;
679}
680EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
681
682/*
683 * Called from pnfs_layoutdriver_type->free_lseg
684 * last layout segment reference frees deviceid
685 */
686void
687pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
688 struct pnfs_deviceid_node *devid)
689{
690 struct nfs4_deviceid *id = &devid->de_id;
691 struct pnfs_deviceid_node *d;
692 struct hlist_node *n;
693 long h = nfs4_deviceid_hash(id);
694
695 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
696 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
697 return;
698
699 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
700 if (!memcmp(&d->de_id, id, sizeof(*id))) {
701 hlist_del_rcu(&d->de_node);
702 spin_unlock(&c->dc_lock);
703 synchronize_rcu();
704 c->dc_free_callback(devid);
705 return;
706 }
707 spin_unlock(&c->dc_lock);
708 /* Why wasn't it found in the list? */
709 BUG();
710}
711EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
712
713/* Find and reference a deviceid */
714struct pnfs_deviceid_node *
715pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
716{
717 struct pnfs_deviceid_node *d;
718 struct hlist_node *n;
719 long hash = nfs4_deviceid_hash(id);
720
721 dprintk("--> %s hash %ld\n", __func__, hash);
722 rcu_read_lock();
723 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
724 if (!memcmp(&d->de_id, id, sizeof(*id))) {
725 if (!atomic_inc_not_zero(&d->de_ref)) {
726 goto fail;
727 } else {
728 rcu_read_unlock();
729 return d;
730 }
731 }
732 }
733fail:
734 rcu_read_unlock();
735 return NULL;
736}
737EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
738
739/*
740 * Add a deviceid to the cache.
741 * GETDEVICEINFOs for the same deviceid can race. If the deviceid is already cached, discard the new one.
742 */
743struct pnfs_deviceid_node *
744pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
745{
746 struct pnfs_deviceid_node *d;
747 long hash = nfs4_deviceid_hash(&new->de_id);
748
749 dprintk("--> %s hash %ld\n", __func__, hash);
750 spin_lock(&c->dc_lock);
751 d = pnfs_find_get_deviceid(c, &new->de_id);
752 if (d) {
753 spin_unlock(&c->dc_lock);
754 dprintk("%s [discard]\n", __func__);
755 c->dc_free_callback(new);
756 return d;
757 }
758 INIT_HLIST_NODE(&new->de_node);
759 atomic_set(&new->de_ref, 1);
760 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
761 spin_unlock(&c->dc_lock);
762 dprintk("%s [new]\n", __func__);
763 return new;
764}
765EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
766
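The intended calling pattern for pnfs_find_get_deviceid()/pnfs_add_deviceid() above, sketched under the assumption that the layout driver embeds struct pnfs_deviceid_node in its own device structure; struct example_device and example_issue_getdeviceinfo() are invented names:

struct example_device {
	struct pnfs_deviceid_node node;
	/* driver-specific address information ... */
};

static struct pnfs_deviceid_node *
example_get_device(struct pnfs_deviceid_cache *cache, struct nfs4_deviceid *id)
{
	struct pnfs_deviceid_node *d;
	struct example_device *dev;

	d = pnfs_find_get_deviceid(cache, id);
	if (d)
		return d;			/* cache hit, reference taken */

	dev = example_issue_getdeviceinfo(id);	/* GETDEVICEINFO + decode */
	if (!dev)
		return NULL;

	/* If a concurrent GETDEVICEINFO won the race, pnfs_add_deviceid()
	 * frees our copy through dc_free_callback and returns the cached
	 * node instead, so the return value is always safe to use. */
	return pnfs_add_deviceid(cache, &dev->node);
}

On release, the driver's free_lseg callback would drop its reference with pnfs_put_deviceid(), and the dc_free_callback registered through pnfs_alloc_init_deviceid_cache() would container_of() back to struct example_device and free it.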
767void
768pnfs_put_deviceid_cache(struct nfs_client *clp)
769{
770 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
771
772 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
773 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
774 int i;
775 /* Verify cache is empty */
776 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
777 BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
778 clp->cl_devid_cache = NULL;
779 spin_unlock(&clp->cl_lock);
780 kfree(local);
781 }
782}
783EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
new file mode 100644
index 000000000000..e12367d50489
--- /dev/null
+++ b/fs/nfs/pnfs.h
@@ -0,0 +1,189 @@
1/*
2 * pNFS client data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H
32
33struct pnfs_layout_segment {
34 struct list_head fi_list;
35 struct pnfs_layout_range range;
36 struct kref kref;
37 struct pnfs_layout_hdr *layout;
38};
39
40#ifdef CONFIG_NFS_V4_1
41
42#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
43
44enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
48};
49
50/* Per-layout driver specific registration structure */
51struct pnfs_layoutdriver_type {
52 struct list_head pnfs_tblid;
53 const u32 id;
54 const char *name;
55 struct module *owner;
56 int (*set_layoutdriver) (struct nfs_server *);
57 int (*clear_layoutdriver) (struct nfs_server *);
58 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
59 void (*free_lseg) (struct pnfs_layout_segment *lseg);
60};
61
62struct pnfs_layout_hdr {
63 unsigned long refcount;
64 struct list_head layouts; /* other client layouts */
65 struct list_head segs; /* layout segments list */
66 seqlock_t seqlock; /* Protects the stateid */
67 nfs4_stateid stateid;
68 unsigned long state;
69 struct inode *inode;
70};
71
72struct pnfs_device {
73 struct nfs4_deviceid dev_id;
74 unsigned int layout_type;
75 unsigned int mincount;
76 struct page **pages;
77 void *area;
78 unsigned int pgbase;
79 unsigned int pglen;
80};
81
82/*
83 * Device ID RCU cache. A device ID is unique per client ID and layout type.
84 */
85#define NFS4_DEVICE_ID_HASH_BITS 5
86#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
87#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
88
89static inline u32
90nfs4_deviceid_hash(struct nfs4_deviceid *id)
91{
92 unsigned char *cptr = (unsigned char *)id->data;
93 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
94 u32 x = 0;
95
96 while (nbytes--) {
97 x *= 37;
98 x += *cptr++;
99 }
100 return x & NFS4_DEVICE_ID_HASH_MASK;
101}
102
103struct pnfs_deviceid_node {
104 struct hlist_node de_node;
105 struct nfs4_deviceid de_id;
106 atomic_t de_ref;
107};
108
109struct pnfs_deviceid_cache {
110 spinlock_t dc_lock;
111 atomic_t dc_ref;
112 void (*dc_free_callback)(struct pnfs_deviceid_node *);
113 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
114};
115
116extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
117 void (*free_callback)(struct pnfs_deviceid_node *));
118extern void pnfs_put_deviceid_cache(struct nfs_client *);
119extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
120 struct pnfs_deviceid_cache *,
121 struct nfs4_deviceid *);
122extern struct pnfs_deviceid_node *pnfs_add_deviceid(
123 struct pnfs_deviceid_cache *,
124 struct pnfs_deviceid_node *);
125extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
126 struct pnfs_deviceid_node *devid);
127
128extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
129extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
130
131/* nfs4proc.c */
132extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
133 struct pnfs_device *dev);
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135
136/* pnfs.c */
137struct pnfs_layout_segment *
138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
139 enum pnfs_iomode access_type);
140void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
141void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp);
143void pnfs_destroy_layout(struct nfs_inode *);
144void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state);
148
149
150static inline int lo_fail_bit(u32 iomode)
151{
152 return iomode == IOMODE_RW ?
153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
154}
155
156/* Return true if a layout driver is being used for this mountpoint */
157static inline int pnfs_enabled_sb(struct nfs_server *nfss)
158{
159 return nfss->pnfs_curr_ld != NULL;
160}
161
162#else /* CONFIG_NFS_V4_1 */
163
164static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
165{
166}
167
168static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
169{
170}
171
172static inline struct pnfs_layout_segment *
173pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
174 enum pnfs_iomode access_type)
175{
176 return NULL;
177}
178
179static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
180{
181}
182
183static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
184{
185}
186
187#endif /* CONFIG_NFS_V4_1 */
188
189#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 79859c81a943..e4b62c6f5a6e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -25,6 +25,7 @@
25#include "internal.h" 25#include "internal.h"
26#include "iostat.h" 26#include "iostat.h"
27#include "fscache.h" 27#include "fscache.h"
28#include "pnfs.h"
28 29
29#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
30 31
@@ -120,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
120 len = nfs_page_length(page); 121 len = nfs_page_length(page);
121 if (len == 0) 122 if (len == 0)
122 return nfs_return_empty_page(page); 123 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
123 new = nfs_create_request(ctx, inode, page, 0, len); 125 new = nfs_create_request(ctx, inode, page, 0, len);
124 if (IS_ERR(new)) { 126 if (IS_ERR(new)) {
125 unlock_page(page); 127 unlock_page(page);
@@ -624,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
624 if (ret == 0) 626 if (ret == 0)
625 goto read_complete; /* all pages were read */ 627 goto read_complete; /* all pages were read */
626 628
629 pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
627 if (rsize < PAGE_CACHE_SIZE) 630 if (rsize < PAGE_CACHE_SIZE)
628 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 631 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
629 else 632 else
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 7cf4ddafb4ab..31a78fce4732 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -29,6 +29,18 @@ config NFSD
29 29
30 If unsure, say N. 30 If unsure, say N.
31 31
32config NFSD_DEPRECATED
33 bool "Include support for deprecated syscall interface to NFSD"
34 depends on NFSD
35 default y
36 help
37 The syscall interface to nfsd was obsoleted in 2.6.0 by a new
38 filesystem-based interface. The old interface is due for removal
39 in 2.6.40. If you wish to remove the interface before then,
40 say N.
41
42 If unsure, say Y.
43
32config NFSD_V2_ACL 44config NFSD_V2_ACL
33 bool 45 bool
34 depends on NFSD 46 depends on NFSD
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c2a4f71d87dd..c0fcb7ab7f6d 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -28,9 +28,6 @@
28typedef struct auth_domain svc_client; 28typedef struct auth_domain svc_client;
29typedef struct svc_export svc_export; 29typedef struct svc_export svc_export;
30 30
31static void exp_do_unexport(svc_export *unexp);
32static int exp_verify_string(char *cp, int max);
33
34/* 31/*
35 * We have two caches. 32 * We have two caches.
36 * One maps client+vfsmnt+dentry to export options - the export map 33 * One maps client+vfsmnt+dentry to export options - the export map
@@ -802,6 +799,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
802 return ek; 799 return ek;
803} 800}
804 801
802#ifdef CONFIG_NFSD_DEPRECATED
805static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, 803static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
806 struct svc_export *exp) 804 struct svc_export *exp)
807{ 805{
@@ -852,6 +850,7 @@ exp_get_fsid_key(svc_client *clp, int fsid)
852 850
853 return exp_find_key(clp, FSID_NUM, fsidv, NULL); 851 return exp_find_key(clp, FSID_NUM, fsidv, NULL);
854} 852}
853#endif
855 854
856static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, 855static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
857 struct cache_req *reqp) 856 struct cache_req *reqp)
@@ -893,6 +892,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path)
893 return exp; 892 return exp;
894} 893}
895 894
895#ifdef CONFIG_NFSD_DEPRECATED
896/* 896/*
897 * Hashtable locking. Write locks are placed only by user processes 897 * Hashtable locking. Write locks are placed only by user processes
898 * wanting to modify export information. 898 * wanting to modify export information.
@@ -925,6 +925,19 @@ exp_writeunlock(void)
925{ 925{
926 up_write(&hash_sem); 926 up_write(&hash_sem);
927} 927}
928#else
929
930/* hash_sem not needed once deprecated interface is removed */
931void exp_readlock(void) {}
932static inline void exp_writelock(void) {}
933void exp_readunlock(void) {}
934static inline void exp_writeunlock(void) {}
935
936#endif
937
938#ifdef CONFIG_NFSD_DEPRECATED
939static void exp_do_unexport(svc_export *unexp);
940static int exp_verify_string(char *cp, int max);
928 941
929static void exp_fsid_unhash(struct svc_export *exp) 942static void exp_fsid_unhash(struct svc_export *exp)
930{ 943{
@@ -935,10 +948,9 @@ static void exp_fsid_unhash(struct svc_export *exp)
935 948
936 ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); 949 ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
937 if (!IS_ERR(ek)) { 950 if (!IS_ERR(ek)) {
938 ek->h.expiry_time = get_seconds()-1; 951 sunrpc_invalidate(&ek->h, &svc_expkey_cache);
939 cache_put(&ek->h, &svc_expkey_cache); 952 cache_put(&ek->h, &svc_expkey_cache);
940 } 953 }
941 svc_expkey_cache.nextcheck = get_seconds();
942} 954}
943 955
944static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) 956static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
@@ -973,10 +985,9 @@ static void exp_unhash(struct svc_export *exp)
973 985
974 ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); 986 ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
975 if (!IS_ERR(ek)) { 987 if (!IS_ERR(ek)) {
976 ek->h.expiry_time = get_seconds()-1; 988 sunrpc_invalidate(&ek->h, &svc_expkey_cache);
977 cache_put(&ek->h, &svc_expkey_cache); 989 cache_put(&ek->h, &svc_expkey_cache);
978 } 990 }
979 svc_expkey_cache.nextcheck = get_seconds();
980} 991}
981 992
982/* 993/*
@@ -1097,8 +1108,7 @@ out:
1097static void 1108static void
1098exp_do_unexport(svc_export *unexp) 1109exp_do_unexport(svc_export *unexp)
1099{ 1110{
1100 unexp->h.expiry_time = get_seconds()-1; 1111 sunrpc_invalidate(&unexp->h, &svc_export_cache);
1101 svc_export_cache.nextcheck = get_seconds();
1102 exp_unhash(unexp); 1112 exp_unhash(unexp);
1103 exp_fsid_unhash(unexp); 1113 exp_fsid_unhash(unexp);
1104} 1114}
@@ -1150,6 +1160,7 @@ out_unlock:
1150 exp_writeunlock(); 1160 exp_writeunlock();
1151 return err; 1161 return err;
1152} 1162}
1163#endif /* CONFIG_NFSD_DEPRECATED */
1153 1164
1154/* 1165/*
1155 * Obtain the root fh on behalf of a client. 1166 * Obtain the root fh on behalf of a client.
@@ -1459,25 +1470,43 @@ static void show_secinfo_flags(struct seq_file *m, int flags)
1459 show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); 1470 show_expflags(m, flags, NFSEXP_SECINFO_FLAGS);
1460} 1471}
1461 1472
1473static bool secinfo_flags_equal(int f, int g)
1474{
1475 f &= NFSEXP_SECINFO_FLAGS;
1476 g &= NFSEXP_SECINFO_FLAGS;
1477 return f == g;
1478}
1479
1480static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end)
1481{
1482 int flags;
1483
1484 flags = (*fp)->flags;
1485 seq_printf(m, ",sec=%d", (*fp)->pseudoflavor);
1486 (*fp)++;
1487 while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) {
1488 seq_printf(m, ":%d", (*fp)->pseudoflavor);
1489 (*fp)++;
1490 }
1491 return flags;
1492}
1493
1462static void show_secinfo(struct seq_file *m, struct svc_export *exp) 1494static void show_secinfo(struct seq_file *m, struct svc_export *exp)
1463{ 1495{
1464 struct exp_flavor_info *f; 1496 struct exp_flavor_info *f;
1465 struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; 1497 struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
1466 int lastflags = 0, first = 0; 1498 int flags;
1467 1499
1468 if (exp->ex_nflavors == 0) 1500 if (exp->ex_nflavors == 0)
1469 return; 1501 return;
1470 for (f = exp->ex_flavors; f < end; f++) { 1502 f = exp->ex_flavors;
1471 if (first || f->flags != lastflags) { 1503 flags = show_secinfo_run(m, &f, end);
1472 if (!first) 1504 if (!secinfo_flags_equal(flags, exp->ex_flags))
1473 show_secinfo_flags(m, lastflags); 1505 show_secinfo_flags(m, flags);
1474 seq_printf(m, ",sec=%d", f->pseudoflavor); 1506 while (f != end) {
1475 lastflags = f->flags; 1507 flags = show_secinfo_run(m, &f, end);
1476 } else { 1508 show_secinfo_flags(m, flags);
1477 seq_printf(m, ":%d", f->pseudoflavor);
1478 }
1479 } 1509 }
1480 show_secinfo_flags(m, lastflags);
1481} 1510}
1482 1511
1483static void exp_flags(struct seq_file *m, int flag, int fsid, 1512static void exp_flags(struct seq_file *m, int flag, int fsid,
@@ -1532,6 +1561,7 @@ const struct seq_operations nfs_exports_op = {
1532 .show = e_show, 1561 .show = e_show,
1533}; 1562};
1534 1563
1564#ifdef CONFIG_NFSD_DEPRECATED
1535/* 1565/*
1536 * Add or modify a client. 1566 * Add or modify a client.
1537 * Change requests may involve the list of host addresses. The list of 1567 * Change requests may involve the list of host addresses. The list of
@@ -1563,7 +1593,7 @@ exp_addclient(struct nfsctl_client *ncp)
1563 /* Insert client into hashtable. */ 1593 /* Insert client into hashtable. */
1564 for (i = 0; i < ncp->cl_naddr; i++) { 1594 for (i = 0; i < ncp->cl_naddr; i++) {
1565 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); 1595 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
1566 auth_unix_add_addr(&addr6, dom); 1596 auth_unix_add_addr(&init_net, &addr6, dom);
1567 } 1597 }
1568 auth_unix_forget_old(dom); 1598 auth_unix_forget_old(dom);
1569 auth_domain_put(dom); 1599 auth_domain_put(dom);
@@ -1621,6 +1651,7 @@ exp_verify_string(char *cp, int max)
1621 printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); 1651 printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp);
1622 return 0; 1652 return 0;
1623} 1653}
1654#endif /* CONFIG_NFSD_DEPRECATED */
1624 1655
1625/* 1656/*
1626 * Initialize the exports module. 1657 * Initialize the exports module.
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 988cbb3a19b6..143da2eecd7b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -41,7 +41,6 @@
41 41
42#define NFSPROC4_CB_NULL 0 42#define NFSPROC4_CB_NULL 0
43#define NFSPROC4_CB_COMPOUND 1 43#define NFSPROC4_CB_COMPOUND 1
44#define NFS4_STATEID_SIZE 16
45 44
46/* Index of predefined Linux callback client operations */ 45/* Index of predefined Linux callback client operations */
47 46
@@ -248,10 +247,11 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
248} 247}
249 248
250static void 249static void
251encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, 250encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
252 struct nfs4_cb_compound_hdr *hdr) 251 struct nfs4_cb_compound_hdr *hdr)
253{ 252{
254 __be32 *p; 253 __be32 *p;
254 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
255 255
256 if (hdr->minorversion == 0) 256 if (hdr->minorversion == 0)
257 return; 257 return;
@@ -259,8 +259,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); 259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
260 260
261 WRITE32(OP_CB_SEQUENCE); 261 WRITE32(OP_CB_SEQUENCE);
262 WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); 262 WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN);
263 WRITE32(args->cbs_clp->cl_cb_seq_nr); 263 WRITE32(ses->se_cb_seq_nr);
264 WRITE32(0); /* slotid, always 0 */ 264 WRITE32(0); /* slotid, always 0 */
265 WRITE32(0); /* highest slotid always 0 */ 265 WRITE32(0); /* highest slotid always 0 */
266 WRITE32(0); /* cachethis always 0 */ 266 WRITE32(0); /* cachethis always 0 */
@@ -280,18 +280,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
280 280
281static int 281static int
282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, 282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
283 struct nfs4_rpc_args *rpc_args) 283 struct nfsd4_callback *cb)
284{ 284{
285 struct xdr_stream xdr; 285 struct xdr_stream xdr;
286 struct nfs4_delegation *args = rpc_args->args_op; 286 struct nfs4_delegation *args = cb->cb_op;
287 struct nfs4_cb_compound_hdr hdr = { 287 struct nfs4_cb_compound_hdr hdr = {
288 .ident = args->dl_ident, 288 .ident = cb->cb_clp->cl_cb_ident,
289 .minorversion = rpc_args->args_seq.cbs_minorversion, 289 .minorversion = cb->cb_minorversion,
290 }; 290 };
291 291
292 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 292 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
293 encode_cb_compound_hdr(&xdr, &hdr); 293 encode_cb_compound_hdr(&xdr, &hdr);
294 encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); 294 encode_cb_sequence(&xdr, cb, &hdr);
295 encode_cb_recall(&xdr, args, &hdr); 295 encode_cb_recall(&xdr, args, &hdr);
296 encode_cb_nops(&hdr); 296 encode_cb_nops(&hdr);
297 return 0; 297 return 0;
@@ -339,15 +339,16 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
339 * with a single slot. 339 * with a single slot.
340 */ 340 */
341static int 341static int
342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, 342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
343 struct rpc_rqst *rqstp) 343 struct rpc_rqst *rqstp)
344{ 344{
345 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
345 struct nfs4_sessionid id; 346 struct nfs4_sessionid id;
346 int status; 347 int status;
347 u32 dummy; 348 u32 dummy;
348 __be32 *p; 349 __be32 *p;
349 350
350 if (res->cbs_minorversion == 0) 351 if (cb->cb_minorversion == 0)
351 return 0; 352 return 0;
352 353
353 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); 354 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
@@ -363,13 +364,12 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
363 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); 364 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
364 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 365 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
365 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); 366 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
366 if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, 367 if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
367 NFS4_MAX_SESSIONID_LEN)) {
368 dprintk("%s Invalid session id\n", __func__); 368 dprintk("%s Invalid session id\n", __func__);
369 goto out; 369 goto out;
370 } 370 }
371 READ32(dummy); 371 READ32(dummy);
372 if (dummy != res->cbs_clp->cl_cb_seq_nr) { 372 if (dummy != ses->se_cb_seq_nr) {
373 dprintk("%s Invalid sequence number\n", __func__); 373 dprintk("%s Invalid sequence number\n", __func__);
374 goto out; 374 goto out;
375 } 375 }
@@ -393,7 +393,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
393 393
394static int 394static int
395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, 395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
396 struct nfsd4_cb_sequence *seq) 396 struct nfsd4_callback *cb)
397{ 397{
398 struct xdr_stream xdr; 398 struct xdr_stream xdr;
399 struct nfs4_cb_compound_hdr hdr; 399 struct nfs4_cb_compound_hdr hdr;
@@ -403,8 +403,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
403 status = decode_cb_compound_hdr(&xdr, &hdr); 403 status = decode_cb_compound_hdr(&xdr, &hdr);
404 if (status) 404 if (status)
405 goto out; 405 goto out;
406 if (seq) { 406 if (cb) {
407 status = decode_cb_sequence(&xdr, seq, rqstp); 407 status = decode_cb_sequence(&xdr, cb, rqstp);
408 if (status) 408 if (status)
409 goto out; 409 goto out;
410 } 410 }
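
The hunks above switch the CB_SEQUENCE encode/decode helpers from struct nfsd4_cb_sequence to struct nfsd4_callback, but the wire layout they handle is unchanged. As a rough orientation only, here is a standalone userspace sketch of the argument layout encode_cb_sequence() produces for an NFSv4.1 callback compound: the operation number, the 16-byte session id, the backchannel sequence number, and zero words for slot id, highest slot id and cachethis (nfsd uses a single backchannel slot). This is not the kernel code; names and buffer handling are simplified.

/*
 * Illustrative sketch of the CB_SEQUENCE argument encoding; all names
 * are made up for the example.
 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>		/* htonl() */

#define SESSIONID_LEN	16
#define OP_CB_SEQUENCE	11	/* operation number from RFC 5661 */

static size_t put32(unsigned char *p, uint32_t v)
{
	uint32_t be = htonl(v);

	memcpy(p, &be, 4);
	return 4;
}

/* Writes 36 bytes of CB_SEQUENCE arguments into buf. */
static size_t encode_cb_sequence_args(unsigned char *buf,
				      const unsigned char sessionid[SESSIONID_LEN],
				      uint32_t seq_nr)
{
	unsigned char *p = buf;

	p += put32(p, OP_CB_SEQUENCE);
	memcpy(p, sessionid, SESSIONID_LEN);	/* csa_sessionid */
	p += SESSIONID_LEN;
	p += put32(p, seq_nr);			/* csa_sequenceid */
	p += put32(p, 0);			/* csa_slotid: always slot 0 */
	p += put32(p, 0);			/* csa_highest_slotid */
	p += put32(p, 0);			/* csa_cachethis: false */
	return (size_t)(p - buf);
}

int main(void)
{
	unsigned char sessionid[SESSIONID_LEN] = { 0 };
	unsigned char buf[64];

	return encode_cb_sequence_args(buf, sessionid, 1) == 36 ? 0 : 1;
}
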
@@ -473,30 +473,34 @@ static int max_cb_time(void)
473/* Reference counting, callback cleanup, etc., all look racy as heck. 473/* Reference counting, callback cleanup, etc., all look racy as heck.
474 * And why is cl_cb_set an atomic? */ 474 * And why is cl_cb_set an atomic? */
475 475
476int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) 476int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
477{ 477{
478 struct rpc_timeout timeparms = { 478 struct rpc_timeout timeparms = {
479 .to_initval = max_cb_time(), 479 .to_initval = max_cb_time(),
480 .to_retries = 0, 480 .to_retries = 0,
481 }; 481 };
482 struct rpc_create_args args = { 482 struct rpc_create_args args = {
483 .protocol = XPRT_TRANSPORT_TCP, 483 .net = &init_net,
484 .address = (struct sockaddr *) &cb->cb_addr, 484 .address = (struct sockaddr *) &conn->cb_addr,
485 .addrsize = cb->cb_addrlen, 485 .addrsize = conn->cb_addrlen,
486 .timeout = &timeparms, 486 .timeout = &timeparms,
487 .program = &cb_program, 487 .program = &cb_program,
488 .prognumber = cb->cb_prog,
489 .version = 0, 488 .version = 0,
490 .authflavor = clp->cl_flavor, 489 .authflavor = clp->cl_flavor,
491 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 490 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
492 .client_name = clp->cl_principal,
493 }; 491 };
494 struct rpc_clnt *client; 492 struct rpc_clnt *client;
495 493
496 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 494 if (clp->cl_minorversion == 0) {
497 return -EINVAL; 495 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
498 if (cb->cb_minorversion) { 496 return -EINVAL;
499 args.bc_xprt = cb->cb_xprt; 497 args.client_name = clp->cl_principal;
498 args.prognumber = conn->cb_prog,
499 args.protocol = XPRT_TRANSPORT_TCP;
500 clp->cl_cb_ident = conn->cb_ident;
501 } else {
502 args.bc_xprt = conn->cb_xprt;
503 args.prognumber = clp->cl_cb_session->se_cb_prog;
500 args.protocol = XPRT_TRANSPORT_BC_TCP; 504 args.protocol = XPRT_TRANSPORT_BC_TCP;
501 } 505 }
502 /* Create RPC client */ 506 /* Create RPC client */
@@ -506,7 +510,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
506 PTR_ERR(client)); 510 PTR_ERR(client));
507 return PTR_ERR(client); 511 return PTR_ERR(client);
508 } 512 }
509 nfsd4_set_callback_client(clp, client); 513 clp->cl_cb_client = client;
510 return 0; 514 return 0;
511 515
512} 516}
@@ -519,7 +523,7 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
519 523
520static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) 524static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
521{ 525{
522 struct nfs4_client *clp = calldata; 526 struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
523 527
524 if (task->tk_status) 528 if (task->tk_status)
525 warn_no_callback_path(clp, task->tk_status); 529 warn_no_callback_path(clp, task->tk_status);
@@ -528,6 +532,8 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
528} 532}
529 533
530static const struct rpc_call_ops nfsd4_cb_probe_ops = { 534static const struct rpc_call_ops nfsd4_cb_probe_ops = {
535 /* XXX: release method to ensure we set the cb channel down if
536 * necessary on early failure? */
531 .rpc_call_done = nfsd4_cb_probe_done, 537 .rpc_call_done = nfsd4_cb_probe_done,
532}; 538};
533 539
@@ -543,38 +549,42 @@ int set_callback_cred(void)
543 return 0; 549 return 0;
544} 550}
545 551
552static struct workqueue_struct *callback_wq;
546 553
547void do_probe_callback(struct nfs4_client *clp) 554static void do_probe_callback(struct nfs4_client *clp)
548{ 555{
549 struct rpc_message msg = { 556 struct nfsd4_callback *cb = &clp->cl_cb_null;
550 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
551 .rpc_argp = clp,
552 .rpc_cred = callback_cred
553 };
554 int status;
555 557
556 status = rpc_call_async(clp->cl_cb_client, &msg, 558 cb->cb_op = NULL;
557 RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 559 cb->cb_clp = clp;
558 &nfsd4_cb_probe_ops, (void *)clp); 560
559 if (status) 561 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
560 warn_no_callback_path(clp, status); 562 cb->cb_msg.rpc_argp = NULL;
563 cb->cb_msg.rpc_resp = NULL;
564 cb->cb_msg.rpc_cred = callback_cred;
565
566 cb->cb_ops = &nfsd4_cb_probe_ops;
567
568 queue_work(callback_wq, &cb->cb_work);
561} 569}
562 570
563/* 571/*
564 * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 572 * Poke the callback thread to process any updates to the callback
573 * parameters, and send a null probe.
565 */ 574 */
566void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) 575void nfsd4_probe_callback(struct nfs4_client *clp)
567{ 576{
568 int status; 577 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
578 do_probe_callback(clp);
579}
569 580
581void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
582{
570 BUG_ON(atomic_read(&clp->cl_cb_set)); 583 BUG_ON(atomic_read(&clp->cl_cb_set));
571 584
572 status = setup_callback_client(clp, cb); 585 spin_lock(&clp->cl_lock);
573 if (status) { 586 memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
574 warn_no_callback_path(clp, status); 587 spin_unlock(&clp->cl_lock);
575 return;
576 }
577 do_probe_callback(clp);
578} 588}
579 589
580/* 590/*
@@ -585,8 +595,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
585static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, 595static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
586 struct rpc_task *task) 596 struct rpc_task *task)
587{ 597{
588 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 598 u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data;
589 u32 *ptr = (u32 *)clp->cl_sessionid.data;
590 int status = 0; 599 int status = 0;
591 600
592 dprintk("%s: %u:%u:%u:%u\n", __func__, 601 dprintk("%s: %u:%u:%u:%u\n", __func__,
@@ -598,14 +607,6 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
598 status = -EAGAIN; 607 status = -EAGAIN;
599 goto out; 608 goto out;
600 } 609 }
601
602 /*
603 * We'll need the clp during XDR encoding and decoding,
604 * and the sequence during decoding to verify the reply
605 */
606 args->args_seq.cbs_clp = clp;
607 task->tk_msg.rpc_resp = &args->args_seq;
608
609out: 610out:
610 dprintk("%s status=%d\n", __func__, status); 611 dprintk("%s status=%d\n", __func__, status);
611 return status; 612 return status;
@@ -617,13 +618,13 @@ out:
617 */ 618 */
618static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) 619static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
619{ 620{
620 struct nfs4_delegation *dp = calldata; 621 struct nfsd4_callback *cb = calldata;
622 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
621 struct nfs4_client *clp = dp->dl_client; 623 struct nfs4_client *clp = dp->dl_client;
622 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 624 u32 minorversion = clp->cl_minorversion;
623 u32 minorversion = clp->cl_cb_conn.cb_minorversion;
624 int status = 0; 625 int status = 0;
625 626
626 args->args_seq.cbs_minorversion = minorversion; 627 cb->cb_minorversion = minorversion;
627 if (minorversion) { 628 if (minorversion) {
628 status = nfsd41_cb_setup_sequence(clp, task); 629 status = nfsd41_cb_setup_sequence(clp, task);
629 if (status) { 630 if (status) {
@@ -640,19 +641,20 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
640 641
641static void nfsd4_cb_done(struct rpc_task *task, void *calldata) 642static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
642{ 643{
643 struct nfs4_delegation *dp = calldata; 644 struct nfsd4_callback *cb = calldata;
645 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
644 struct nfs4_client *clp = dp->dl_client; 646 struct nfs4_client *clp = dp->dl_client;
645 647
646 dprintk("%s: minorversion=%d\n", __func__, 648 dprintk("%s: minorversion=%d\n", __func__,
647 clp->cl_cb_conn.cb_minorversion); 649 clp->cl_minorversion);
648 650
649 if (clp->cl_cb_conn.cb_minorversion) { 651 if (clp->cl_minorversion) {
650 /* No need for lock, access serialized in nfsd4_cb_prepare */ 652 /* No need for lock, access serialized in nfsd4_cb_prepare */
651 ++clp->cl_cb_seq_nr; 653 ++clp->cl_cb_session->se_cb_seq_nr;
652 clear_bit(0, &clp->cl_cb_slot_busy); 654 clear_bit(0, &clp->cl_cb_slot_busy);
653 rpc_wake_up_next(&clp->cl_cb_waitq); 655 rpc_wake_up_next(&clp->cl_cb_waitq);
654 dprintk("%s: freed slot, new seqid=%d\n", __func__, 656 dprintk("%s: freed slot, new seqid=%d\n", __func__,
655 clp->cl_cb_seq_nr); 657 clp->cl_cb_session->se_cb_seq_nr);
656 658
657 /* We're done looking into the sequence information */ 659 /* We're done looking into the sequence information */
658 task->tk_msg.rpc_resp = NULL; 660 task->tk_msg.rpc_resp = NULL;
@@ -662,7 +664,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
662 664
663static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 665static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
664{ 666{
665 struct nfs4_delegation *dp = calldata; 667 struct nfsd4_callback *cb = calldata;
668 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
666 struct nfs4_client *clp = dp->dl_client; 669 struct nfs4_client *clp = dp->dl_client;
667 struct rpc_clnt *current_rpc_client = clp->cl_cb_client; 670 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
668 671
@@ -707,7 +710,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
707 710
708static void nfsd4_cb_recall_release(void *calldata) 711static void nfsd4_cb_recall_release(void *calldata)
709{ 712{
710 struct nfs4_delegation *dp = calldata; 713 struct nfsd4_callback *cb = calldata;
714 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
711 715
712 nfs4_put_delegation(dp); 716 nfs4_put_delegation(dp);
713} 717}
@@ -718,8 +722,6 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
718 .rpc_release = nfsd4_cb_recall_release, 722 .rpc_release = nfsd4_cb_recall_release,
719}; 723};
720 724
721static struct workqueue_struct *callback_wq;
722
723int nfsd4_create_callback_queue(void) 725int nfsd4_create_callback_queue(void)
724{ 726{
725 callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); 727 callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
@@ -734,57 +736,88 @@ void nfsd4_destroy_callback_queue(void)
734} 736}
735 737
736/* must be called under the state lock */ 738/* must be called under the state lock */
737void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) 739void nfsd4_shutdown_callback(struct nfs4_client *clp)
738{ 740{
739 struct rpc_clnt *old = clp->cl_cb_client; 741 set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags);
740
741 clp->cl_cb_client = new;
742 /* 742 /*
743 * After this, any work that saw the old value of cl_cb_client will 743 * Note this won't actually result in a null callback;
744 * be gone: 744 * instead, nfsd4_do_callback_rpc() will detect the killed
745 * client, destroy the rpc client, and stop:
745 */ 746 */
747 do_probe_callback(clp);
746 flush_workqueue(callback_wq); 748 flush_workqueue(callback_wq);
747 /* So we can safely shut it down: */
748 if (old)
749 rpc_shutdown_client(old);
750} 749}
751 750
752/* 751void nfsd4_release_cb(struct nfsd4_callback *cb)
753 * called with dp->dl_count inc'ed.
754 */
755static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
756{ 752{
757 struct nfs4_client *clp = dp->dl_client; 753 if (cb->cb_ops->rpc_release)
758 struct rpc_clnt *clnt = clp->cl_cb_client; 754 cb->cb_ops->rpc_release(cb);
759 struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; 755}
760 struct rpc_message msg = {
761 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
762 .rpc_cred = callback_cred
763 };
764 756
765 if (clnt == NULL) { 757void nfsd4_process_cb_update(struct nfsd4_callback *cb)
766 nfs4_put_delegation(dp); 758{
767 return; /* Client is shutting down; give up. */ 759 struct nfs4_cb_conn conn;
760 struct nfs4_client *clp = cb->cb_clp;
761 int err;
762
763 /*
764 * This is either an update, or the client dying; in either case,
765 * kill the old client:
766 */
767 if (clp->cl_cb_client) {
768 rpc_shutdown_client(clp->cl_cb_client);
769 clp->cl_cb_client = NULL;
768 } 770 }
771 if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags))
772 return;
773 spin_lock(&clp->cl_lock);
774 /*
775 * Only serialized callback code is allowed to clear these
776 * flags; main nfsd code can only set them:
777 */
778 BUG_ON(!clp->cl_cb_flags);
779 clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
780 memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
781 spin_unlock(&clp->cl_lock);
769 782
770 args->args_op = dp; 783 err = setup_callback_client(clp, &conn);
771 msg.rpc_argp = args; 784 if (err)
772 dp->dl_retries = 1; 785 warn_no_callback_path(clp, err);
773 rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp);
774} 786}
775 787
776void nfsd4_do_callback_rpc(struct work_struct *w) 788void nfsd4_do_callback_rpc(struct work_struct *w)
777{ 789{
778 /* XXX: for now, just send off delegation recall. */ 790 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
779 /* In future, generalize to handle any sort of callback. */ 791 struct nfs4_client *clp = cb->cb_clp;
780 struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work); 792 struct rpc_clnt *clnt;
781 struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
782 793
783 _nfsd4_cb_recall(dp); 794 if (clp->cl_cb_flags)
784} 795 nfsd4_process_cb_update(cb);
785 796
797 clnt = clp->cl_cb_client;
798 if (!clnt) {
799 /* Callback channel broken, or client killed; give up: */
800 nfsd4_release_cb(cb);
801 return;
802 }
803 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
804 cb->cb_ops, cb);
805}
786 806
787void nfsd4_cb_recall(struct nfs4_delegation *dp) 807void nfsd4_cb_recall(struct nfs4_delegation *dp)
788{ 808{
809 struct nfsd4_callback *cb = &dp->dl_recall;
810
811 dp->dl_retries = 1;
812 cb->cb_op = dp;
813 cb->cb_clp = dp->dl_client;
814 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
815 cb->cb_msg.rpc_argp = cb;
816 cb->cb_msg.rpc_resp = cb;
817 cb->cb_msg.rpc_cred = callback_cred;
818
819 cb->cb_ops = &nfsd4_cb_recall_ops;
820 dp->dl_retries = 1;
821
789 queue_work(callback_wq, &dp->dl_recall.cb_work); 822 queue_work(callback_wq, &dp->dl_recall.cb_work);
790} 823}
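
Taken together, the changes to this file funnel every callback (the null probe as well as CB_RECALL) through one struct nfsd4_callback and a single-threaded workqueue; nfsd4_do_callback_rpc() first applies any pending connection update or kill request for the client and only then fires the async RPC. The following is a small userspace model of that control flow, with invented names and a plain function call standing in for rpc_call_async(); it is meant only to make the ordering easier to follow, not to reflect the kernel implementation.

/*
 * Userspace model of the callback dispatch pattern; names are invented.
 */
#include <stdio.h>
#include <stdbool.h>

enum { CLIENT_CB_UPDATE = 1 << 0, CLIENT_CB_KILL = 1 << 1 };

struct client {
	unsigned long cb_flags;
	bool cb_channel_up;	/* stands in for clp->cl_cb_client */
	const char *name;
};

struct callback {
	struct client *clp;
	void (*done)(struct callback *cb, int status);
};

static void process_cb_update(struct client *clp)
{
	/* tear down the old channel, then rebuild unless we were killed */
	clp->cb_channel_up = false;
	if (clp->cb_flags & CLIENT_CB_KILL)
		return;
	clp->cb_flags &= ~CLIENT_CB_UPDATE;
	clp->cb_channel_up = true;	/* "setup_callback_client" succeeded */
}

static void do_callback(struct callback *cb)
{
	struct client *clp = cb->clp;

	if (clp->cb_flags)
		process_cb_update(clp);
	if (!clp->cb_channel_up) {
		cb->done(cb, -1);	/* channel broken or client killed */
		return;
	}
	cb->done(cb, 0);		/* would be rpc_call_async() */
}

static void recall_done(struct callback *cb, int status)
{
	printf("recall to %s finished: %d\n", cb->clp->name, status);
}

int main(void)
{
	struct client clp = { .cb_flags = CLIENT_CB_UPDATE, .name = "client1" };
	struct callback recall = { .clp = &clp, .done = recall_done };

	do_callback(&recall);		/* rebuilds the channel, then sends */
	clp.cb_flags |= CLIENT_CB_KILL;
	do_callback(&recall);		/* detects the kill and gives up */
	return 0;
}
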
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index c78dbf493424..f0695e815f0e 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -482,109 +482,26 @@ nfsd_idmap_shutdown(void)
482 cache_unregister(&nametoid_cache); 482 cache_unregister(&nametoid_cache);
483} 483}
484 484
485/*
486 * Deferred request handling
487 */
488
489struct idmap_defer_req {
490 struct cache_req req;
491 struct cache_deferred_req deferred_req;
492 wait_queue_head_t waitq;
493 atomic_t count;
494};
495
496static inline void
497put_mdr(struct idmap_defer_req *mdr)
498{
499 if (atomic_dec_and_test(&mdr->count))
500 kfree(mdr);
501}
502
503static inline void
504get_mdr(struct idmap_defer_req *mdr)
505{
506 atomic_inc(&mdr->count);
507}
508
509static void
510idmap_revisit(struct cache_deferred_req *dreq, int toomany)
511{
512 struct idmap_defer_req *mdr =
513 container_of(dreq, struct idmap_defer_req, deferred_req);
514
515 wake_up(&mdr->waitq);
516 put_mdr(mdr);
517}
518
519static struct cache_deferred_req *
520idmap_defer(struct cache_req *req)
521{
522 struct idmap_defer_req *mdr =
523 container_of(req, struct idmap_defer_req, req);
524
525 mdr->deferred_req.revisit = idmap_revisit;
526 get_mdr(mdr);
527 return (&mdr->deferred_req);
528}
529
530static inline int
531do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key,
532 struct cache_detail *detail, struct ent **item,
533 struct idmap_defer_req *mdr)
534{
535 *item = lookup_fn(key);
536 if (!*item)
537 return -ENOMEM;
538 return cache_check(detail, &(*item)->h, &mdr->req);
539}
540
541static inline int
542do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
543 struct ent *key, struct cache_detail *detail,
544 struct ent **item)
545{
546 int ret = -ENOMEM;
547
548 *item = lookup_fn(key);
549 if (!*item)
550 goto out_err;
551 ret = -ETIMEDOUT;
552 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
553 || (*item)->h.expiry_time < get_seconds()
554 || detail->flush_time > (*item)->h.last_refresh)
555 goto out_put;
556 ret = -ENOENT;
557 if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
558 goto out_put;
559 return 0;
560out_put:
561 cache_put(&(*item)->h, detail);
562out_err:
563 *item = NULL;
564 return ret;
565}
566
567static int 485static int
568idmap_lookup(struct svc_rqst *rqstp, 486idmap_lookup(struct svc_rqst *rqstp,
569 struct ent *(*lookup_fn)(struct ent *), struct ent *key, 487 struct ent *(*lookup_fn)(struct ent *), struct ent *key,
570 struct cache_detail *detail, struct ent **item) 488 struct cache_detail *detail, struct ent **item)
571{ 489{
572 struct idmap_defer_req *mdr;
573 int ret; 490 int ret;
574 491
575 mdr = kzalloc(sizeof(*mdr), GFP_KERNEL); 492 *item = lookup_fn(key);
576 if (!mdr) 493 if (!*item)
577 return -ENOMEM; 494 return -ENOMEM;
578 atomic_set(&mdr->count, 1); 495 retry:
579 init_waitqueue_head(&mdr->waitq); 496 ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle);
580 mdr->req.defer = idmap_defer; 497
581 ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); 498 if (ret == -ETIMEDOUT) {
582 if (ret == -EAGAIN) { 499 struct ent *prev_item = *item;
583 wait_event_interruptible_timeout(mdr->waitq, 500 *item = lookup_fn(key);
584 test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); 501 if (*item != prev_item)
585 ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); 502 goto retry;
503 cache_put(&(*item)->h, detail);
586 } 504 }
587 put_mdr(mdr);
588 return ret; 505 return ret;
589} 506}
590 507
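
The replacement idmap_lookup() above leans on the generic deferral handling inside cache_check() and retries once an upcall has had a chance to refresh the entry: on -ETIMEDOUT it repeats the lookup and only gives up if it gets the very same, still-stale entry back. Below is a self-contained userspace model of that retry loop; lookup() and cache_check() here are toy stand-ins, not the kernel cache API.

/*
 * Toy model of the retry-on-ETIMEDOUT loop in the new idmap_lookup().
 */
#include <errno.h>
#include <stdio.h>

struct ent { int valid; };

static struct ent entries[2] = { { 0 }, { 1 } };
static int generation;

/* stand-in for lookup_fn(): returns the current version of the entry */
static struct ent *lookup(void)
{
	return &entries[generation];
}

/*
 * Stand-in for cache_check(): the timed-out "upcall" completes as a side
 * effect and refreshes the cache, just to exercise the retry path.
 */
static int cache_check(struct ent *item)
{
	if (item->valid)
		return 0;
	generation = 1;
	return -ETIMEDOUT;
}

static void cache_put(struct ent *item)
{
	(void)item;
}

static int idmap_lookup_model(struct ent **item)
{
	int ret;

	*item = lookup();
retry:
	ret = cache_check(*item);
	if (ret == -ETIMEDOUT) {
		struct ent *prev = *item;

		*item = lookup();	/* may now return a refreshed entry */
		if (*item != prev)
			goto retry;
		cache_put(*item);	/* still the same stale entry: give up */
	}
	return ret;
}

int main(void)
{
	struct ent *item;

	printf("lookup returned %d\n", idmap_lookup_model(&item));	/* 0 */
	return 0;
}
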
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 59ec449b0c7f..0cdfd022bb7b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1031,8 +1031,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1031 resp->cstate.session = NULL; 1031 resp->cstate.session = NULL;
1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); 1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); 1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
1034 /* Use the deferral mechanism only for NFSv4.0 compounds */ 1034 /*
1035 rqstp->rq_usedeferral = (args->minorversion == 0); 1035 * Don't use the deferral mechanism for NFSv4; compounds make it
1036 * too hard to avoid non-idempotency problems.
1037 */
1038 rqstp->rq_usedeferral = 0;
1036 1039
1037 /* 1040 /*
1038 * According to RFC3010, this takes precedence over all other errors. 1041 * According to RFC3010, this takes precedence over all other errors.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7292fcf7718..9019e8ec9dc8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -207,7 +207,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
207{ 207{
208 struct nfs4_delegation *dp; 208 struct nfs4_delegation *dp;
209 struct nfs4_file *fp = stp->st_file; 209 struct nfs4_file *fp = stp->st_file;
210 struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
211 210
212 dprintk("NFSD alloc_init_deleg\n"); 211 dprintk("NFSD alloc_init_deleg\n");
213 /* 212 /*
@@ -234,7 +233,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
234 nfs4_file_get_access(fp, O_RDONLY); 233 nfs4_file_get_access(fp, O_RDONLY);
235 dp->dl_flock = NULL; 234 dp->dl_flock = NULL;
236 dp->dl_type = type; 235 dp->dl_type = type;
237 dp->dl_ident = cb->cb_ident;
238 dp->dl_stateid.si_boot = boot_time; 236 dp->dl_stateid.si_boot = boot_time;
239 dp->dl_stateid.si_stateownerid = current_delegid++; 237 dp->dl_stateid.si_stateownerid = current_delegid++;
240 dp->dl_stateid.si_fileid = 0; 238 dp->dl_stateid.si_fileid = 0;
@@ -535,171 +533,258 @@ gen_sessionid(struct nfsd4_session *ses)
535 */ 533 */
536#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) 534#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
537 535
536static void
537free_session_slots(struct nfsd4_session *ses)
538{
539 int i;
540
541 for (i = 0; i < ses->se_fchannel.maxreqs; i++)
542 kfree(ses->se_slots[i]);
543}
544
538/* 545/*
539 * Give the client the number of ca_maxresponsesize_cached slots it 546 * We don't actually need to cache the rpc and session headers, so we
540 * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, 547 * can allocate a little less for each slot:
541 * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more 548 */
542 * than NFSD_MAX_SLOTS_PER_SESSION. 549static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
543 * 550{
544 * If we run out of reserved DRC memory we should (up to a point) 551 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
552}
553
554static int nfsd4_sanitize_slot_size(u32 size)
555{
556 size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */
557 size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE);
558
559 return size;
560}
561
562/*
563 * XXX: If we run out of reserved DRC memory we could (up to a point)
545 * re-negotiate active sessions and reduce their slot usage to make 564 * re-negotiate active sessions and reduce their slot usage to make
546 room for new connections. For now we just fail the create session. 565 room for new connections. For now we just fail the create session.
547 */ 566 */
548static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) 567static int nfsd4_get_drc_mem(int slotsize, u32 num)
549{ 568{
550 int mem, size = fchan->maxresp_cached; 569 int avail;
551 570
552 if (fchan->maxreqs < 1) 571 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
553 return nfserr_inval;
554 572
555 if (size < NFSD_MIN_HDR_SEQ_SZ) 573 spin_lock(&nfsd_drc_lock);
556 size = NFSD_MIN_HDR_SEQ_SZ; 574 avail = min_t(int, NFSD_MAX_MEM_PER_SESSION,
557 size -= NFSD_MIN_HDR_SEQ_SZ; 575 nfsd_drc_max_mem - nfsd_drc_mem_used);
558 if (size > NFSD_SLOT_CACHE_SIZE) 576 num = min_t(int, num, avail / slotsize);
559 size = NFSD_SLOT_CACHE_SIZE; 577 nfsd_drc_mem_used += num * slotsize;
560 578 spin_unlock(&nfsd_drc_lock);
561 /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
562 mem = fchan->maxreqs * size;
563 if (mem > NFSD_MAX_MEM_PER_SESSION) {
564 fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
565 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
566 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
567 mem = fchan->maxreqs * size;
568 }
569 579
580 return num;
581}
582
583static void nfsd4_put_drc_mem(int slotsize, int num)
584{
570 spin_lock(&nfsd_drc_lock); 585 spin_lock(&nfsd_drc_lock);
571 /* bound the total session drc memory ussage */ 586 nfsd_drc_mem_used -= slotsize * num;
572 if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
573 fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
574 mem = fchan->maxreqs * size;
575 }
576 nfsd_drc_mem_used += mem;
577 spin_unlock(&nfsd_drc_lock); 587 spin_unlock(&nfsd_drc_lock);
588}
578 589
579 if (fchan->maxreqs == 0) 590static struct nfsd4_session *alloc_session(int slotsize, int numslots)
580 return nfserr_jukebox; 591{
592 struct nfsd4_session *new;
593 int mem, i;
581 594
582 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; 595 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
583 return 0; 596 + sizeof(struct nfsd4_session) > PAGE_SIZE);
597 mem = numslots * sizeof(struct nfsd4_slot *);
598
599 new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
600 if (!new)
601 return NULL;
602 /* allocate each struct nfsd4_slot and data cache in one piece */
603 for (i = 0; i < numslots; i++) {
604 mem = sizeof(struct nfsd4_slot) + slotsize;
605 new->se_slots[i] = kzalloc(mem, GFP_KERNEL);
606 if (!new->se_slots[i])
607 goto out_free;
608 }
609 return new;
610out_free:
611 while (i--)
612 kfree(new->se_slots[i]);
613 kfree(new);
614 return NULL;
584} 615}
585 616
586/* 617static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize)
587 * fchan holds the client values on input, and the server values on output
588 * sv_max_mesg is the maximum payload plus one page for overhead.
589 */
590static int init_forechannel_attrs(struct svc_rqst *rqstp,
591 struct nfsd4_channel_attrs *session_fchan,
592 struct nfsd4_channel_attrs *fchan)
593{ 618{
594 int status = 0; 619 u32 maxrpc = nfsd_serv->sv_max_mesg;
595 __u32 maxcount = nfsd_serv->sv_max_mesg;
596 620
597 /* headerpadsz set to zero in encode routine */ 621 new->maxreqs = numslots;
622 new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ;
623 new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
624 new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
625 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
626}
598 627
599 /* Use the client's max request and max response size if possible */ 628static void free_conn(struct nfsd4_conn *c)
600 if (fchan->maxreq_sz > maxcount) 629{
601 fchan->maxreq_sz = maxcount; 630 svc_xprt_put(c->cn_xprt);
602 session_fchan->maxreq_sz = fchan->maxreq_sz; 631 kfree(c);
632}
603 633
604 if (fchan->maxresp_sz > maxcount) 634static void nfsd4_conn_lost(struct svc_xpt_user *u)
605 fchan->maxresp_sz = maxcount; 635{
606 session_fchan->maxresp_sz = fchan->maxresp_sz; 636 struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
637 struct nfs4_client *clp = c->cn_session->se_client;
607 638
608 /* Use the client's maxops if possible */ 639 spin_lock(&clp->cl_lock);
609 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 640 if (!list_empty(&c->cn_persession)) {
610 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 641 list_del(&c->cn_persession);
611 session_fchan->maxops = fchan->maxops; 642 free_conn(c);
643 }
644 spin_unlock(&clp->cl_lock);
645}
612 646
613 /* FIXME: Error means no more DRC pages so the server should 647static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
614 * recover pages from existing sessions. For now fail session 648{
615 * creation. 649 struct nfsd4_conn *conn;
616 */
617 status = set_forechannel_drc_size(fchan);
618 650
619 session_fchan->maxresp_cached = fchan->maxresp_cached; 651 conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
620 session_fchan->maxreqs = fchan->maxreqs; 652 if (!conn)
653 return NULL;
654 svc_xprt_get(rqstp->rq_xprt);
655 conn->cn_xprt = rqstp->rq_xprt;
656 conn->cn_flags = flags;
657 INIT_LIST_HEAD(&conn->cn_xpt_user.list);
658 return conn;
659}
621 660
622 dprintk("%s status %d\n", __func__, status); 661static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
623 return status; 662{
663 conn->cn_session = ses;
664 list_add(&conn->cn_persession, &ses->se_conns);
624} 665}
625 666
626static void 667static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
627free_session_slots(struct nfsd4_session *ses)
628{ 668{
629 int i; 669 struct nfs4_client *clp = ses->se_client;
630 670
631 for (i = 0; i < ses->se_fchannel.maxreqs; i++) 671 spin_lock(&clp->cl_lock);
632 kfree(ses->se_slots[i]); 672 __nfsd4_hash_conn(conn, ses);
673 spin_unlock(&clp->cl_lock);
633} 674}
634 675
635/* 676static void nfsd4_register_conn(struct nfsd4_conn *conn)
636 * We don't actually need to cache the rpc and session headers, so we
637 * can allocate a little less for each slot:
638 */
639static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
640{ 677{
641 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 678 conn->cn_xpt_user.callback = nfsd4_conn_lost;
679 register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
642} 680}
643 681
644static int 682static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
645alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
646 struct nfsd4_create_session *cses)
647{ 683{
648 struct nfsd4_session *new, tmp; 684 struct nfsd4_conn *conn;
649 struct nfsd4_slot *sp; 685 u32 flags = NFS4_CDFC4_FORE;
650 int idx, slotsize, cachesize, i;
651 int status;
652 686
653 memset(&tmp, 0, sizeof(tmp)); 687 if (ses->se_flags & SESSION4_BACK_CHAN)
688 flags |= NFS4_CDFC4_BACK;
689 conn = alloc_conn(rqstp, flags);
690 if (!conn)
691 return nfserr_jukebox;
692 nfsd4_hash_conn(conn, ses);
693 nfsd4_register_conn(conn);
694 return nfs_ok;
695}
654 696
655 /* FIXME: For now, we just accept the client back channel attributes. */ 697static void nfsd4_del_conns(struct nfsd4_session *s)
656 tmp.se_bchannel = cses->back_channel; 698{
657 status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, 699 struct nfs4_client *clp = s->se_client;
658 &cses->fore_channel); 700 struct nfsd4_conn *c;
659 if (status)
660 goto out;
661 701
662 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) 702 spin_lock(&clp->cl_lock);
663 + sizeof(struct nfsd4_session) > PAGE_SIZE); 703 while (!list_empty(&s->se_conns)) {
704 c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
705 list_del_init(&c->cn_persession);
706 spin_unlock(&clp->cl_lock);
664 707
665 status = nfserr_jukebox; 708 unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
666 /* allocate struct nfsd4_session and slot table pointers in one piece */ 709 free_conn(c);
667 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
668 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
669 if (!new)
670 goto out;
671 710
672 memcpy(new, &tmp, sizeof(*new)); 711 spin_lock(&clp->cl_lock);
712 }
713 spin_unlock(&clp->cl_lock);
714}
673 715
674 /* allocate each struct nfsd4_slot and data cache in one piece */ 716void free_session(struct kref *kref)
675 cachesize = slot_bytes(&new->se_fchannel); 717{
676 for (i = 0; i < new->se_fchannel.maxreqs; i++) { 718 struct nfsd4_session *ses;
677 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 719 int mem;
678 if (!sp) 720
679 goto out_free; 721 ses = container_of(kref, struct nfsd4_session, se_ref);
680 new->se_slots[i] = sp; 722 nfsd4_del_conns(ses);
723 spin_lock(&nfsd_drc_lock);
724 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
725 nfsd_drc_mem_used -= mem;
726 spin_unlock(&nfsd_drc_lock);
727 free_session_slots(ses);
728 kfree(ses);
729}
730
731static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
732{
733 struct nfsd4_session *new;
734 struct nfsd4_channel_attrs *fchan = &cses->fore_channel;
735 int numslots, slotsize;
736 int status;
737 int idx;
738
739 /*
740 * Note decreasing slot size below client's request may
741 * make it difficult for client to function correctly, whereas
742 * decreasing the number of slots will (just?) affect
743 * performance. When short on memory we therefore prefer to
744 * decrease number of slots instead of their size.
745 */
746 slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached);
747 numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs);
748
749 new = alloc_session(slotsize, numslots);
750 if (!new) {
751 nfsd4_put_drc_mem(slotsize, fchan->maxreqs);
752 return NULL;
681 } 753 }
754 init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize);
682 755
683 new->se_client = clp; 756 new->se_client = clp;
684 gen_sessionid(new); 757 gen_sessionid(new);
685 idx = hash_sessionid(&new->se_sessionid);
686 memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
687 NFS4_MAX_SESSIONID_LEN);
688 758
759 INIT_LIST_HEAD(&new->se_conns);
760
761 new->se_cb_seq_nr = 1;
689 new->se_flags = cses->flags; 762 new->se_flags = cses->flags;
763 new->se_cb_prog = cses->callback_prog;
690 kref_init(&new->se_ref); 764 kref_init(&new->se_ref);
765 idx = hash_sessionid(&new->se_sessionid);
691 spin_lock(&client_lock); 766 spin_lock(&client_lock);
692 list_add(&new->se_hash, &sessionid_hashtbl[idx]); 767 list_add(&new->se_hash, &sessionid_hashtbl[idx]);
693 list_add(&new->se_perclnt, &clp->cl_sessions); 768 list_add(&new->se_perclnt, &clp->cl_sessions);
694 spin_unlock(&client_lock); 769 spin_unlock(&client_lock);
695 770
696 status = nfs_ok; 771 status = nfsd4_new_conn(rqstp, new);
697out: 772 /* whoops: benny points out, status is ignored! (err, or bogus) */
698 return status; 773 if (status) {
699out_free: 774 free_session(&new->se_ref);
700 free_session_slots(new); 775 return NULL;
701 kfree(new); 776 }
702 goto out; 777 if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) {
778 struct sockaddr *sa = svc_addr(rqstp);
779
780 clp->cl_cb_session = new;
781 clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
782 svc_xprt_get(rqstp->rq_xprt);
783 rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
784 clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
785 nfsd4_probe_callback(clp);
786 }
787 return new;
703} 788}
704 789
705/* caller must hold client_lock */ 790/* caller must hold client_lock */
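
The DRC accounting introduced above splits the old set_forechannel_drc_size() into nfsd4_sanitize_slot_size() and nfsd4_get_drc_mem(): the cached-reply size is the client's maxresp_cached minus the headers that are never cached, capped at NFSD_SLOT_CACHE_SIZE, and the slot count is bounded by the per-session and global memory limits. The arithmetic is easy to check in isolation; the userspace sketch below uses illustrative constants and, like the hunk above, does not guard against a request smaller than the minimum header size.

/*
 * Userspace sketch of the DRC slot accounting arithmetic; the constants
 * and the 8 MB global limit are illustrative, not the kernel's values.
 */
#include <stdio.h>

#define MIN_HDR_SEQ_SZ		(24 + 12 + 44)	/* as in NFSD_MIN_HDR_SEQ_SZ */
#define SLOT_CACHE_SIZE		1024		/* illustrative */
#define MAX_SLOTS_PER_SESSION	160		/* illustrative */
#define MAX_MEM_PER_SESSION	(MAX_SLOTS_PER_SESSION * SLOT_CACHE_SIZE)

static unsigned int drc_max_mem = 8 * 1024 * 1024;	/* illustrative */
static unsigned int drc_mem_used;

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* assumes maxresp_cached >= MIN_HDR_SEQ_SZ; the sketch does not validate it */
static unsigned int sanitize_slot_size(unsigned int maxresp_cached)
{
	return min_u(maxresp_cached - MIN_HDR_SEQ_SZ, SLOT_CACHE_SIZE);
}

/* How many slots of 'slotsize' cached bytes can we afford to grant? */
static unsigned int get_drc_mem(unsigned int slotsize, unsigned int numreq)
{
	unsigned int avail, num;

	num = min_u(numreq, MAX_SLOTS_PER_SESSION);
	avail = min_u(MAX_MEM_PER_SESSION, drc_max_mem - drc_mem_used);
	num = min_u(num, avail / slotsize);
	drc_mem_used += num * slotsize;
	return num;
}

int main(void)
{
	unsigned int slotsize = sanitize_slot_size(2048);
	unsigned int slots = get_drc_mem(slotsize, 64);

	printf("slotsize=%u, granted %u slots, drc_mem_used=%u\n",
	       slotsize, slots, drc_mem_used);
	return 0;
}
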
@@ -731,21 +816,6 @@ unhash_session(struct nfsd4_session *ses)
731 list_del(&ses->se_perclnt); 816 list_del(&ses->se_perclnt);
732} 817}
733 818
734void
735free_session(struct kref *kref)
736{
737 struct nfsd4_session *ses;
738 int mem;
739
740 ses = container_of(kref, struct nfsd4_session, se_ref);
741 spin_lock(&nfsd_drc_lock);
742 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
743 nfsd_drc_mem_used -= mem;
744 spin_unlock(&nfsd_drc_lock);
745 free_session_slots(ses);
746 kfree(ses);
747}
748
749/* must be called under the client_lock */ 819/* must be called under the client_lock */
750static inline void 820static inline void
751renew_client_locked(struct nfs4_client *clp) 821renew_client_locked(struct nfs4_client *clp)
@@ -812,6 +882,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
812static inline void 882static inline void
813free_client(struct nfs4_client *clp) 883free_client(struct nfs4_client *clp)
814{ 884{
885 while (!list_empty(&clp->cl_sessions)) {
886 struct nfsd4_session *ses;
887 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
888 se_perclnt);
889 list_del(&ses->se_perclnt);
890 nfsd4_put_session(ses);
891 }
815 if (clp->cl_cred.cr_group_info) 892 if (clp->cl_cred.cr_group_info)
816 put_group_info(clp->cl_cred.cr_group_info); 893 put_group_info(clp->cl_cred.cr_group_info);
817 kfree(clp->cl_principal); 894 kfree(clp->cl_principal);
@@ -838,15 +915,12 @@ release_session_client(struct nfsd4_session *session)
838static inline void 915static inline void
839unhash_client_locked(struct nfs4_client *clp) 916unhash_client_locked(struct nfs4_client *clp)
840{ 917{
918 struct nfsd4_session *ses;
919
841 mark_client_expired(clp); 920 mark_client_expired(clp);
842 list_del(&clp->cl_lru); 921 list_del(&clp->cl_lru);
843 while (!list_empty(&clp->cl_sessions)) { 922 list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
844 struct nfsd4_session *ses; 923 list_del_init(&ses->se_hash);
845 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
846 se_perclnt);
847 unhash_session(ses);
848 nfsd4_put_session(ses);
849 }
850} 924}
851 925
852static void 926static void
@@ -875,7 +949,7 @@ expire_client(struct nfs4_client *clp)
875 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); 949 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
876 release_openowner(sop); 950 release_openowner(sop);
877 } 951 }
878 nfsd4_set_callback_client(clp, NULL); 952 nfsd4_shutdown_callback(clp);
879 if (clp->cl_cb_conn.cb_xprt) 953 if (clp->cl_cb_conn.cb_xprt)
880 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 954 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
881 list_del(&clp->cl_idhash); 955 list_del(&clp->cl_idhash);
@@ -960,6 +1034,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
960 if (clp == NULL) 1034 if (clp == NULL)
961 return NULL; 1035 return NULL;
962 1036
1037 INIT_LIST_HEAD(&clp->cl_sessions);
1038
963 princ = svc_gss_principal(rqstp); 1039 princ = svc_gss_principal(rqstp);
964 if (princ) { 1040 if (princ) {
965 clp->cl_principal = kstrdup(princ, GFP_KERNEL); 1041 clp->cl_principal = kstrdup(princ, GFP_KERNEL);
@@ -976,8 +1052,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
976 INIT_LIST_HEAD(&clp->cl_strhash); 1052 INIT_LIST_HEAD(&clp->cl_strhash);
977 INIT_LIST_HEAD(&clp->cl_openowners); 1053 INIT_LIST_HEAD(&clp->cl_openowners);
978 INIT_LIST_HEAD(&clp->cl_delegations); 1054 INIT_LIST_HEAD(&clp->cl_delegations);
979 INIT_LIST_HEAD(&clp->cl_sessions);
980 INIT_LIST_HEAD(&clp->cl_lru); 1055 INIT_LIST_HEAD(&clp->cl_lru);
1056 spin_lock_init(&clp->cl_lock);
1057 INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
981 clp->cl_time = get_seconds(); 1058 clp->cl_time = get_seconds();
982 clear_bit(0, &clp->cl_cb_slot_busy); 1059 clear_bit(0, &clp->cl_cb_slot_busy);
983 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1060 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
@@ -986,7 +1063,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
986 clp->cl_flavor = rqstp->rq_flavor; 1063 clp->cl_flavor = rqstp->rq_flavor;
987 copy_cred(&clp->cl_cred, &rqstp->rq_cred); 1064 copy_cred(&clp->cl_cred, &rqstp->rq_cred);
988 gen_confirm(clp); 1065 gen_confirm(clp);
989 1066 clp->cl_cb_session = NULL;
990 return clp; 1067 return clp;
991} 1068}
992 1069
@@ -1098,7 +1175,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
1098static void 1175static void
1099gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) 1176gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
1100{ 1177{
1101 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 1178 struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
1102 unsigned short expected_family; 1179 unsigned short expected_family;
1103 1180
1104 /* Currently, we only support tcp and tcp6 for the callback channel */ 1181 /* Currently, we only support tcp and tcp6 for the callback channel */
@@ -1111,24 +1188,23 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
1111 else 1188 else
1112 goto out_err; 1189 goto out_err;
1113 1190
1114 cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, 1191 conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
1115 se->se_callback_addr_len, 1192 se->se_callback_addr_len,
1116 (struct sockaddr *) &cb->cb_addr, 1193 (struct sockaddr *)&conn->cb_addr,
1117 sizeof(cb->cb_addr)); 1194 sizeof(conn->cb_addr));
1118 1195
1119 if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) 1196 if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
1120 goto out_err; 1197 goto out_err;
1121 1198
1122 if (cb->cb_addr.ss_family == AF_INET6) 1199 if (conn->cb_addr.ss_family == AF_INET6)
1123 ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; 1200 ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
1124 1201
1125 cb->cb_minorversion = 0; 1202 conn->cb_prog = se->se_callback_prog;
1126 cb->cb_prog = se->se_callback_prog; 1203 conn->cb_ident = se->se_callback_ident;
1127 cb->cb_ident = se->se_callback_ident;
1128 return; 1204 return;
1129out_err: 1205out_err:
1130 cb->cb_addr.ss_family = AF_UNSPEC; 1206 conn->cb_addr.ss_family = AF_UNSPEC;
1131 cb->cb_addrlen = 0; 1207 conn->cb_addrlen = 0;
1132 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 1208 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
1133 "will not receive delegations\n", 1209 "will not receive delegations\n",
1134 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 1210 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -1415,7 +1491,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1415{ 1491{
1416 struct sockaddr *sa = svc_addr(rqstp); 1492 struct sockaddr *sa = svc_addr(rqstp);
1417 struct nfs4_client *conf, *unconf; 1493 struct nfs4_client *conf, *unconf;
1494 struct nfsd4_session *new;
1418 struct nfsd4_clid_slot *cs_slot = NULL; 1495 struct nfsd4_clid_slot *cs_slot = NULL;
1496 bool confirm_me = false;
1419 int status = 0; 1497 int status = 0;
1420 1498
1421 nfs4_lock_state(); 1499 nfs4_lock_state();
@@ -1438,7 +1516,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1438 cs_slot->sl_seqid, cr_ses->seqid); 1516 cs_slot->sl_seqid, cr_ses->seqid);
1439 goto out; 1517 goto out;
1440 } 1518 }
1441 cs_slot->sl_seqid++;
1442 } else if (unconf) { 1519 } else if (unconf) {
1443 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 1520 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1444 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { 1521 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
@@ -1451,25 +1528,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1451 if (status) { 1528 if (status) {
1452 /* an unconfirmed replay returns misordered */ 1529 /* an unconfirmed replay returns misordered */
1453 status = nfserr_seq_misordered; 1530 status = nfserr_seq_misordered;
1454 goto out_cache; 1531 goto out;
1455 } 1532 }
1456 1533
1457 cs_slot->sl_seqid++; /* from 0 to 1 */ 1534 confirm_me = true;
1458 move_to_confirmed(unconf);
1459
1460 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1461 unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
1462 svc_xprt_get(rqstp->rq_xprt);
1463 rpc_copy_addr(
1464 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1465 sa);
1466 unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1467 unconf->cl_cb_conn.cb_minorversion =
1468 cstate->minorversion;
1469 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1470 unconf->cl_cb_seq_nr = 1;
1471 nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
1472 }
1473 conf = unconf; 1535 conf = unconf;
1474 } else { 1536 } else {
1475 status = nfserr_stale_clientid; 1537 status = nfserr_stale_clientid;
@@ -1477,22 +1539,30 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1477 } 1539 }
1478 1540
1479 /* 1541 /*
1542 * XXX: we should probably set this at creation time, and check
1543 * for consistent minorversion use throughout:
1544 */
1545 conf->cl_minorversion = 1;
1546 /*
1480 * We do not support RDMA or persistent sessions 1547 * We do not support RDMA or persistent sessions
1481 */ 1548 */
1482 cr_ses->flags &= ~SESSION4_PERSIST; 1549 cr_ses->flags &= ~SESSION4_PERSIST;
1483 cr_ses->flags &= ~SESSION4_RDMA; 1550 cr_ses->flags &= ~SESSION4_RDMA;
1484 1551
1485 status = alloc_init_session(rqstp, conf, cr_ses); 1552 status = nfserr_jukebox;
1486 if (status) 1553 new = alloc_init_session(rqstp, conf, cr_ses);
1554 if (!new)
1487 goto out; 1555 goto out;
1488 1556 status = nfs_ok;
1489 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, 1557 memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
1490 NFS4_MAX_SESSIONID_LEN); 1558 NFS4_MAX_SESSIONID_LEN);
1559 cs_slot->sl_seqid++;
1491 cr_ses->seqid = cs_slot->sl_seqid; 1560 cr_ses->seqid = cs_slot->sl_seqid;
1492 1561
1493out_cache:
1494 /* cache solo and embedded create sessions under the state lock */ 1562 /* cache solo and embedded create sessions under the state lock */
1495 nfsd4_cache_create_session(cr_ses, cs_slot, status); 1563 nfsd4_cache_create_session(cr_ses, cs_slot, status);
1564 if (confirm_me)
1565 move_to_confirmed(conf);
1496out: 1566out:
1497 nfs4_unlock_state(); 1567 nfs4_unlock_state();
1498 dprintk("%s returns %d\n", __func__, ntohl(status)); 1568 dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1546,8 +1616,11 @@ nfsd4_destroy_session(struct svc_rqst *r,
1546 1616
1547 nfs4_lock_state(); 1617 nfs4_lock_state();
1548 /* wait for callbacks */ 1618 /* wait for callbacks */
1549 nfsd4_set_callback_client(ses->se_client, NULL); 1619 nfsd4_shutdown_callback(ses->se_client);
1550 nfs4_unlock_state(); 1620 nfs4_unlock_state();
1621
1622 nfsd4_del_conns(ses);
1623
1551 nfsd4_put_session(ses); 1624 nfsd4_put_session(ses);
1552 status = nfs_ok; 1625 status = nfs_ok;
1553out: 1626out:
@@ -1555,6 +1628,36 @@ out:
1555 return status; 1628 return status;
1556} 1629}
1557 1630
1631static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
1632{
1633 struct nfsd4_conn *c;
1634
1635 list_for_each_entry(c, &s->se_conns, cn_persession) {
1636 if (c->cn_xprt == xpt) {
1637 return c;
1638 }
1639 }
1640 return NULL;
1641}
1642
1643static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
1644{
1645 struct nfs4_client *clp = ses->se_client;
1646 struct nfsd4_conn *c;
1647
1648 spin_lock(&clp->cl_lock);
1649 c = __nfsd4_find_conn(new->cn_xprt, ses);
1650 if (c) {
1651 spin_unlock(&clp->cl_lock);
1652 free_conn(new);
1653 return;
1654 }
1655 __nfsd4_hash_conn(new, ses);
1656 spin_unlock(&clp->cl_lock);
1657 nfsd4_register_conn(new);
1658 return;
1659}
1660
1558__be32 1661__be32
1559nfsd4_sequence(struct svc_rqst *rqstp, 1662nfsd4_sequence(struct svc_rqst *rqstp,
1560 struct nfsd4_compound_state *cstate, 1663 struct nfsd4_compound_state *cstate,
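
nfsd4_sequence_check_conn() above follows an allocate-outside-the-lock pattern: nfsd4_sequence() allocates a connection object up front, and under the client lock it is either inserted into the session's connection list or, if that transport is already tracked, simply freed again. Here is a userspace illustration of the same pattern, using a pthread mutex in place of the spinlock and made-up types; it is a sketch of the idea, not the kernel code.

/*
 * Find-or-insert of a preallocated connection object under a lock.
 */
#include <assert.h>
#include <pthread.h>
#include <stdlib.h>

struct conn {
	const void *xprt;		/* identity of the transport */
	struct conn *next;
};

struct session {
	pthread_mutex_t lock;
	struct conn *conns;
};

static struct conn *find_conn(struct session *s, const void *xprt)
{
	struct conn *c;

	for (c = s->conns; c; c = c->next)
		if (c->xprt == xprt)
			return c;
	return NULL;
}

/* 'new' was allocated by the caller before any lock was taken. */
static void check_conn(struct session *s, struct conn *new)
{
	pthread_mutex_lock(&s->lock);
	if (find_conn(s, new->xprt)) {
		pthread_mutex_unlock(&s->lock);
		free(new);		/* already tracked: drop the spare */
		return;
	}
	new->next = s->conns;		/* the "__nfsd4_hash_conn" step */
	s->conns = new;
	pthread_mutex_unlock(&s->lock);
	/* registering with the transport would happen here, after unlocking */
}

int main(void)
{
	struct session s = { .lock = PTHREAD_MUTEX_INITIALIZER };
	int xprt;
	struct conn *c1 = calloc(1, sizeof(*c1));
	struct conn *c2 = calloc(1, sizeof(*c2));

	assert(c1 && c2);
	c1->xprt = &xprt;
	c2->xprt = &xprt;		/* same transport: c2 gets freed */
	check_conn(&s, c1);
	check_conn(&s, c2);
	while (s.conns) {		/* cleanup */
		struct conn *c = s.conns;
		s.conns = c->next;
		free(c);
	}
	return 0;
}
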
@@ -1563,11 +1666,20 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1563 struct nfsd4_compoundres *resp = rqstp->rq_resp; 1666 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1564 struct nfsd4_session *session; 1667 struct nfsd4_session *session;
1565 struct nfsd4_slot *slot; 1668 struct nfsd4_slot *slot;
1669 struct nfsd4_conn *conn;
1566 int status; 1670 int status;
1567 1671
1568 if (resp->opcnt != 1) 1672 if (resp->opcnt != 1)
1569 return nfserr_sequence_pos; 1673 return nfserr_sequence_pos;
1570 1674
1675 /*
1676 * Will be either used or freed by nfsd4_sequence_check_conn
1677 * below.
1678 */
1679 conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
1680 if (!conn)
1681 return nfserr_jukebox;
1682
1571 spin_lock(&client_lock); 1683 spin_lock(&client_lock);
1572 status = nfserr_badsession; 1684 status = nfserr_badsession;
1573 session = find_in_sessionid_hashtbl(&seq->sessionid); 1685 session = find_in_sessionid_hashtbl(&seq->sessionid);
@@ -1599,6 +1711,9 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1599 if (status) 1711 if (status)
1600 goto out; 1712 goto out;
1601 1713
1714 nfsd4_sequence_check_conn(conn, session);
1715 conn = NULL;
1716
1602 /* Success! bump slot seqid */ 1717 /* Success! bump slot seqid */
1603 slot->sl_inuse = true; 1718 slot->sl_inuse = true;
1604 slot->sl_seqid = seq->seqid; 1719 slot->sl_seqid = seq->seqid;
@@ -1613,6 +1728,7 @@ out:
1613 nfsd4_get_session(cstate->session); 1728 nfsd4_get_session(cstate->session);
1614 atomic_inc(&session->se_client->cl_refcount); 1729 atomic_inc(&session->se_client->cl_refcount);
1615 } 1730 }
1731 kfree(conn);
1616 spin_unlock(&client_lock); 1732 spin_unlock(&client_lock);
1617 dprintk("%s: return %d\n", __func__, ntohl(status)); 1733 dprintk("%s: return %d\n", __func__, ntohl(status));
1618 return status; 1734 return status;
@@ -1747,6 +1863,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1747 goto out; 1863 goto out;
1748 gen_clid(new); 1864 gen_clid(new);
1749 } 1865 }
1866 /*
1867 * XXX: we should probably set this at creation time, and check
1868 * for consistent minorversion use throughout:
1869 */
1870 new->cl_minorversion = 0;
1750 gen_callback(new, setclid, rpc_get_scope_id(sa)); 1871 gen_callback(new, setclid, rpc_get_scope_id(sa));
1751 add_to_unconfirmed(new, strhashval); 1872 add_to_unconfirmed(new, strhashval);
1752 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 1873 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
@@ -1807,7 +1928,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1807 status = nfserr_clid_inuse; 1928 status = nfserr_clid_inuse;
1808 else { 1929 else {
1809 atomic_set(&conf->cl_cb_set, 0); 1930 atomic_set(&conf->cl_cb_set, 0);
1810 nfsd4_probe_callback(conf, &unconf->cl_cb_conn); 1931 nfsd4_change_callback(conf, &unconf->cl_cb_conn);
1932 nfsd4_probe_callback(conf);
1811 expire_client(unconf); 1933 expire_client(unconf);
1812 status = nfs_ok; 1934 status = nfs_ok;
1813 1935
@@ -1841,7 +1963,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1841 } 1963 }
1842 move_to_confirmed(unconf); 1964 move_to_confirmed(unconf);
1843 conf = unconf; 1965 conf = unconf;
1844 nfsd4_probe_callback(conf, &conf->cl_cb_conn); 1966 nfsd4_probe_callback(conf);
1845 status = nfs_ok; 1967 status = nfs_ok;
1846 } 1968 }
1847 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) 1969 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
@@ -2944,7 +3066,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2944 if (STALE_STATEID(stateid)) 3066 if (STALE_STATEID(stateid))
2945 goto out; 3067 goto out;
2946 3068
2947 status = nfserr_bad_stateid; 3069 /*
3070 * We assume that any stateid that has the current boot time,
3071 * but that we can't find, is expired:
3072 */
3073 status = nfserr_expired;
2948 if (is_delegation_stateid(stateid)) { 3074 if (is_delegation_stateid(stateid)) {
2949 dp = find_delegation_stateid(ino, stateid); 3075 dp = find_delegation_stateid(ino, stateid);
2950 if (!dp) 3076 if (!dp)
@@ -2964,6 +3090,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2964 stp = find_stateid(stateid, flags); 3090 stp = find_stateid(stateid, flags);
2965 if (!stp) 3091 if (!stp)
2966 goto out; 3092 goto out;
3093 status = nfserr_bad_stateid;
2967 if (nfs4_check_fh(current_fh, stp)) 3094 if (nfs4_check_fh(current_fh, stp))
2968 goto out; 3095 goto out;
2969 if (!stp->st_stateowner->so_confirmed) 3096 if (!stp->st_stateowner->so_confirmed)
@@ -3038,8 +3165,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
3038 * a replayed close: 3165 * a replayed close:
3039 */ 3166 */
3040 sop = search_close_lru(stateid->si_stateownerid, flags); 3167 sop = search_close_lru(stateid->si_stateownerid, flags);
3168 /* It's not stale; let's assume it's expired: */
3041 if (sop == NULL) 3169 if (sop == NULL)
3042 return nfserr_bad_stateid; 3170 return nfserr_expired;
3043 *sopp = sop; 3171 *sopp = sop;
3044 goto check_replay; 3172 goto check_replay;
3045 } 3173 }
@@ -3304,6 +3432,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3304 status = nfserr_bad_stateid; 3432 status = nfserr_bad_stateid;
3305 if (!is_delegation_stateid(stateid)) 3433 if (!is_delegation_stateid(stateid))
3306 goto out; 3434 goto out;
3435 status = nfserr_expired;
3307 dp = find_delegation_stateid(inode, stateid); 3436 dp = find_delegation_stateid(inode, stateid);
3308 if (!dp) 3437 if (!dp)
3309 goto out; 3438 goto out;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1a468bbd330f..f35a94a04026 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1805,19 +1805,23 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1805 goto out_nfserr; 1805 goto out_nfserr;
1806 } 1806 }
1807 } 1807 }
1808 if ((buflen -= 16) < 0)
1809 goto out_resource;
1810 1808
1811 if (unlikely(bmval2)) { 1809 if (bmval2) {
1810 if ((buflen -= 16) < 0)
1811 goto out_resource;
1812 WRITE32(3); 1812 WRITE32(3);
1813 WRITE32(bmval0); 1813 WRITE32(bmval0);
1814 WRITE32(bmval1); 1814 WRITE32(bmval1);
1815 WRITE32(bmval2); 1815 WRITE32(bmval2);
1816 } else if (likely(bmval1)) { 1816 } else if (bmval1) {
1817 if ((buflen -= 12) < 0)
1818 goto out_resource;
1817 WRITE32(2); 1819 WRITE32(2);
1818 WRITE32(bmval0); 1820 WRITE32(bmval0);
1819 WRITE32(bmval1); 1821 WRITE32(bmval1);
1820 } else { 1822 } else {
1823 if ((buflen -= 8) < 0)
1824 goto out_resource;
1821 WRITE32(1); 1825 WRITE32(1);
1822 WRITE32(bmval0); 1826 WRITE32(bmval0);
1823 } 1827 }
@@ -1828,15 +1832,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1828 u32 word1 = nfsd_suppattrs1(minorversion); 1832 u32 word1 = nfsd_suppattrs1(minorversion);
1829 u32 word2 = nfsd_suppattrs2(minorversion); 1833 u32 word2 = nfsd_suppattrs2(minorversion);
1830 1834
1831 if ((buflen -= 12) < 0)
1832 goto out_resource;
1833 if (!aclsupport) 1835 if (!aclsupport)
1834 word0 &= ~FATTR4_WORD0_ACL; 1836 word0 &= ~FATTR4_WORD0_ACL;
1835 if (!word2) { 1837 if (!word2) {
1838 if ((buflen -= 12) < 0)
1839 goto out_resource;
1836 WRITE32(2); 1840 WRITE32(2);
1837 WRITE32(word0); 1841 WRITE32(word0);
1838 WRITE32(word1); 1842 WRITE32(word1);
1839 } else { 1843 } else {
1844 if ((buflen -= 16) < 0)
1845 goto out_resource;
1840 WRITE32(3); 1846 WRITE32(3);
1841 WRITE32(word0); 1847 WRITE32(word0);
1842 WRITE32(word1); 1848 WRITE32(word1);
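
The nfs4xdr.c hunks move the buffer-space checks so that each branch charges only for the words it actually writes (a count word plus one, two or three bitmap words) rather than always reserving for the largest case. The standalone helper below sketches that idea; like the kernel macros, it decrements the remaining length up front and treats a negative result as running out of reply space. The calling convention and names are illustrative, not the nfsd ones.

/*
 * Sketch of encoding a 1/2/3-word attribute bitmap with a length check
 * sized to what is actually written.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static void put32(unsigned char **p, uint32_t v)
{
	uint32_t be = htonl(v);

	memcpy(*p, &be, 4);
	*p += 4;
}

static int encode_bitmap(unsigned char **p, int *buflen,
			 uint32_t w0, uint32_t w1, uint32_t w2)
{
	int words = w2 ? 3 : (w1 ? 2 : 1);

	if ((*buflen -= 4 * (words + 1)) < 0)
		return -1;		/* out of reply space */
	put32(p, (uint32_t)words);
	put32(p, w0);
	if (words >= 2)
		put32(p, w1);
	if (words == 3)
		put32(p, w2);
	return 0;
}

int main(void)
{
	unsigned char buf[32], *p = buf;
	int buflen = (int)sizeof(buf);

	/* two-word bitmap: charges 12 bytes, not 16 */
	return encode_bitmap(&p, &buflen, 0x0018091a, 0x00b0a23a, 0);
}
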
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 06fa87e52e82..d6dc3f61f8ba 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -22,6 +22,7 @@
22 */ 22 */
23enum { 23enum {
24 NFSD_Root = 1, 24 NFSD_Root = 1,
25#ifdef CONFIG_NFSD_DEPRECATED
25 NFSD_Svc, 26 NFSD_Svc,
26 NFSD_Add, 27 NFSD_Add,
27 NFSD_Del, 28 NFSD_Del,
@@ -29,6 +30,7 @@ enum {
29 NFSD_Unexport, 30 NFSD_Unexport,
30 NFSD_Getfd, 31 NFSD_Getfd,
31 NFSD_Getfs, 32 NFSD_Getfs,
33#endif
32 NFSD_List, 34 NFSD_List,
33 NFSD_Export_features, 35 NFSD_Export_features,
34 NFSD_Fh, 36 NFSD_Fh,
@@ -54,6 +56,7 @@ enum {
54/* 56/*
55 * write() for these nodes. 57 * write() for these nodes.
56 */ 58 */
59#ifdef CONFIG_NFSD_DEPRECATED
57static ssize_t write_svc(struct file *file, char *buf, size_t size); 60static ssize_t write_svc(struct file *file, char *buf, size_t size);
58static ssize_t write_add(struct file *file, char *buf, size_t size); 61static ssize_t write_add(struct file *file, char *buf, size_t size);
59static ssize_t write_del(struct file *file, char *buf, size_t size); 62static ssize_t write_del(struct file *file, char *buf, size_t size);
@@ -61,6 +64,7 @@ static ssize_t write_export(struct file *file, char *buf, size_t size);
61static ssize_t write_unexport(struct file *file, char *buf, size_t size); 64static ssize_t write_unexport(struct file *file, char *buf, size_t size);
62static ssize_t write_getfd(struct file *file, char *buf, size_t size); 65static ssize_t write_getfd(struct file *file, char *buf, size_t size);
63static ssize_t write_getfs(struct file *file, char *buf, size_t size); 66static ssize_t write_getfs(struct file *file, char *buf, size_t size);
67#endif
64static ssize_t write_filehandle(struct file *file, char *buf, size_t size); 68static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
65static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); 69static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size);
66static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); 70static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size);
@@ -76,6 +80,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
76#endif 80#endif
77 81
78static ssize_t (*write_op[])(struct file *, char *, size_t) = { 82static ssize_t (*write_op[])(struct file *, char *, size_t) = {
83#ifdef CONFIG_NFSD_DEPRECATED
79 [NFSD_Svc] = write_svc, 84 [NFSD_Svc] = write_svc,
80 [NFSD_Add] = write_add, 85 [NFSD_Add] = write_add,
81 [NFSD_Del] = write_del, 86 [NFSD_Del] = write_del,
@@ -83,6 +88,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
83 [NFSD_Unexport] = write_unexport, 88 [NFSD_Unexport] = write_unexport,
84 [NFSD_Getfd] = write_getfd, 89 [NFSD_Getfd] = write_getfd,
85 [NFSD_Getfs] = write_getfs, 90 [NFSD_Getfs] = write_getfs,
91#endif
86 [NFSD_Fh] = write_filehandle, 92 [NFSD_Fh] = write_filehandle,
87 [NFSD_FO_UnlockIP] = write_unlock_ip, 93 [NFSD_FO_UnlockIP] = write_unlock_ip,
88 [NFSD_FO_UnlockFS] = write_unlock_fs, 94 [NFSD_FO_UnlockFS] = write_unlock_fs,
@@ -121,6 +127,14 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
121 127
122static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 128static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
123{ 129{
130 static int warned;
131 if (file->f_dentry->d_name.name[0] == '.' && !warned) {
132 printk(KERN_INFO
133 "Warning: \"%s\" uses deprecated NFSD interface: %s."
134 " This will be removed in 2.6.40\n",
135 current->comm, file->f_dentry->d_name.name);
136 warned = 1;
137 }
124 if (! file->private_data) { 138 if (! file->private_data) {
125 /* An attempt to read a transaction file without writing 139 /* An attempt to read a transaction file without writing
126 * causes a 0-byte write so that the file can return 140 * causes a 0-byte write so that the file can return
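
The added lines in nfsctl_transaction_read() emit a one-time deprecation warning whenever one of the legacy dot-files (".svc", ".getfd", ...) is read, naming the offending program. The mechanism is just a static latch around the message; a user-space analogue with illustrative names:

#include <stdio.h>

/* Print the deprecation warning at most once per run. */
static void warn_deprecated_once(const char *prog, const char *file)
{
	static int warned;

	if (!warned) {
		fprintf(stderr,
			"Warning: \"%s\" uses deprecated NFSD interface: %s\n",
			prog, file);
		warned = 1;
	}
}

Keying the check on d_name.name[0] == '.' works because, as the nfsd_fill_super() table further down suggests, only the legacy transaction files carry a leading dot.
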
@@ -187,6 +201,7 @@ static const struct file_operations pool_stats_operations = {
187 * payload - write methods 201 * payload - write methods
188 */ 202 */
189 203
204#ifdef CONFIG_NFSD_DEPRECATED
190/** 205/**
191 * write_svc - Start kernel's NFSD server 206 * write_svc - Start kernel's NFSD server
192 * 207 *
@@ -402,7 +417,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
402 417
403 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); 418 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
404 419
405 clp = auth_unix_lookup(&in6); 420 clp = auth_unix_lookup(&init_net, &in6);
406 if (!clp) 421 if (!clp)
407 err = -EPERM; 422 err = -EPERM;
408 else { 423 else {
@@ -465,7 +480,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
465 480
466 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); 481 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
467 482
468 clp = auth_unix_lookup(&in6); 483 clp = auth_unix_lookup(&init_net, &in6);
469 if (!clp) 484 if (!clp)
470 err = -EPERM; 485 err = -EPERM;
471 else { 486 else {
@@ -482,6 +497,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
482 out: 497 out:
483 return err; 498 return err;
484} 499}
500#endif /* CONFIG_NFSD_DEPRECATED */
485 501
486/** 502/**
487 * write_unlock_ip - Release all locks used by a client 503 * write_unlock_ip - Release all locks used by a client
@@ -1000,12 +1016,12 @@ static ssize_t __write_ports_addxprt(char *buf)
1000 if (err != 0) 1016 if (err != 0)
1001 return err; 1017 return err;
1002 1018
1003 err = svc_create_xprt(nfsd_serv, transport, 1019 err = svc_create_xprt(nfsd_serv, transport, &init_net,
1004 PF_INET, port, SVC_SOCK_ANONYMOUS); 1020 PF_INET, port, SVC_SOCK_ANONYMOUS);
1005 if (err < 0) 1021 if (err < 0)
1006 goto out_err; 1022 goto out_err;
1007 1023
1008 err = svc_create_xprt(nfsd_serv, transport, 1024 err = svc_create_xprt(nfsd_serv, transport, &init_net,
1009 PF_INET6, port, SVC_SOCK_ANONYMOUS); 1025 PF_INET6, port, SVC_SOCK_ANONYMOUS);
1010 if (err < 0 && err != -EAFNOSUPPORT) 1026 if (err < 0 && err != -EAFNOSUPPORT)
1011 goto out_close; 1027 goto out_close;
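
Besides gaining the explicit namespace argument, __write_ports_addxprt() keeps its existing shape: create the PF_INET transport first, then PF_INET6, and treat -EAFNOSUPPORT as "this kernel has no IPv6" rather than as a failure. A user-space analogue of that error-handling pattern, with illustrative names:

#include <errno.h>
#include <sys/socket.h>
#include <unistd.h>

/* Open one socket per address family; an IPv4-only system is not an error. */
static int open_both_families(int type, int *fd4, int *fd6)
{
	*fd6 = -1;
	*fd4 = socket(AF_INET, type, 0);
	if (*fd4 < 0)
		return -errno;

	*fd6 = socket(AF_INET6, type, 0);
	if (*fd6 < 0 && errno != EAFNOSUPPORT) {
		close(*fd4);		/* real failure: unwind the v4 socket */
		*fd4 = -1;
		return -errno;
	}
	return 0;			/* *fd6 may stay -1 on v4-only hosts */
}
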
@@ -1356,6 +1372,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
1356static int nfsd_fill_super(struct super_block * sb, void * data, int silent) 1372static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1357{ 1373{
1358 static struct tree_descr nfsd_files[] = { 1374 static struct tree_descr nfsd_files[] = {
1375#ifdef CONFIG_NFSD_DEPRECATED
1359 [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, 1376 [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR},
1360 [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, 1377 [NFSD_Add] = {".add", &transaction_ops, S_IWUSR},
1361 [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, 1378 [NFSD_Del] = {".del", &transaction_ops, S_IWUSR},
@@ -1363,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1363 [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, 1380 [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR},
1364 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 1381 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
1365 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 1382 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
1383#endif
1366 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 1384 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
1367 [NFSD_Export_features] = {"export_features", 1385 [NFSD_Export_features] = {"export_features",
1368 &export_features_operations, S_IRUGO}, 1386 &export_features_operations, S_IRUGO},
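
Taken together, the nfsctl.c hunks hide every piece of the old syscall-style interface behind CONFIG_NFSD_DEPRECATED: the enum entries, the write handlers, the write_op[] dispatch table and the nfsd_fill_super() file table are all gated by the same symbol, so the designated-initializer indices stay consistent whichever way the option is set. A compact illustration of that pattern (made-up names, not the nfsctl.c ones):

/* Gating the enum and every table indexed by it with the same config
 * symbol keeps the [index] = ... initializers in sync. */
enum {
	CTL_Root = 1,
#ifdef CONFIG_EXAMPLE_DEPRECATED
	CTL_Legacy,
#endif
	CTL_Modern,
};

static const char *ctl_names[] = {
#ifdef CONFIG_EXAMPLE_DEPRECATED
	[CTL_Legacy] = ".legacy",
#endif
	[CTL_Modern] = "modern",
};
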
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index b76ac3a82e39..6b641cf2c19a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -249,7 +249,7 @@ extern time_t nfsd4_grace;
249#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ 249#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
250#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ 250#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
251 251
252#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ 252#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
253 253
254/* 254/*
255 * The following attributes are currently not supported by the NFSv4 server: 255 * The following attributes are currently not supported by the NFSv4 server:
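
Dropping NFSD_LAUNDROMAT_MINTIMEOUT from 10 to 1 second lowers the floor on how soon the laundromat can reschedule itself. A hedged sketch of the rearm logic (nfs4state.c clamps the computed delay to this constant before queueing the delayed work; the helper below is illustrative):

/* The laundromat never rearms sooner than the minimum, so lowering the
 * constant lets expired state be reaped with ~1s rather than ~10s
 * granularity. */
static unsigned long next_laundromat_delay(time_t computed)
{
	if (computed < NFSD_LAUNDROMAT_MINTIMEOUT)
		computed = NFSD_LAUNDROMAT_MINTIMEOUT;
	return computed * HZ;		/* argument to queue_delayed_work() */
}
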
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e2c43464f237..2bae1d86f5f2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -16,6 +16,7 @@
16#include <linux/lockd/bind.h> 16#include <linux/lockd/bind.h>
17#include <linux/nfsacl.h> 17#include <linux/nfsacl.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <net/net_namespace.h>
19#include "nfsd.h" 20#include "nfsd.h"
20#include "cache.h" 21#include "cache.h"
21#include "vfs.h" 22#include "vfs.h"
@@ -186,12 +187,12 @@ static int nfsd_init_socks(int port)
186 if (!list_empty(&nfsd_serv->sv_permsocks)) 187 if (!list_empty(&nfsd_serv->sv_permsocks))
187 return 0; 188 return 0;
188 189
189 error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, 190 error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port,
190 SVC_SOCK_DEFAULTS); 191 SVC_SOCK_DEFAULTS);
191 if (error < 0) 192 if (error < 0)
192 return error; 193 return error;
193 194
194 error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, 195 error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port,
195 SVC_SOCK_DEFAULTS); 196 SVC_SOCK_DEFAULTS);
196 if (error < 0) 197 if (error < 0)
197 return error; 198 return error;
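
The only functional change in nfssvc.c is the extra struct net argument: the permanent UDP and TCP listeners are now created in an explicit network namespace, although nfsd still pins everything to &init_net. A hedged sketch of a call site using the new signature (hypothetical helper; svc_create_xprt() and init_net come from <linux/sunrpc/svc_xprt.h> and <net/net_namespace.h>):

/* Hypothetical helper: add one more TCP listener for an existing
 * svc_serv in the initial network namespace. */
static int nfsd_add_tcp_listener(struct svc_serv *serv, unsigned short port)
{
	return svc_create_xprt(serv, "tcp", &init_net, PF_INET, port,
			       SVC_SOCK_DEFAULTS);
}
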
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 322518c88e4b..39adc27b0685 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,6 +35,7 @@
35#ifndef _NFSD4_STATE_H 35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H 36#define _NFSD4_STATE_H
37 37
38#include <linux/sunrpc/svc_xprt.h>
38#include <linux/nfsd/nfsfh.h> 39#include <linux/nfsd/nfsfh.h>
39#include "nfsfh.h" 40#include "nfsfh.h"
40 41
@@ -64,19 +65,12 @@ typedef struct {
64 (s)->si_fileid, \ 65 (s)->si_fileid, \
65 (s)->si_generation 66 (s)->si_generation
66 67
67struct nfsd4_cb_sequence {
68 /* args/res */
69 u32 cbs_minorversion;
70 struct nfs4_client *cbs_clp;
71};
72
73struct nfs4_rpc_args {
74 void *args_op;
75 struct nfsd4_cb_sequence args_seq;
76};
77
78struct nfsd4_callback { 68struct nfsd4_callback {
79 struct nfs4_rpc_args cb_args; 69 void *cb_op;
70 struct nfs4_client *cb_clp;
71 u32 cb_minorversion;
72 struct rpc_message cb_msg;
73 const struct rpc_call_ops *cb_ops;
80 struct work_struct cb_work; 74 struct work_struct cb_work;
81}; 75};
82 76
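
With nfs4_rpc_args and nfsd4_cb_sequence folded away, a callback is now described entirely by the nfsd4_callback embedded in its owning object: the operation pointer, the client, the rpc_message and rpc_call_ops, plus the work item that actually sends it. An illustrative sketch of how a caller is expected to use it (field names from this header; callback_wq is the workqueue set up by nfsd4_create_callback_queue(), and the real code initializes cb_work once when the containing object is allocated):

static void sketch_queue_callback(struct nfsd4_callback *cb,
				  struct nfs4_client *clp, void *op,
				  const struct rpc_message *msg,
				  const struct rpc_call_ops *ops)
{
	cb->cb_op = op;				/* e.g. the delegation being recalled */
	cb->cb_clp = clp;
	cb->cb_minorversion = clp->cl_minorversion;
	cb->cb_msg = *msg;			/* rpc_proc, argp, resp, cred */
	cb->cb_ops = ops;
	INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc);
	queue_work(callback_wq, &cb->cb_work);	/* nfsd4_do_callback_rpc() sends it */
}
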
@@ -91,7 +85,6 @@ struct nfs4_delegation {
91 u32 dl_type; 85 u32 dl_type;
92 time_t dl_time; 86 time_t dl_time;
93/* For recall: */ 87/* For recall: */
94 u32 dl_ident;
95 stateid_t dl_stateid; 88 stateid_t dl_stateid;
96 struct knfsd_fh dl_fh; 89 struct knfsd_fh dl_fh;
97 int dl_retries; 90 int dl_retries;
@@ -103,8 +96,8 @@ struct nfs4_cb_conn {
103 /* SETCLIENTID info */ 96 /* SETCLIENTID info */
104 struct sockaddr_storage cb_addr; 97 struct sockaddr_storage cb_addr;
105 size_t cb_addrlen; 98 size_t cb_addrlen;
106 u32 cb_prog; 99 u32 cb_prog; /* used only in 4.0 case;
107 u32 cb_minorversion; 100 per-session otherwise */
108 u32 cb_ident; /* minorversion 0 only */ 101 u32 cb_ident; /* minorversion 0 only */
109 struct svc_xprt *cb_xprt; /* minorversion 1 only */ 102 struct svc_xprt *cb_xprt; /* minorversion 1 only */
110}; 103};
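
The reworded cb_prog comment reflects the v4.0/v4.1 split now visible in struct nfs4_client below: for a 4.0 client the callback program number arrives in SETCLIENTID and lives here, while a 4.1 client supplies it per session in CREATE_SESSION (se_cb_prog). A hedged helper showing what the comment implies, using only fields declared in this header:

static u32 callback_program(struct nfs4_client *clp)
{
	if (clp->cl_minorversion == 0)
		return clp->cl_cb_conn.cb_prog;		/* from SETCLIENTID */
	return clp->cl_cb_session->se_cb_prog;		/* from CREATE_SESSION */
}
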
@@ -160,6 +153,15 @@ struct nfsd4_clid_slot {
160 struct nfsd4_create_session sl_cr_ses; 153 struct nfsd4_create_session sl_cr_ses;
161}; 154};
162 155
156struct nfsd4_conn {
157 struct list_head cn_persession;
158 struct svc_xprt *cn_xprt;
159 struct svc_xpt_user cn_xpt_user;
160 struct nfsd4_session *cn_session;
161/* CDFC4_FORE, CDFC4_BACK: */
162 unsigned char cn_flags;
163};
164
163struct nfsd4_session { 165struct nfsd4_session {
164 struct kref se_ref; 166 struct kref se_ref;
165 struct list_head se_hash; /* hash by sessionid */ 167 struct list_head se_hash; /* hash by sessionid */
@@ -169,6 +171,9 @@ struct nfsd4_session {
169 struct nfs4_sessionid se_sessionid; 171 struct nfs4_sessionid se_sessionid;
170 struct nfsd4_channel_attrs se_fchannel; 172 struct nfsd4_channel_attrs se_fchannel;
171 struct nfsd4_channel_attrs se_bchannel; 173 struct nfsd4_channel_attrs se_bchannel;
174 struct list_head se_conns;
175 u32 se_cb_prog;
176 u32 se_cb_seq_nr;
172 struct nfsd4_slot *se_slots[]; /* forward channel slots */ 177 struct nfsd4_slot *se_slots[]; /* forward channel slots */
173}; 178};
174 179
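
The new nfsd4_conn ties a transport (svc_xprt) to a session: each bound connection sits on the session's se_conns list, cn_flags records the RFC 5661 channel direction(s), and cn_xpt_user lets the session notice when the transport goes away. An illustrative sketch of registering a connection (locking and the xpt_user callback are elided; assume the caller serializes against the session):

static void sketch_bind_conn(struct nfsd4_session *ses, struct nfsd4_conn *c,
			     unsigned char dir)
{
	c->cn_session = ses;
	c->cn_flags = dir;			/* CDFC4_FORE and/or CDFC4_BACK */
	list_add(&c->cn_persession, &ses->se_conns);
}
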
@@ -221,24 +226,32 @@ struct nfs4_client {
221 clientid_t cl_clientid; /* generated by server */ 226 clientid_t cl_clientid; /* generated by server */
222 nfs4_verifier cl_confirm; /* generated by server */ 227 nfs4_verifier cl_confirm; /* generated by server */
223 u32 cl_firststate; /* recovery dir creation */ 228 u32 cl_firststate; /* recovery dir creation */
229 u32 cl_minorversion;
224 230
225 /* for v4.0 and v4.1 callbacks: */ 231 /* for v4.0 and v4.1 callbacks: */
226 struct nfs4_cb_conn cl_cb_conn; 232 struct nfs4_cb_conn cl_cb_conn;
233#define NFSD4_CLIENT_CB_UPDATE 1
234#define NFSD4_CLIENT_KILL 2
235 unsigned long cl_cb_flags;
227 struct rpc_clnt *cl_cb_client; 236 struct rpc_clnt *cl_cb_client;
237 u32 cl_cb_ident;
228 atomic_t cl_cb_set; 238 atomic_t cl_cb_set;
239 struct nfsd4_callback cl_cb_null;
240 struct nfsd4_session *cl_cb_session;
241
242 /* for all client information that callback code might need: */
243 spinlock_t cl_lock;
229 244
230 /* for nfs41 */ 245 /* for nfs41 */
231 struct list_head cl_sessions; 246 struct list_head cl_sessions;
232 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ 247 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
233 u32 cl_exchange_flags; 248 u32 cl_exchange_flags;
234 struct nfs4_sessionid cl_sessionid;
235 /* number of rpc's in progress over an associated session: */ 249 /* number of rpc's in progress over an associated session: */
236 atomic_t cl_refcount; 250 atomic_t cl_refcount;
237 251
238 /* for nfs41 callbacks */ 252 /* for nfs41 callbacks */
239 /* We currently support a single back channel with a single slot */ 253 /* We currently support a single back channel with a single slot */
240 unsigned long cl_cb_slot_busy; 254 unsigned long cl_cb_slot_busy;
241 u32 cl_cb_seq_nr;
242 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ 255 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
243 /* wait here for slots */ 256 /* wait here for slots */
244}; 257};
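
On the client side, callback state that used to live elsewhere (minorversion, callback ident, sequence number, session) moves onto struct nfs4_client itself, cl_lock is documented as guarding everything the callback code may read asynchronously, and cl_cb_flags lets the state machine signal the callback work that the connection changed (NFSD4_CLIENT_CB_UPDATE) or that the client is being torn down (NFSD4_CLIENT_KILL). A hedged sketch of publishing a new backchannel session, assuming the flags are bit numbers used with set_bit() as the unsigned long type suggests:

static void sketch_set_cb_session(struct nfs4_client *clp,
				  struct nfsd4_session *ses)
{
	spin_lock(&clp->cl_lock);
	clp->cl_cb_session = ses;		/* backchannel RPCs use this session */
	spin_unlock(&clp->cl_lock);

	/* Assumption: the flag tells the (re)queued callback work to pick
	 * up the new connection before sending anything else. */
	set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
}
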
@@ -440,12 +453,13 @@ extern int nfs4_in_grace(void);
440extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 453extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
441extern void nfs4_free_stateowner(struct kref *kref); 454extern void nfs4_free_stateowner(struct kref *kref);
442extern int set_callback_cred(void); 455extern int set_callback_cred(void);
443extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 456extern void nfsd4_probe_callback(struct nfs4_client *clp);
457extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
444extern void nfsd4_do_callback_rpc(struct work_struct *); 458extern void nfsd4_do_callback_rpc(struct work_struct *);
445extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 459extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
446extern int nfsd4_create_callback_queue(void); 460extern int nfsd4_create_callback_queue(void);
447extern void nfsd4_destroy_callback_queue(void); 461extern void nfsd4_destroy_callback_queue(void);
448extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *); 462extern void nfsd4_shutdown_callback(struct nfs4_client *);
449extern void nfs4_put_delegation(struct nfs4_delegation *dp); 463extern void nfs4_put_delegation(struct nfs4_delegation *dp);
450extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); 464extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
451extern void nfsd4_init_recdir(char *recdir_name); 465extern void nfsd4_init_recdir(char *recdir_name);
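
Finally, the callback entry points change shape: probing no longer takes the connection info directly; nfsd4_change_callback() records a new nfs4_cb_conn on the client, nfsd4_probe_callback() kicks off the asynchronous null probe (cl_cb_null), and nfsd4_shutdown_callback() replaces nfsd4_set_callback_client(..., NULL) for teardown. A hedged sketch of how SETCLIENTID_CONFIRM-style code would use the pair (illustrative only; the real call sites are in nfs4state.c):

static void sketch_update_client_callback(struct nfs4_client *clp,
					  struct nfs4_cb_conn *conn)
{
	nfsd4_change_callback(clp, conn);	/* remember addr, program, ident */
	nfsd4_probe_callback(clp);		/* async CB_NULL to verify the path */
}
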