Diffstat (limited to 'fs/nfs')
-rw-r--r--  fs/nfs/Kconfig                48
-rw-r--r--  fs/nfs/Makefile                4
-rw-r--r--  fs/nfs/callback.c             15
-rw-r--r--  fs/nfs/callback_proc.c        27
-rw-r--r--  fs/nfs/client.c               51
-rw-r--r--  fs/nfs/delegation.c           26
-rw-r--r--  fs/nfs/delegation.h            4
-rw-r--r--  fs/nfs/dir.c                1035
-rw-r--r--  fs/nfs/direct.c               29
-rw-r--r--  fs/nfs/dns_resolve.c          30
-rw-r--r--  fs/nfs/dns_resolve.h          12
-rw-r--r--  fs/nfs/file.c                141
-rw-r--r--  fs/nfs/getroot.c               3
-rw-r--r--  fs/nfs/idmap.c               211
-rw-r--r--  fs/nfs/inode.c               124
-rw-r--r--  fs/nfs/internal.h             23
-rw-r--r--  fs/nfs/mount_clnt.c            4
-rw-r--r--  fs/nfs/nfs2xdr.c             114
-rw-r--r--  fs/nfs/nfs3proc.c             62
-rw-r--r--  fs/nfs/nfs3xdr.c             204
-rw-r--r--  fs/nfs/nfs4_fs.h              61
-rw-r--r--  fs/nfs/nfs4filelayout.c      280
-rw-r--r--  fs/nfs/nfs4filelayout.h       94
-rw-r--r--  fs/nfs/nfs4filelayoutdev.c   448
-rw-r--r--  fs/nfs/nfs4proc.c            950
-rw-r--r--  fs/nfs/nfs4renewd.c            4
-rw-r--r--  fs/nfs/nfs4state.c           132
-rw-r--r--  fs/nfs/nfs4xdr.c             807
-rw-r--r--  fs/nfs/nfsroot.c             568
-rw-r--r--  fs/nfs/pagelist.c             14
-rw-r--r--  fs/nfs/pnfs.c                783
-rw-r--r--  fs/nfs/pnfs.h                189
-rw-r--r--  fs/nfs/proc.c                 35
-rw-r--r--  fs/nfs/read.c                  7
-rw-r--r--  fs/nfs/super.c                95
-rw-r--r--  fs/nfs/sysctl.c                2
-rw-r--r--  fs/nfs/unlink.c              261
-rw-r--r--  fs/nfs/write.c                31
38 files changed, 5100 insertions, 1828 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index a43d07e7b924..ba306658a6db 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -61,9 +61,9 @@ config NFS_V3_ACL
 	  If unsure, say N.
 
 config NFS_V4
-	bool "NFS client support for NFS version 4 (EXPERIMENTAL)"
-	depends on NFS_FS && EXPERIMENTAL
-	select RPCSEC_GSS_KRB5
+	bool "NFS client support for NFS version 4"
+	depends on NFS_FS
+	select SUNRPC_GSS
 	help
 	  This option enables support for version 4 of the NFS protocol
 	  (RFC 3530) in the kernel's NFS client.
@@ -72,16 +72,20 @@ config NFS_V4
 	  space programs which can be found in the Linux nfs-utils package,
 	  available from http://linux-nfs.org/.
 
-	  If unsure, say N.
+	  If unsure, say Y.
 
 config NFS_V4_1
-	bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)"
-	depends on NFS_V4 && EXPERIMENTAL
+	bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
+	depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+	select PNFS_FILE_LAYOUT
 	help
 	  This option enables support for minor version 1 of the NFSv4 protocol
-	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
+	  (RFC 5661) in the kernel's NFS client.
+
+	  If unsure, say N.
 
-	  Unless you're an NFS developer, say N.
+config PNFS_FILE_LAYOUT
+	tristate
 
 config ROOT_NFS
 	bool "Root file system on NFS"
@@ -100,3 +104,31 @@ config NFS_FSCACHE
 	help
 	  Say Y here if you want NFS data to be cached locally on disc through
 	  the general filesystem cache manager
+
+config NFS_USE_LEGACY_DNS
+	bool "Use the legacy NFS DNS resolver"
+	depends on NFS_V4
+	help
+	  The kernel now provides a method for translating a host name into an
+	  IP address. Select Y here if you would rather use your own DNS
+	  resolver script.
+
+	  If unsure, say N
+
+config NFS_USE_KERNEL_DNS
+	bool
+	depends on NFS_V4 && !NFS_USE_LEGACY_DNS
+	select DNS_RESOLVER
+	select KEYS
+	default y
+
+config NFS_USE_NEW_IDMAPPER
+	bool "Use the new idmapper upcall routine"
+	depends on NFS_V4 && KEYS
+	help
+	  Say Y here if you want NFS to use the new idmapper upcall functions.
+	  You will need /sbin/request-key (usually provided by the keyutils
+	  package). For details, read
+	  <file:Documentation/filesystems/nfs/idmapper.txt>.
+
+	  If you are unsure, say N.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index da7fda639eac..4776ff9e3814 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o \
 			   nfs4namespace.o
+nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
 nfs-$(CONFIG_SYSCTL) += sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
+
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 36dfdae95123..aeec017fe814 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -45,7 +45,7 @@ unsigned short nfs_callback_tcpport;
 unsigned short nfs_callback_tcpport6;
 #define NFS_CALLBACK_MAXPORTNR (65535U)
 
-static int param_set_portnr(const char *val, struct kernel_param *kp)
+static int param_set_portnr(const char *val, const struct kernel_param *kp)
 {
 	unsigned long num;
 	int ret;
@@ -58,11 +58,10 @@ static int param_set_portnr(const char *val, struct kernel_param *kp)
 	*((unsigned int *)kp->arg) = num;
 	return 0;
 }
-
-static int param_get_portnr(char *buffer, struct kernel_param *kp)
-{
-	return param_get_uint(buffer, kp);
-}
+static struct kernel_param_ops param_ops_portnr = {
+	.set = param_set_portnr,
+	.get = param_get_uint,
+};
 #define param_check_portnr(name, p) __param_check(name, p, unsigned int);
 
 module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
@@ -110,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv)
 {
 	int ret;
 
-	ret = svc_create_xprt(serv, "tcp", PF_INET,
+	ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
 				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
 	if (ret <= 0)
 		goto out_err;
@@ -118,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv)
 	dprintk("NFS: Callback listener port = %u (af %u)\n",
 			nfs_callback_tcpport, PF_INET);
 
-	ret = svc_create_xprt(serv, "tcp", PF_INET6,
+	ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
 				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
 	if (ret > 0) {
 		nfs_callback_tcpport6 = ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index a08770a7e857..2950fca0c61b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -37,8 +37,8 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
 	if (inode == NULL)
 		goto out_putclient;
 	nfsi = NFS_I(inode);
-	down_read(&nfsi->rwsem);
-	delegation = nfsi->delegation;
+	rcu_read_lock();
+	delegation = rcu_dereference(nfsi->delegation);
 	if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
 		goto out_iput;
 	res->size = i_size_read(inode);
@@ -53,7 +53,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
 				args->bitmap[1];
 	res->status = 0;
 out_iput:
-	up_read(&nfsi->rwsem);
+	rcu_read_unlock();
 	iput(inode);
 out_putclient:
 	nfs_put_client(clp);
@@ -62,16 +62,6 @@ out:
 	return res->status;
 }
 
-static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
-{
-#if defined(CONFIG_NFS_V4_1)
-	if (clp->cl_minorversion > 0)
-		return nfs41_validate_delegation_stateid;
-#endif
-	return nfs4_validate_delegation_stateid;
-}
-
-
 __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 {
 	struct nfs_client *clp;
@@ -92,8 +82,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	inode = nfs_delegation_find_inode(clp, &args->fh);
 	if (inode != NULL) {
 		/* Set up a helper thread to actually return the delegation */
-		switch (nfs_async_inode_return_delegation(inode, &args->stateid,
-				nfs_validate_delegation_stateid(clp))) {
+		switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
 		case 0:
 			res = 0;
 			break;
@@ -129,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
 	if (delegation == NULL)
 		return 0;
 
-	/* seqid is 4-bytes long */
-	if (((u32 *) &stateid->data)[0] != 0)
+	if (stateid->stateid.seqid != 0)
 		return 0;
-	if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
-			sizeof(stateid->data)-4))
+	if (memcmp(&delegation->stateid.stateid.other,
+		   &stateid->stateid.other,
+		   NFS4_STATEID_OTHER_SIZE))
 		return 0;
 
 	return 1;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d25b5257b7a1..0870d0d4efc0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
 #include "iostat.h"
 #include "internal.h"
 #include "fscache.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_CLIENT
 
@@ -150,11 +151,14 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
 	clp->cl_minorversion = cl_init->minorversion;
+	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
 #endif
 	cred = rpc_lookup_machine_cred();
 	if (!IS_ERR(cred))
 		clp->cl_machine_cred = cred;
-
+#if defined(CONFIG_NFS_V4_1)
+	INIT_LIST_HEAD(&clp->cl_layouts);
+#endif
 	nfs_fscache_get_client_cookie(clp);
 
 	return clp;
@@ -178,7 +182,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
 		clp->cl_session = NULL;
 	}
 
-	clp->cl_call_sync = _nfs4_call_sync;
+	clp->cl_mvops = nfs_v4_minor_ops[0];
 #endif /* CONFIG_NFS_V4_1 */
 }
 
@@ -188,7 +192,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
 static void nfs4_destroy_callback(struct nfs_client *clp)
 {
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
-		nfs_callback_down(clp->cl_minorversion);
+		nfs_callback_down(clp->cl_mvops->minor_version);
 }
 
 static void nfs4_shutdown_client(struct nfs_client *clp)
@@ -251,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp)
 		nfs_free_client(clp);
 	}
 }
+EXPORT_SYMBOL_GPL(nfs_put_client);
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 /*
@@ -274,7 +279,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
 	    sin1->sin6_scope_id != sin2->sin6_scope_id)
 		return 0;
 
-	return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
+	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
 }
 #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
 static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
@@ -600,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
 {
 	struct rpc_clnt *clnt = NULL;
 	struct rpc_create_args args = {
+		.net		= &init_net,
 		.protocol	= clp->cl_proto,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
@@ -634,7 +640,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
  */
 static void nfs_destroy_server(struct nfs_server *server)
 {
-	if (!(server->flags & NFS_MOUNT_NONLM))
+	if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) ||
+			!(server->flags & NFS_MOUNT_LOCAL_FCNTL))
 		nlmclnt_done(server->nlm_host);
 }
 
@@ -656,7 +663,8 @@ static int nfs_start_lockd(struct nfs_server *server)
 
 	if (nlm_init.nfs_version > 3)
 		return 0;
-	if (server->flags & NFS_MOUNT_NONLM)
+	if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) &&
+			(server->flags & NFS_MOUNT_LOCAL_FCNTL))
 		return 0;
 
 	switch (clp->cl_proto) {
@@ -897,11 +905,13 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
 		server->wsize = NFS_MAX_FILE_IO_SIZE;
 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	set_pnfs_layoutdriver(server, fsinfo->layouttype);
+
 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
 
 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
-	if (server->dtsize > PAGE_CACHE_SIZE)
-		server->dtsize = PAGE_CACHE_SIZE;
+	if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES)
+		server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES;
 	if (server->dtsize > server->rsize)
 		server->dtsize = server->rsize;
 
@@ -912,6 +922,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
 
 	server->maxfilesize = fsinfo->maxfilesize;
 
+	server->time_delta = fsinfo->time_delta;
+
 	/* We're airborne Set socket buffersize */
 	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
 }
@@ -934,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
 	}
 
 	fsinfo.fattr = fattr;
+	fsinfo.layouttype = 0;
 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
 	if (error < 0)
 		goto out_error;
@@ -1016,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server)
 {
 	dprintk("--> nfs_free_server()\n");
 
+	unset_pnfs_layoutdriver(server);
 	spin_lock(&nfs_client_lock);
 	list_del(&server->client_link);
 	list_del(&server->master_link);
@@ -1126,7 +1140,7 @@ static int nfs4_init_callback(struct nfs_client *clp)
 			return error;
 		}
 
-		error = nfs_callback_up(clp->cl_minorversion,
+		error = nfs_callback_up(clp->cl_mvops->minor_version,
 					clp->cl_rpcclient->cl_xprt);
 		if (error < 0) {
 			dprintk("%s: failed to start callback. Error = %d\n",
@@ -1143,10 +1157,8 @@ static int nfs4_init_callback(struct nfs_client *clp)
  */
 static int nfs4_init_client_minor_version(struct nfs_client *clp)
 {
-	clp->cl_call_sync = _nfs4_call_sync;
-
 #if defined(CONFIG_NFS_V4_1)
-	if (clp->cl_minorversion) {
+	if (clp->cl_mvops->minor_version) {
 		struct nfs4_session *session = NULL;
 		/*
 		 * Create the session and mark it expired.
@@ -1158,7 +1170,13 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
 			return -ENOMEM;
 
 		clp->cl_session = session;
-		clp->cl_call_sync = _nfs4_call_sync_session;
+		/*
+		 * The create session reply races with the server back
+		 * channel probe. Mark the client NFS_CS_SESSION_INITING
+		 * so that the client back channel can find the
+		 * nfs_client struct
+		 */
+		clp->cl_cons_state = NFS_CS_SESSION_INITING;
 	}
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -1351,8 +1369,9 @@ static int nfs4_init_server(struct nfs_server *server,
 
 	/* Initialise the client representation from the mount data */
 	server->flags = data->flags;
-	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
-		NFS_CAP_POSIX_LOCK;
+	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
+	if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
+		server->caps |= NFS_CAP_READDIRPLUS;
 	server->options = data->options;
 
 	/* Get a client record */
@@ -1454,7 +1473,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 				data->authflavor,
 				parent_server->client->cl_xprt->prot,
 				parent_server->client->cl_timeout,
-				parent_client->cl_minorversion);
+				parent_client->cl_mvops->minor_version);
 	if (error < 0)
 		goto error;
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 301634543974..232a7eead33a 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -71,20 +71,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
 	if (inode->i_flock == NULL)
 		goto out;
 
-	/* Protect inode->i_flock using the BKL */
-	lock_kernel();
+	/* Protect inode->i_flock using the file locks lock */
+	lock_flocks();
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 			continue;
 		if (nfs_file_open_context(fl->fl_file) != ctx)
 			continue;
-		unlock_kernel();
+		unlock_flocks();
 		status = nfs4_lock_delegation_recall(state, fl);
 		if (status < 0)
 			goto out;
-		lock_kernel();
+		lock_flocks();
 	}
-	unlock_kernel();
+	unlock_flocks();
 out:
 	return status;
 }
@@ -268,14 +268,6 @@ out:
 	return status;
 }
 
-/* Sync all data to disk upon delegation return */
-static void nfs_msync_inode(struct inode *inode)
-{
-	filemap_fdatawrite(inode->i_mapping);
-	nfs_wb_all(inode);
-	filemap_fdatawait(inode->i_mapping);
-}
-
 /*
  * Basic procedure for returning a delegation to the server
  */
@@ -367,7 +359,7 @@ int nfs_inode_return_delegation(struct inode *inode)
 		delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		if (delegation != NULL) {
-			nfs_msync_inode(inode);
+			nfs_wb_all(inode);
 			err = __nfs_inode_return_delegation(inode, delegation, 1);
 		}
 	}
@@ -471,9 +463,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
 /*
  * Asynchronous delegation recall!
  */
-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
-				      int (*validate_stateid)(struct nfs_delegation *delegation,
-							      const nfs4_stateid *stateid))
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_delegation *delegation;
@@ -481,7 +471,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
 	rcu_read_lock();
 	delegation = rcu_dereference(NFS_I(inode)->delegation);
 
-	if (!validate_stateid(delegation, stateid)) {
+	if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
 		rcu_read_unlock();
 		return -ENOENT;
 	}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 69e7b8140122..2026304bda19 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -34,9 +34,7 @@ enum {
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 int nfs_inode_return_delegation(struct inode *inode);
-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
-				      int (*validate_stateid)(struct nfs_delegation *delegation,
-							      const nfs4_stateid *stateid));
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 void nfs_inode_return_delegation_noreclaim(struct inode *inode);
 
 struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e60416d3f818..07ac3847e562 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,11 +33,12 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
-#include "nfs4_fs.h"
 #include "delegation.h"
 #include "iostat.h"
 #include "internal.h"
+#include "fscache.h"
 
 /* #define NFS_DEBUG_VERBOSE 1 */
 
@@ -55,6 +56,7 @@ static int nfs_rename(struct inode *, struct dentry *,
 			  struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, int);
 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
+static int nfs_readdir_clear_array(struct page*, gfp_t);
 
 const struct file_operations nfs_dir_operations = {
 	.llseek		= nfs_llseek_dir,
@@ -80,6 +82,10 @@ const struct inode_operations nfs_dir_inode_operations = {
 	.setattr	= nfs_setattr,
 };
 
+const struct address_space_operations nfs_dir_addr_space_ops = {
+	.releasepage = nfs_readdir_clear_array,
+};
+
 #ifdef CONFIG_NFS_V3
 const struct inode_operations nfs3_dir_inode_operations = {
 	.create		= nfs_create,
@@ -104,8 +110,9 @@ const struct inode_operations nfs3_dir_inode_operations = {
 #ifdef CONFIG_NFS_V4
 
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
+static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
 const struct inode_operations nfs4_dir_inode_operations = {
-	.create		= nfs_create,
+	.create		= nfs_open_create,
 	.lookup		= nfs_atomic_lookup,
 	.link		= nfs_link,
 	.unlink		= nfs_unlink,
@@ -140,54 +147,207 @@ nfs_opendir(struct inode *inode, struct file *filp)
 
 	/* Call generic open code in order to cache credentials */
 	res = nfs_open(inode, filp);
+	if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
+		/* This is a mountpoint, so d_revalidate will never
+		 * have been called, so we need to refresh the
+		 * inode (for close-open consistency) ourselves.
+		 */
+		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	}
 	return res;
 }
 
-typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int);
+struct nfs_cache_array_entry {
+	u64 cookie;
+	u64 ino;
+	struct qstr string;
+};
+
+struct nfs_cache_array {
+	unsigned int size;
+	int eof_index;
+	u64 last_cookie;
+	struct nfs_cache_array_entry array[0];
+};
+
+#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
+
+typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 typedef struct {
 	struct file	*file;
 	struct page	*page;
 	unsigned long	page_index;
-	__be32		*ptr;
 	u64		*dir_cookie;
 	loff_t		current_index;
-	struct nfs_entry *entry;
 	decode_dirent_t	decode;
-	int		plus;
+
 	unsigned long	timestamp;
 	unsigned long	gencount;
-	int		timestamp_valid;
+	unsigned int	cache_entry_index;
+	unsigned int	plus:1;
+	unsigned int	eof:1;
 } nfs_readdir_descriptor_t;
 
-/* Now we cache directories properly, by stuffing the dirent
- * data directly in the page cache.
- *
- * Inode invalidation due to refresh etc. takes care of
- * _everything_, no sloppy entry flushing logic, no extraneous
- * copying, network direct to page cache, the way it was meant
- * to be.
- *
- * NOTE: Dirent information verification is done always by the
- * page-in of the RPC reply, nowhere else, this simplies
- * things substantially.
+/*
+ * The caller is responsible for calling nfs_readdir_release_array(page)
  */
 static
-int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
+struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
+{
+	if (page == NULL)
+		return ERR_PTR(-EIO);
+	return (struct nfs_cache_array *)kmap(page);
+}
+
+static
+void nfs_readdir_release_array(struct page *page)
+{
+	kunmap(page);
+}
+
+/*
+ * we are freeing strings created by nfs_add_to_readdir_array()
+ */
+static
+int nfs_readdir_clear_array(struct page *page, gfp_t mask)
+{
+	struct nfs_cache_array *array = nfs_readdir_get_array(page);
+	int i;
+	for (i = 0; i < array->size; i++)
+		kfree(array->array[i].string.name);
+	nfs_readdir_release_array(page);
+	return 0;
+}
+
+/*
+ * the caller is responsible for freeing qstr.name
+ * when called by nfs_readdir_add_to_array, the strings will be freed in
+ * nfs_clear_readdir_array()
+ */
+static
+int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
+{
+	string->len = len;
+	string->name = kmemdup(name, len, GFP_KERNEL);
+	if (string->name == NULL)
+		return -ENOMEM;
+	string->hash = full_name_hash(name, len);
+	return 0;
+}
+
+static
+int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
+{
+	struct nfs_cache_array *array = nfs_readdir_get_array(page);
+	struct nfs_cache_array_entry *cache_entry;
+	int ret;
+
+	if (IS_ERR(array))
+		return PTR_ERR(array);
+	ret = -EIO;
+	if (array->size >= MAX_READDIR_ARRAY)
+		goto out;
+
+	cache_entry = &array->array[array->size];
+	cache_entry->cookie = entry->prev_cookie;
+	cache_entry->ino = entry->ino;
+	ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
+	if (ret)
+		goto out;
+	array->last_cookie = entry->cookie;
+	if (entry->eof == 1)
+		array->eof_index = array->size;
+	array->size++;
+out:
+	nfs_readdir_release_array(page);
+	return ret;
+}
+
+static
+int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+{
+	loff_t diff = desc->file->f_pos - desc->current_index;
+	unsigned int index;
+
+	if (diff < 0)
+		goto out_eof;
+	if (diff >= array->size) {
+		if (array->eof_index > 0)
+			goto out_eof;
+		desc->current_index += array->size;
+		return -EAGAIN;
+	}
+
+	index = (unsigned int)diff;
+	*desc->dir_cookie = array->array[index].cookie;
+	desc->cache_entry_index = index;
+	if (index == array->eof_index)
+		desc->eof = 1;
+	return 0;
+out_eof:
+	desc->eof = 1;
+	return -EBADCOOKIE;
+}
+
+static
+int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+{
+	int i;
+	int status = -EAGAIN;
+
+	for (i = 0; i < array->size; i++) {
+		if (i == array->eof_index) {
+			desc->eof = 1;
+			status = -EBADCOOKIE;
+		}
+		if (array->array[i].cookie == *desc->dir_cookie) {
+			desc->cache_entry_index = i;
+			status = 0;
+			break;
+		}
+	}
+
+	return status;
+}
+
+static
+int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
+{
+	struct nfs_cache_array *array;
+	int status = -EBADCOOKIE;
+
+	if (desc->dir_cookie == NULL)
+		goto out;
+
+	array = nfs_readdir_get_array(desc->page);
+	if (IS_ERR(array)) {
+		status = PTR_ERR(array);
+		goto out;
+	}
+
+	if (*desc->dir_cookie == 0)
+		status = nfs_readdir_search_for_pos(array, desc);
+	else
+		status = nfs_readdir_search_for_cookie(array, desc);
+
+	nfs_readdir_release_array(desc->page);
+out:
+	return status;
+}
+
+/* Fill a page with xdr information before transferring to the cache page */
+static
+int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
+			struct nfs_entry *entry, struct file *file, struct inode *inode)
 {
-	struct file	*file = desc->file;
-	struct inode	*inode = file->f_path.dentry->d_inode;
 	struct rpc_cred	*cred = nfs_file_cred(file);
 	unsigned long	timestamp, gencount;
 	int		error;
 
-	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
-			__func__, (long long)desc->entry->cookie,
-			page->index);
-
  again:
 	timestamp = jiffies;
 	gencount = nfs_inc_attr_generation_counter();
-	error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page,
+	error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
 					  NFS_SERVER(inode)->dtsize, desc->plus);
 	if (error < 0) {
 		/* We requested READDIRPLUS, but the server doesn't grok it */
@@ -201,190 +361,292 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	}
 	desc->timestamp = timestamp;
 	desc->gencount = gencount;
-	desc->timestamp_valid = 1;
-	SetPageUptodate(page);
-	/* Ensure consistent page alignment of the data.
-	 * Note: assumes we have exclusive access to this mapping either
-	 * through inode->i_mutex or some other mechanism.
-	 */
-	if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
-		/* Should never happen */
-		nfs_zap_mapping(inode, inode->i_mapping);
-	}
-	unlock_page(page);
-	return 0;
- error:
-	unlock_page(page);
-	return -EIO;
+error:
+	return error;
 }
 
-static inline
-int dir_decode(nfs_readdir_descriptor_t *desc)
+/* Fill in an entry based on the xdr code stored in desc->page */
+static
+int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
 {
-	__be32	*p = desc->ptr;
-	p = desc->decode(p, desc->entry, desc->plus);
+	__be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
-	desc->ptr = p;
-	if (desc->timestamp_valid) {
-		desc->entry->fattr->time_start = desc->timestamp;
-		desc->entry->fattr->gencount = desc->gencount;
-	} else
-		desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
+
+	entry->fattr->time_start = desc->timestamp;
+	entry->fattr->gencount = desc->gencount;
 	return 0;
 }
 
-static inline
-void dir_page_release(nfs_readdir_descriptor_t *desc)
+static
+int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
-	kunmap(desc->page);
-	page_cache_release(desc->page);
-	desc->page = NULL;
-	desc->ptr = NULL;
+	struct nfs_inode *node;
+	if (dentry->d_inode == NULL)
+		goto different;
+	node = NFS_I(dentry->d_inode);
+	if (node->fh.size != entry->fh->size)
+		goto different;
+	if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
+		goto different;
+	return 1;
+different:
+	return 0;
 }
 
-/*
- * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the next entry with cookie '*desc->dir_cookie'.
- *
- * If the end of the buffer has been reached, return -EAGAIN, if not,
- * return the offset within the buffer of the next entry to be
- * read.
- */
-static inline
-int find_dirent(nfs_readdir_descriptor_t *desc)
+static
+void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 {
-	struct nfs_entry *entry = desc->entry;
-	int		loop_count = 0,
-			status;
+	struct qstr filename = {
+		.len = entry->len,
+		.name = entry->name,
+	};
+	struct dentry *dentry;
+	struct dentry *alias;
+	struct inode *dir = parent->d_inode;
+	struct inode *inode;
 
-	while((status = dir_decode(desc)) == 0) {
-		dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
-				__func__, (unsigned long long)entry->cookie);
-		if (entry->prev_cookie == *desc->dir_cookie)
-			break;
-		if (loop_count++ > 200) {
-			loop_count = 0;
-			schedule();
+	if (filename.name[0] == '.') {
+		if (filename.len == 1)
+			return;
+		if (filename.len == 2 && filename.name[1] == '.')
+			return;
+	}
+	filename.hash = full_name_hash(filename.name, filename.len);
+
+	dentry = d_lookup(parent, &filename);
+	if (dentry != NULL) {
+		if (nfs_same_file(dentry, entry)) {
+			nfs_refresh_inode(dentry->d_inode, entry->fattr);
+			goto out;
+		} else {
+			d_drop(dentry);
+			dput(dentry);
 		}
 	}
-	return status;
+
+	dentry = d_alloc(parent, &filename);
+	if (dentry == NULL)
+		return;
+
+	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
+	if (IS_ERR(inode))
+		goto out;
+
+	alias = d_materialise_unique(dentry, inode);
+	if (IS_ERR(alias))
+		goto out;
+	else if (alias) {
+		nfs_set_verifier(alias, nfs_save_change_attribute(dir));
+		dput(alias);
+	} else
+		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+
+out:
+	dput(dentry);
+}
+
+/* Perform conversion from xdr to cache array */
+static
+void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
+				void *xdr_page, struct page *page, unsigned int buflen)
+{
+	struct xdr_stream stream;
+	struct xdr_buf buf;
+	__be32 *ptr = xdr_page;
+	int status;
+	struct nfs_cache_array *array;
+
+	buf.head->iov_base = xdr_page;
+	buf.head->iov_len = buflen;
+	buf.tail->iov_len = 0;
+	buf.page_base = 0;
+	buf.page_len = 0;
+	buf.buflen = buf.head->iov_len;
+	buf.len = buf.head->iov_len;
+
+	xdr_init_decode(&stream, &buf, ptr);
+
+
+	do {
+		status = xdr_decode(desc, entry, &stream);
+		if (status != 0)
+			break;
+
+		if (nfs_readdir_add_to_array(entry, page) == -1)
+			break;
+		if (desc->plus == 1)
+			nfs_prime_dcache(desc->file->f_path.dentry, entry);
+	} while (!entry->eof);
+
+	if (status == -EBADCOOKIE && entry->eof) {
+		array = nfs_readdir_get_array(page);
+		array->eof_index = array->size - 1;
+		status = 0;
+		nfs_readdir_release_array(page);
+	}
+}
+
+static
+void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages)
+{
+	unsigned int i;
+	for (i = 0; i < npages; i++)
+		put_page(pages[i]);
+}
+
+static
+void nfs_readdir_free_large_page(void *ptr, struct page **pages,
+		unsigned int npages)
+{
+	vm_unmap_ram(ptr, npages);
+	nfs_readdir_free_pagearray(pages, npages);
 }
 
 /*
- * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the entry at offset 'desc->file->f_pos'.
- *
- * If the end of the buffer has been reached, return -EAGAIN, if not,
- * return the offset within the buffer of the next entry to be
- * read.
+ * nfs_readdir_large_page will allocate pages that must be freed with a call
+ * to nfs_readdir_free_large_page
  */
-static inline
-int find_dirent_index(nfs_readdir_descriptor_t *desc)
+static
+void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
 {
-	struct nfs_entry *entry = desc->entry;
-	int		loop_count = 0,
-			status;
+	void *ptr;
+	unsigned int i;
+
+	for (i = 0; i < npages; i++) {
+		struct page *page = alloc_page(GFP_KERNEL);
+		if (page == NULL)
+			goto out_freepages;
+		pages[i] = page;
+	}
 
-	for(;;) {
-		status = dir_decode(desc);
-		if (status)
-			break;
+	ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
+	if (!IS_ERR_OR_NULL(ptr))
+		return ptr;
+out_freepages:
+	nfs_readdir_free_pagearray(pages, i);
+	return NULL;
+}
+
+static
+int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
+{
+	struct page *pages[NFS_MAX_READDIR_PAGES];
+	void *pages_ptr = NULL;
+	struct nfs_entry entry;
+	struct file	*file = desc->file;
+	struct nfs_cache_array *array;
+	int status = 0;
+	unsigned int array_size = ARRAY_SIZE(pages);
+
+	entry.prev_cookie = 0;
+	entry.cookie = *desc->dir_cookie;
+	entry.eof = 0;
+	entry.fh = nfs_alloc_fhandle();
+	entry.fattr = nfs_alloc_fattr();
+	if (entry.fh == NULL || entry.fattr == NULL)
+		goto out;
+
+	array = nfs_readdir_get_array(page);
+	memset(array, 0, sizeof(struct nfs_cache_array));
+	array->eof_index = -1;
 
-		dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
-				(unsigned long long)entry->cookie, desc->current_index);
+	pages_ptr = nfs_readdir_large_page(pages, array_size);
+	if (!pages_ptr)
+		goto out_release_array;
+	do {
+		status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
 
-		if (desc->file->f_pos == desc->current_index) {
-			*desc->dir_cookie = entry->cookie;
+		if (status < 0)
 			break;
-		}
-		desc->current_index++;
-		if (loop_count++ > 200) {
-			loop_count = 0;
-			schedule();
-		}
-	}
+		nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
+	} while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
+
+	nfs_readdir_free_large_page(pages_ptr, pages, array_size);
+out_release_array:
+	nfs_readdir_release_array(page);
+out:
+	nfs_free_fattr(entry.fattr);
+	nfs_free_fhandle(entry.fh);
 	return status;
 }
 
 /*
- * Find the given page, and call find_dirent() or find_dirent_index in
- * order to try to return the next entry.
+ * Now we cache directories properly, by converting xdr information
+ * to an array that can be used for lookups later. This results in
+ * fewer cache pages, since we can store more information on each page
+ * We only need to convert from xdr once so future lookups are much simpler
  */
-static inline
-int find_dirent_page(nfs_readdir_descriptor_t *desc)
+static
+int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 {
 	struct inode	*inode = desc->file->f_path.dentry->d_inode;
-	struct page	*page;
-	int		status;
 
-	dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
-			__func__, desc->page_index,
-			(long long) *desc->dir_cookie);
+	if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
+		goto error;
+	SetPageUptodate(page);
 
-	/* If we find the page in the page_cache, we cannot be sure
-	 * how fresh the data is, so we will ignore readdir_plus attributes.
-	 */
-	desc->timestamp_valid = 0;
-	page = read_cache_page(inode->i_mapping, desc->page_index,
-			(filler_t *)nfs_readdir_filler, desc);
-	if (IS_ERR(page)) {
-		status = PTR_ERR(page);
-		goto out;
+	if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
+		/* Should never happen */
+		nfs_zap_mapping(inode, inode->i_mapping);
 	}
+	unlock_page(page);
+	return 0;
+ error:
+	unlock_page(page);
+	return -EIO;
+}
 
-	/* NOTE: Someone else may have changed the READDIRPLUS flag */
-	desc->page = page;
-	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
-	if (*desc->dir_cookie != 0)
-		status = find_dirent(desc);
-	else
-		status = find_dirent_index(desc);
-	if (status < 0)
-		dir_page_release(desc);
- out:
-	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
-	return status;
+static
+void cache_page_release(nfs_readdir_descriptor_t *desc)
+{
+	page_cache_release(desc->page);
+	desc->page = NULL;
+}
+
+static
+struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
+{
+	struct page *page;
+	page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
+			desc->page_index, (filler_t *)nfs_readdir_filler, desc);
+	if (IS_ERR(page))
+		desc->eof = 1;
+	return page;
 }
 
 /*
- * Recurse through the page cache pages, and return a
- * filled nfs_entry structure of the next directory entry if possible.
- *
- * The target for the search is '*desc->dir_cookie' if non-0,
- * 'desc->file->f_pos' otherwise
+ * Returns 0 if desc->dir_cookie was found on page desc->page_index
  */
+static
+int find_cache_page(nfs_readdir_descriptor_t *desc)
+{
+	int res;
+
+	desc->page = get_cache_page(desc);
+	if (IS_ERR(desc->page))
+		return PTR_ERR(desc->page);
+
+	res = nfs_readdir_search_array(desc);
+	if (res == 0)
+		return 0;
+	cache_page_release(desc);
+	return res;
+}
+
+/* Search for desc->dir_cookie from the beginning of the page cache */
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 {
-	int loop_count = 0;
-	int res;
-
-	/* Always search-by-index from the beginning of the cache */
-	if (*desc->dir_cookie == 0) {
-		dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
-				(long long)desc->file->f_pos);
-		desc->page_index = 0;
-		desc->entry->cookie = desc->entry->prev_cookie = 0;
-		desc->entry->eof = 0;
-		desc->current_index = 0;
-	} else
-		dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
-				(unsigned long long)*desc->dir_cookie);
+	int res = -EAGAIN;
 
-	for (;;) {
-		res = find_dirent_page(desc);
+	while (1) {
+		res = find_cache_page(desc);
 		if (res != -EAGAIN)
 			break;
-		/* Align to beginning of next page */
-		desc->page_index ++;
-		if (loop_count++ > 200) {
-			loop_count = 0;
-			schedule();
-		}
+		desc->page_index++;
 	}
-
-	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res);
 	return res;
 }
 
@@ -393,8 +655,6 @@ static inline unsigned int dt_type(struct inode *inode)
 	return (inode->i_mode >> 12) & 15;
 }
 
-static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
-
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
@@ -403,49 +663,36 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 		   filldir_t filldir)
 {
 	struct file	*file = desc->file;
-	struct nfs_entry *entry = desc->entry;
-	struct dentry	*dentry = NULL;
-	u64		fileid;
-	int		loop_count = 0,
-			res;
-
-	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
-			(unsigned long long)entry->cookie);
-
-	for(;;) {
-		unsigned d_type = DT_UNKNOWN;
-		/* Note: entry->prev_cookie contains the cookie for
-		 * retrieving the current dirent on the server */
-		fileid = entry->ino;
-
-		/* Get a dentry if we have one */
-		if (dentry != NULL)
-			dput(dentry);
-		dentry = nfs_readdir_lookup(desc);
+	int i = 0;
+	int res = 0;
+	struct nfs_cache_array *array = NULL;
+	unsigned int d_type = DT_UNKNOWN;
+	struct dentry *dentry = NULL;
 
-		/* Use readdirplus info */
-		if (dentry != NULL && dentry->d_inode != NULL) {
-			d_type = dt_type(dentry->d_inode);
-			fileid = NFS_FILEID(dentry->d_inode);
-		}
+	array = nfs_readdir_get_array(desc->page);
 
-		res = filldir(dirent, entry->name, entry->len,
-			      file->f_pos, nfs_compat_user_ino64(fileid),
-			      d_type);
+	for (i = desc->cache_entry_index; i < array->size; i++) {
+		d_type = DT_UNKNOWN;
+
+		res = filldir(dirent, array->array[i].string.name,
+			array->array[i].string.len, file->f_pos,
+			nfs_compat_user_ino64(array->array[i].ino), d_type);
 		if (res < 0)
 			break;
 		file->f_pos++;
-		*desc->dir_cookie = entry->cookie;
-		if (dir_decode(desc) != 0) {
-			desc->page_index ++;
+		desc->cache_entry_index = i;
+		if (i < (array->size-1))
+			*desc->dir_cookie = array->array[i+1].cookie;
+		else
+			*desc->dir_cookie = array->last_cookie;
+		if (i == array->eof_index) {
+			desc->eof = 1;
 			break;
 		}
-		if (loop_count++ > 200) {
-			loop_count = 0;
-			schedule();
-		}
 	}
-	dir_page_release(desc);
+
+	nfs_readdir_release_array(desc->page);
+	cache_page_release(desc);
 	if (dentry != NULL)
 		dput(dentry);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
@@ -469,12 +716,9 @@ static inline
 int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 		     filldir_t filldir)
 {
-	struct file	*file = desc->file;
-	struct inode	*inode = file->f_path.dentry->d_inode;
-	struct rpc_cred	*cred = nfs_file_cred(file);
 	struct page	*page = NULL;
 	int		status;
-	unsigned long	timestamp, gencount;
+	struct inode *inode = desc->file->f_path.dentry->d_inode;
 
 	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
 			(unsigned long long)*desc->dir_cookie);
@@ -484,38 +728,22 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 		status = -ENOMEM;
 		goto out;
 	}
-	timestamp = jiffies;
-	gencount = nfs_inc_attr_generation_counter();
-	status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
-						*desc->dir_cookie, page,
-						NFS_SERVER(inode)->dtsize,
-						desc->plus);
-	desc->page = page;
-	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
-	if (status >= 0) {
-		desc->timestamp = timestamp;
-		desc->gencount = gencount;
-		desc->timestamp_valid = 1;
-		if ((status = dir_decode(desc)) == 0)
-			desc->entry->prev_cookie = *desc->dir_cookie;
-	} else
-		status = -EIO;
-	if (status < 0)
+
+	if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
+		status = -EIO;
 		goto out_release;
+	}
 
+	desc->page_index = 0;
+	desc->page = page;
 	status = nfs_do_filldir(desc, dirent, filldir);
 
-	/* Reset read descriptor so it searches the page cache from
-	 * the start upon the next call to readdir_search_pagecache() */
-	desc->page_index = 0;
-	desc->entry->cookie = desc->entry->prev_cookie = 0;
-	desc->entry->eof = 0;
  out:
 	dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
 			__func__, status);
 	return status;
  out_release:
-	dir_page_release(desc);
+	cache_page_release(desc);
 	goto out;
 }
 
@@ -529,7 +757,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct inode	*inode = dentry->d_inode;
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
-	struct nfs_entry my_entry;
 	int res = -ENOMEM;
 
 	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -550,26 +777,17 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = NFS_USE_READDIRPLUS(inode);
 
-	my_entry.cookie = my_entry.prev_cookie = 0;
-	my_entry.eof = 0;
-	my_entry.fh = nfs_alloc_fhandle();
-	my_entry.fattr = nfs_alloc_fattr();
-	if (my_entry.fh == NULL || my_entry.fattr == NULL)
-		goto out_alloc_failed;
-
-	desc->entry = &my_entry;
-
 	nfs_block_sillyrename(dentry);
 	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (res < 0)
 		goto out;
 
-	while(!desc->entry->eof) {
+	while (desc->eof != 1) {
 		res = readdir_search_pagecache(desc);
 
 		if (res == -EBADCOOKIE) {
 			/* This means either end of directory */
-			if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) {
+			if (*desc->dir_cookie && desc->eof == 0) {
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc, dirent, filldir);
 				if (res >= 0)
@@ -581,8 +799,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		if (res == -ETOOSMALL && desc->plus) {
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			nfs_zap_caches(inode);
+			desc->page_index = 0;
 			desc->plus = 0;
-			desc->entry->eof = 0;
+			desc->eof = 0;
 			continue;
 		}
 		if (res < 0)
@@ -598,9 +817,6 @@ out:
 	nfs_unblock_sillyrename(dentry);
 	if (res > 0)
 		res = 0;
-out_alloc_failed:
-	nfs_free_fattr(my_entry.fattr);
-	nfs_free_fhandle(my_entry.fh);
 	dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
 			dentry->d_parent->d_name.name, dentry->d_name.name,
 			res);
@@ -1022,10 +1238,63 @@ static int is_atomic_open(struct nameidata *nd)
 	return 1;
 }
 
+static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd)
+{
+	struct path path = {
+		.mnt = nd->path.mnt,
+		.dentry = dentry,
+	};
+	struct nfs_open_context *ctx;
+	struct rpc_cred *cred;
+	fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
+
+	cred = rpc_lookup_cred();
+	if (IS_ERR(cred))
+		return ERR_CAST(cred);
+	ctx = alloc_nfs_open_context(&path, cred, fmode);
+	put_rpccred(cred);
+	if (ctx == NULL)
+		return ERR_PTR(-ENOMEM);
+	return ctx;
+}
+
+static int do_open(struct inode *inode, struct file *filp)
+{
+	nfs_fscache_set_inode_cookie(inode, filp);
+	return 0;
+}
+
+static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
+{
+	struct file *filp;
+	int ret = 0;
+
+	/* If the open_intent is for execute, we have an extra check to make */
+	if (ctx->mode & FMODE_EXEC) {
+		ret = nfs_may_open(ctx->path.dentry->d_inode,
+				ctx->cred,
+				nd->intent.open.flags);
+		if (ret < 0)
+			goto out;
+	}
+	filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open);
+	if (IS_ERR(filp))
+		ret = PTR_ERR(filp);
+	else
+		nfs_file_set_open_context(filp, ctx);
+out:
+	put_nfs_open_context(ctx);
+	return ret;
+}
+
 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+	struct nfs_open_context *ctx;
+	struct iattr attr;
 	struct dentry *res = NULL;
-	int error;
+	struct inode *inode;
+	int open_flags;
+	int err;
 
 	dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
 			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1047,13 +1316,32 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1047 goto out; 1316 goto out;
1048 } 1317 }
1049 1318
1319 ctx = nameidata_to_nfs_open_context(dentry, nd);
1320 res = ERR_CAST(ctx);
1321 if (IS_ERR(ctx))
1322 goto out;
1323
1324 open_flags = nd->intent.open.flags;
1325 if (nd->flags & LOOKUP_CREATE) {
1326 attr.ia_mode = nd->intent.open.create_mode;
1327 attr.ia_valid = ATTR_MODE;
1328 if (!IS_POSIXACL(dir))
1329 attr.ia_mode &= ~current_umask();
1330 } else {
1331 open_flags &= ~(O_EXCL | O_CREAT);
1332 attr.ia_valid = 0;
1333 }
1334
1050 /* Open the file on the server */ 1335 /* Open the file on the server */
1051 res = nfs4_atomic_open(dir, dentry, nd); 1336 nfs_block_sillyrename(dentry->d_parent);
1052 if (IS_ERR(res)) { 1337 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
1053 error = PTR_ERR(res); 1338 if (IS_ERR(inode)) {
1054 switch (error) { 1339 nfs_unblock_sillyrename(dentry->d_parent);
1340 put_nfs_open_context(ctx);
1341 switch (PTR_ERR(inode)) {
1055 /* Make a negative dentry */ 1342 /* Make a negative dentry */
1056 case -ENOENT: 1343 case -ENOENT:
1344 d_add(dentry, NULL);
1057 res = NULL; 1345 res = NULL;
1058 goto out; 1346 goto out;
1059 /* This turned out not to be a regular file */ 1347 /* This turned out not to be a regular file */
@@ -1065,11 +1353,25 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1065 goto no_open; 1353 goto no_open;
1066 /* case -EINVAL: */ 1354 /* case -EINVAL: */
1067 default: 1355 default:
1356 res = ERR_CAST(inode);
1068 goto out; 1357 goto out;
1069 } 1358 }
1070 } else if (res != NULL) 1359 }
1360 res = d_add_unique(dentry, inode);
1361 nfs_unblock_sillyrename(dentry->d_parent);
1362 if (res != NULL) {
1363 dput(ctx->path.dentry);
1364 ctx->path.dentry = dget(res);
1071 dentry = res; 1365 dentry = res;
1366 }
1367 err = nfs_intent_set_file(nd, ctx);
1368 if (err < 0) {
1369 if (res != NULL)
1370 dput(res);
1371 return ERR_PTR(err);
1372 }
1072out: 1373out:
1374 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1073 return res; 1375 return res;
1074no_open: 1376no_open:
1075 return nfs_lookup(dir, dentry, nd); 1377 return nfs_lookup(dir, dentry, nd);
@@ -1080,12 +1382,15 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1080 struct dentry *parent = NULL; 1382 struct dentry *parent = NULL;
1081 struct inode *inode = dentry->d_inode; 1383 struct inode *inode = dentry->d_inode;
1082 struct inode *dir; 1384 struct inode *dir;
1385 struct nfs_open_context *ctx;
1083 int openflags, ret = 0; 1386 int openflags, ret = 0;
1084 1387
1085 if (!is_atomic_open(nd) || d_mountpoint(dentry)) 1388 if (!is_atomic_open(nd) || d_mountpoint(dentry))
1086 goto no_open; 1389 goto no_open;
1390
1087 parent = dget_parent(dentry); 1391 parent = dget_parent(dentry);
1088 dir = parent->d_inode; 1392 dir = parent->d_inode;
1393
1089 /* We can't create new files in nfs_open_revalidate(), so we 1394 /* We can't create new files in nfs_open_revalidate(), so we
1090 * optimize away revalidation of negative dentries. 1395 * optimize away revalidation of negative dentries.
1091 */ 1396 */
@@ -1103,101 +1408,98 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1103 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 1408 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
1104 goto no_open_dput; 1409 goto no_open_dput;
1105 /* We can't create new files, or truncate existing ones here */ 1410 /* We can't create new files, or truncate existing ones here */
1106 openflags &= ~(O_CREAT|O_TRUNC); 1411 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
1107 1412
1413 ctx = nameidata_to_nfs_open_context(dentry, nd);
1414 ret = PTR_ERR(ctx);
1415 if (IS_ERR(ctx))
1416 goto out;
1108 /* 1417 /*
1109 * Note: we're not holding inode->i_mutex and so may be racing with 1418 * Note: we're not holding inode->i_mutex and so may be racing with
1110 * operations that change the directory. We therefore save the 1419 * operations that change the directory. We therefore save the
1111 * change attribute *before* we do the RPC call. 1420 * change attribute *before* we do the RPC call.
1112 */ 1421 */
1113 ret = nfs4_open_revalidate(dir, dentry, openflags, nd); 1422 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
1423 if (IS_ERR(inode)) {
1424 ret = PTR_ERR(inode);
1425 switch (ret) {
1426 case -EPERM:
1427 case -EACCES:
1428 case -EDQUOT:
1429 case -ENOSPC:
1430 case -EROFS:
1431 goto out_put_ctx;
1432 default:
1433 goto out_drop;
1434 }
1435 }
1436 iput(inode);
1437 if (inode != dentry->d_inode)
1438 goto out_drop;
1439
1440 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1441 ret = nfs_intent_set_file(nd, ctx);
1442 if (ret >= 0)
1443 ret = 1;
1114out: 1444out:
1115 dput(parent); 1445 dput(parent);
1116 if (!ret)
1117 d_drop(dentry);
1118 return ret; 1446 return ret;
1447out_drop:
1448 d_drop(dentry);
1449 ret = 0;
1450out_put_ctx:
1451 put_nfs_open_context(ctx);
1452 goto out;
1453
1119no_open_dput: 1454no_open_dput:
1120 dput(parent); 1455 dput(parent);
1121no_open: 1456no_open:
1122 return nfs_lookup_revalidate(dentry, nd); 1457 return nfs_lookup_revalidate(dentry, nd);
1123} 1458}
1124#endif /* CONFIG_NFSV4 */
1125 1459
1126static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) 1460static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
1461 struct nameidata *nd)
1127{ 1462{
1128 struct dentry *parent = desc->file->f_path.dentry; 1463 struct nfs_open_context *ctx = NULL;
1129 struct inode *dir = parent->d_inode; 1464 struct iattr attr;
1130 struct nfs_entry *entry = desc->entry; 1465 int error;
1131 struct dentry *dentry, *alias; 1466 int open_flags = 0;
1132 struct qstr name = {
1133 .name = entry->name,
1134 .len = entry->len,
1135 };
1136 struct inode *inode;
1137 unsigned long verf = nfs_save_change_attribute(dir);
1138 1467
1139 switch (name.len) { 1468 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1140 case 2: 1469 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1141 if (name.name[0] == '.' && name.name[1] == '.')
1142 return dget_parent(parent);
1143 break;
1144 case 1:
1145 if (name.name[0] == '.')
1146 return dget(parent);
1147 }
1148 1470
1149 spin_lock(&dir->i_lock); 1471 attr.ia_mode = mode;
1150 if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { 1472 attr.ia_valid = ATTR_MODE;
1151 spin_unlock(&dir->i_lock);
1152 return NULL;
1153 }
1154 spin_unlock(&dir->i_lock);
1155 1473
1156 name.hash = full_name_hash(name.name, name.len); 1474 if ((nd->flags & LOOKUP_CREATE) != 0) {
1157 dentry = d_lookup(parent, &name); 1475 open_flags = nd->intent.open.flags;
1158 if (dentry != NULL) {
1159 /* Is this a positive dentry that matches the readdir info? */
1160 if (dentry->d_inode != NULL &&
1161 (NFS_FILEID(dentry->d_inode) == entry->ino ||
1162 d_mountpoint(dentry))) {
1163 if (!desc->plus || entry->fh->size == 0)
1164 return dentry;
1165 if (nfs_compare_fh(NFS_FH(dentry->d_inode),
1166 entry->fh) == 0)
1167 goto out_renew;
1168 }
1169 /* No, so d_drop to allow one to be created */
1170 d_drop(dentry);
1171 dput(dentry);
1172 }
1173 if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
1174 return NULL;
1175 if (name.len > NFS_SERVER(dir)->namelen)
1176 return NULL;
1177 /* Note: caller is already holding the dir->i_mutex! */
1178 dentry = d_alloc(parent, &name);
1179 if (dentry == NULL)
1180 return NULL;
1181 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
1182 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
1183 if (IS_ERR(inode)) {
1184 dput(dentry);
1185 return NULL;
1186 }
1187 1476
1188 alias = d_materialise_unique(dentry, inode); 1477 ctx = nameidata_to_nfs_open_context(dentry, nd);
1189 if (alias != NULL) { 1478 error = PTR_ERR(ctx);
1190 dput(dentry); 1479 if (IS_ERR(ctx))
1191 if (IS_ERR(alias)) 1480 goto out_err_drop;
1192 return NULL;
1193 dentry = alias;
1194 } 1481 }
1195 1482
1196out_renew: 1483 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
1197 nfs_set_verifier(dentry, verf); 1484 if (error != 0)
1198 return dentry; 1485 goto out_put_ctx;
1486 if (ctx != NULL) {
1487 error = nfs_intent_set_file(nd, ctx);
1488 if (error < 0)
1489 goto out_err;
1490 }
1491 return 0;
1492out_put_ctx:
1493 if (ctx != NULL)
1494 put_nfs_open_context(ctx);
1495out_err_drop:
1496 d_drop(dentry);
1497out_err:
1498 return error;
1199} 1499}
1200 1500
1501#endif /* CONFIG_NFSV4 */
1502
1201/* 1503/*
1202 * Code common to create, mkdir, and mknod. 1504 * Code common to create, mkdir, and mknod.
1203 */ 1505 */
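
In the rewritten atomic-open paths above, nameidata_to_nfs_open_context() reduces the open intent to an fmode_t (FMODE_READ | FMODE_WRITE | FMODE_EXEC) that rides in the nfs_open_context handed to ->open_context() and ->create(). The sketch below is only a userspace analogy of that reduction, using O_ACCMODE rather than the kernel's FMODE_* bits; the WANT_* names are invented for illustration.

#include <stdio.h>
#include <fcntl.h>

#define WANT_READ	0x1
#define WANT_WRITE	0x2

static int open_flags_to_mode(int flags)
{
	switch (flags & O_ACCMODE) {
	case O_RDONLY: return WANT_READ;
	case O_WRONLY: return WANT_WRITE;
	case O_RDWR:   return WANT_READ | WANT_WRITE;
	}
	return 0;
}

int main(void)
{
	/* creation flags are masked away; only the access mode survives */
	printf("O_RDWR|O_CREAT -> %#x\n", open_flags_to_mode(O_RDWR | O_CREAT));
	return 0;
}
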
@@ -1251,7 +1553,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1251{ 1553{
1252 struct iattr attr; 1554 struct iattr attr;
1253 int error; 1555 int error;
1254 int open_flags = 0;
1255 1556
1256 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1557 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1257 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1558 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1259,10 +1560,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1259 attr.ia_mode = mode; 1560 attr.ia_mode = mode;
1260 attr.ia_valid = ATTR_MODE; 1561 attr.ia_valid = ATTR_MODE;
1261 1562
1262 if ((nd->flags & LOOKUP_CREATE) != 0) 1563 error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL);
1263 open_flags = nd->intent.open.flags;
1264
1265 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
1266 if (error != 0) 1564 if (error != 0)
1267 goto out_err; 1565 goto out_err;
1268 return 0; 1566 return 0;
@@ -1344,76 +1642,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1344 return error; 1642 return error;
1345} 1643}
1346 1644
1347static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1348{
1349 static unsigned int sillycounter;
1350 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
1351 const int countersize = sizeof(sillycounter)*2;
1352 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
1353 char silly[slen+1];
1354 struct qstr qsilly;
1355 struct dentry *sdentry;
1356 int error = -EIO;
1357
1358 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
1359 dentry->d_parent->d_name.name, dentry->d_name.name,
1360 atomic_read(&dentry->d_count));
1361 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
1362
1363 /*
1364 * We don't allow a dentry to be silly-renamed twice.
1365 */
1366 error = -EBUSY;
1367 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1368 goto out;
1369
1370 sprintf(silly, ".nfs%*.*Lx",
1371 fileidsize, fileidsize,
1372 (unsigned long long)NFS_FILEID(dentry->d_inode));
1373
1374 /* Return delegation in anticipation of the rename */
1375 nfs_inode_return_delegation(dentry->d_inode);
1376
1377 sdentry = NULL;
1378 do {
1379 char *suffix = silly + slen - countersize;
1380
1381 dput(sdentry);
1382 sillycounter++;
1383 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
1384
1385 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
1386 dentry->d_name.name, silly);
1387
1388 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
1389 /*
1390 * N.B. Better to return EBUSY here ... it could be
1391 * dangerous to delete the file while it's in use.
1392 */
1393 if (IS_ERR(sdentry))
1394 goto out;
1395 } while(sdentry->d_inode != NULL); /* need negative lookup */
1396
1397 qsilly.name = silly;
1398 qsilly.len = strlen(silly);
1399 if (dentry->d_inode) {
1400 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1401 dir, &qsilly);
1402 nfs_mark_for_revalidate(dentry->d_inode);
1403 } else
1404 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1405 dir, &qsilly);
1406 if (!error) {
1407 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1408 d_move(dentry, sdentry);
1409 error = nfs_async_unlink(dir, dentry);
1410 /* If we return 0 we don't unlink */
1411 }
1412 dput(sdentry);
1413out:
1414 return error;
1415}
1416
1417/* 1645/*
1418 * Remove a file after making sure there are no pending writes, 1646 * Remove a file after making sure there are no pending writes,
1419 * and after checking that the file has only one user. 1647 * and after checking that the file has only one user.
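
The nfs_sillyrename() helper removed above (the diffstat suggests it migrates to fs/nfs/unlink.c) builds its temporary names as ".nfs" followed by the file ID and a per-client counter, both printed as zero-padded hex. A minimal userspace sketch of that formatting, with a made-up file ID and counter value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t fileid = 0x1a2b3c4dULL;	/* hypothetical NFS file ID */
	unsigned int sillycounter = 7;		/* per-client counter */
	const int fileidsize = sizeof(fileid) * 2;
	const int countersize = sizeof(sillycounter) * 2;
	char silly[4 + 16 + 8 + 1];

	/* same width/precision trick as the kernel's ".nfs%*.*Lx" + "%*.*x" */
	snprintf(silly, sizeof(silly), ".nfs%*.*llx%*.*x",
		 fileidsize, fileidsize, (unsigned long long)fileid,
		 countersize, countersize, sillycounter);
	printf("%s\n", silly);	/* prints ".nfs000000001a2b3c4d00000007" */
	return 0;
}
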
@@ -1573,7 +1801,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1573 d_drop(dentry); 1801 d_drop(dentry);
1574 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); 1802 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
1575 if (error == 0) { 1803 if (error == 0) {
1576 atomic_inc(&inode->i_count); 1804 ihold(inode);
1577 d_add(dentry, inode); 1805 d_add(dentry, inode);
1578 } 1806 }
1579 return error; 1807 return error;
@@ -1652,16 +1880,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1652 } 1880 }
1653 } 1881 }
1654 1882
1655 /*
1656 * ... prune child dentries and writebacks if needed.
1657 */
1658 if (atomic_read(&old_dentry->d_count) > 1) {
1659 if (S_ISREG(old_inode->i_mode))
1660 nfs_wb_all(old_inode);
1661 shrink_dcache_parent(old_dentry);
1662 }
1663 nfs_inode_return_delegation(old_inode); 1883 nfs_inode_return_delegation(old_inode);
1664
1665 if (new_inode != NULL) 1884 if (new_inode != NULL)
1666 nfs_inode_return_delegation(new_inode); 1885 nfs_inode_return_delegation(new_inode);
1667 1886
@@ -1713,14 +1932,14 @@ static void nfs_access_free_list(struct list_head *head)
1713int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 1932int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
1714{ 1933{
1715 LIST_HEAD(head); 1934 LIST_HEAD(head);
1716 struct nfs_inode *nfsi; 1935 struct nfs_inode *nfsi, *next;
1717 struct nfs_access_entry *cache; 1936 struct nfs_access_entry *cache;
1718 1937
1719 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 1938 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
1720 return (nr_to_scan == 0) ? 0 : -1; 1939 return (nr_to_scan == 0) ? 0 : -1;
1721 1940
1722 spin_lock(&nfs_access_lru_lock); 1941 spin_lock(&nfs_access_lru_lock);
1723 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1942 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
1724 struct inode *inode; 1943 struct inode *inode;
1725 1944
1726 if (nr_to_scan-- == 0) 1945 if (nr_to_scan-- == 0)
@@ -1953,7 +2172,7 @@ int nfs_permission(struct inode *inode, int mask)
1953 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 2172 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
1954 goto out; 2173 goto out;
1955 /* Is this sys_access() ? */ 2174 /* Is this sys_access() ? */
1956 if (mask & MAY_ACCESS) 2175 if (mask & (MAY_ACCESS | MAY_CHDIR))
1957 goto force_lookup; 2176 goto force_lookup;
1958 2177
1959 switch (inode->i_mode & S_IFMT) { 2178 switch (inode->i_mode & S_IFMT) {
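
nfs_permission() above now takes the force_lookup path, i.e. an over-the-wire ACCESS check, for MAY_CHDIR as well as MAY_ACCESS, instead of trusting cached mode bits. For reference, an access(2) call like the one below is what sets MAY_ACCESS; the path name is arbitrary.

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (access("/etc/hosts", R_OK) == 0)	/* ends up as MAY_ACCESS in the kernel */
		printf("readable\n");
	return 0;
}
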
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ad4cd31d6050..84d3c8b90206 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -69,6 +69,7 @@ struct nfs_direct_req {
69 69
70 /* I/O parameters */ 70 /* I/O parameters */
71 struct nfs_open_context *ctx; /* file open context info */ 71 struct nfs_open_context *ctx; /* file open context info */
72 struct nfs_lock_context *l_ctx; /* Lock context info */
72 struct kiocb * iocb; /* controlling i/o request */ 73 struct kiocb * iocb; /* controlling i/o request */
73 struct inode * inode; /* target file of i/o */ 74 struct inode * inode; /* target file of i/o */
74 75
@@ -160,6 +161,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
160 INIT_LIST_HEAD(&dreq->rewrite_list); 161 INIT_LIST_HEAD(&dreq->rewrite_list);
161 dreq->iocb = NULL; 162 dreq->iocb = NULL;
162 dreq->ctx = NULL; 163 dreq->ctx = NULL;
164 dreq->l_ctx = NULL;
163 spin_lock_init(&dreq->lock); 165 spin_lock_init(&dreq->lock);
164 atomic_set(&dreq->io_count, 0); 166 atomic_set(&dreq->io_count, 0);
165 dreq->count = 0; 167 dreq->count = 0;
@@ -173,6 +175,8 @@ static void nfs_direct_req_free(struct kref *kref)
173{ 175{
174 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); 176 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
175 177
178 if (dreq->l_ctx != NULL)
179 nfs_put_lock_context(dreq->l_ctx);
176 if (dreq->ctx != NULL) 180 if (dreq->ctx != NULL)
177 put_nfs_open_context(dreq->ctx); 181 put_nfs_open_context(dreq->ctx);
178 kmem_cache_free(nfs_direct_cachep, dreq); 182 kmem_cache_free(nfs_direct_cachep, dreq);
@@ -336,6 +340,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
336 data->cred = msg.rpc_cred; 340 data->cred = msg.rpc_cred;
337 data->args.fh = NFS_FH(inode); 341 data->args.fh = NFS_FH(inode);
338 data->args.context = ctx; 342 data->args.context = ctx;
343 data->args.lock_context = dreq->l_ctx;
339 data->args.offset = pos; 344 data->args.offset = pos;
340 data->args.pgbase = pgbase; 345 data->args.pgbase = pgbase;
341 data->args.pages = data->pagevec; 346 data->args.pages = data->pagevec;
@@ -416,24 +421,28 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
416static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, 421static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
417 unsigned long nr_segs, loff_t pos) 422 unsigned long nr_segs, loff_t pos)
418{ 423{
419 ssize_t result = 0; 424 ssize_t result = -ENOMEM;
420 struct inode *inode = iocb->ki_filp->f_mapping->host; 425 struct inode *inode = iocb->ki_filp->f_mapping->host;
421 struct nfs_direct_req *dreq; 426 struct nfs_direct_req *dreq;
422 427
423 dreq = nfs_direct_req_alloc(); 428 dreq = nfs_direct_req_alloc();
424 if (!dreq) 429 if (dreq == NULL)
425 return -ENOMEM; 430 goto out;
426 431
427 dreq->inode = inode; 432 dreq->inode = inode;
428 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 433 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
434 dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
435 if (dreq->l_ctx == NULL)
436 goto out_release;
429 if (!is_sync_kiocb(iocb)) 437 if (!is_sync_kiocb(iocb))
430 dreq->iocb = iocb; 438 dreq->iocb = iocb;
431 439
432 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); 440 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
433 if (!result) 441 if (!result)
434 result = nfs_direct_wait(dreq); 442 result = nfs_direct_wait(dreq);
443out_release:
435 nfs_direct_req_release(dreq); 444 nfs_direct_req_release(dreq);
436 445out:
437 return result; 446 return result;
438} 447}
439 448
@@ -574,6 +583,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
574 data->args.offset = 0; 583 data->args.offset = 0;
575 data->args.count = 0; 584 data->args.count = 0;
576 data->args.context = dreq->ctx; 585 data->args.context = dreq->ctx;
586 data->args.lock_context = dreq->l_ctx;
577 data->res.count = 0; 587 data->res.count = 0;
578 data->res.fattr = &data->fattr; 588 data->res.fattr = &data->fattr;
579 data->res.verf = &data->verf; 589 data->res.verf = &data->verf;
@@ -761,6 +771,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
761 data->cred = msg.rpc_cred; 771 data->cred = msg.rpc_cred;
762 data->args.fh = NFS_FH(inode); 772 data->args.fh = NFS_FH(inode);
763 data->args.context = ctx; 773 data->args.context = ctx;
774 data->args.lock_context = dreq->l_ctx;
764 data->args.offset = pos; 775 data->args.offset = pos;
765 data->args.pgbase = pgbase; 776 data->args.pgbase = pgbase;
766 data->args.pages = data->pagevec; 777 data->args.pages = data->pagevec;
@@ -845,7 +856,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
845 unsigned long nr_segs, loff_t pos, 856 unsigned long nr_segs, loff_t pos,
846 size_t count) 857 size_t count)
847{ 858{
848 ssize_t result = 0; 859 ssize_t result = -ENOMEM;
849 struct inode *inode = iocb->ki_filp->f_mapping->host; 860 struct inode *inode = iocb->ki_filp->f_mapping->host;
850 struct nfs_direct_req *dreq; 861 struct nfs_direct_req *dreq;
851 size_t wsize = NFS_SERVER(inode)->wsize; 862 size_t wsize = NFS_SERVER(inode)->wsize;
@@ -853,7 +864,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
853 864
854 dreq = nfs_direct_req_alloc(); 865 dreq = nfs_direct_req_alloc();
855 if (!dreq) 866 if (!dreq)
856 return -ENOMEM; 867 goto out;
857 nfs_alloc_commit_data(dreq); 868 nfs_alloc_commit_data(dreq);
858 869
859 if (dreq->commit_data == NULL || count < wsize) 870 if (dreq->commit_data == NULL || count < wsize)
@@ -861,14 +872,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
861 872
862 dreq->inode = inode; 873 dreq->inode = inode;
863 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 874 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
875 dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
876 if (dreq->l_ctx == NULL)
877 goto out_release;
864 if (!is_sync_kiocb(iocb)) 878 if (!is_sync_kiocb(iocb))
865 dreq->iocb = iocb; 879 dreq->iocb = iocb;
866 880
867 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); 881 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
868 if (!result) 882 if (!result)
869 result = nfs_direct_wait(dreq); 883 result = nfs_direct_wait(dreq);
884out_release:
870 nfs_direct_req_release(dreq); 885 nfs_direct_req_release(dreq);
871 886out:
872 return result; 887 return result;
873} 888}
874 889
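
The O_DIRECT paths above attach a per-owner nfs_lock_context to each request (dreq->l_ctx, propagated via args.lock_context) and move their setup to the initialize-to-failure, goto-cleanup shape (result = -ENOMEM with out/out_release labels). A much simplified userspace sketch of that cleanup shape only, with malloc()/free() standing in for nfs_direct_req_alloc()/nfs_direct_req_release():

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>

struct demo_req { int dummy; };

static ssize_t demo_direct_io(void)
{
	ssize_t result = -ENOMEM;
	struct demo_req *dreq;

	dreq = malloc(sizeof(*dreq));		/* nfs_direct_req_alloc() */
	if (dreq == NULL)
		goto out;			/* nothing to release yet */

	result = 0;				/* ...schedule the I/O and wait... */

	free(dreq);				/* nfs_direct_req_release() */
out:
	return result;
}

int main(void)
{
	printf("demo_direct_io() = %zd\n", demo_direct_io());
	return 0;
}
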
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 76fd235d0024..a6e711ad130f 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -6,6 +6,29 @@
6 * Resolves DNS hostnames into valid ip addresses 6 * Resolves DNS hostnames into valid ip addresses
7 */ 7 */
8 8
9#ifdef CONFIG_NFS_USE_KERNEL_DNS
10
11#include <linux/sunrpc/clnt.h>
12#include <linux/dns_resolver.h>
13
14ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
15 struct sockaddr *sa, size_t salen)
16{
17 ssize_t ret;
18 char *ip_addr = NULL;
19 int ip_len;
20
21 ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL);
22 if (ip_len > 0)
23 ret = rpc_pton(ip_addr, ip_len, sa, salen);
24 else
25 ret = -ESRCH;
26 kfree(ip_addr);
27 return ret;
28}
29
30#else
31
9#include <linux/hash.h> 32#include <linux/hash.h>
10#include <linux/string.h> 33#include <linux/string.h>
11#include <linux/kmod.h> 34#include <linux/kmod.h>
@@ -144,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
144 return 0; 167 return 0;
145 } 168 }
146 item = container_of(h, struct nfs_dns_ent, h); 169 item = container_of(h, struct nfs_dns_ent, h);
147 ttl = (long)item->h.expiry_time - (long)get_seconds(); 170 ttl = item->h.expiry_time - seconds_since_boot();
148 if (ttl < 0) 171 if (ttl < 0)
149 ttl = 0; 172 ttl = 0;
150 173
@@ -216,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
216 ttl = get_expiry(&buf); 239 ttl = get_expiry(&buf);
217 if (ttl == 0) 240 if (ttl == 0)
218 goto out; 241 goto out;
219 key.h.expiry_time = ttl + get_seconds(); 242 key.h.expiry_time = ttl + seconds_since_boot();
220 243
221 ret = -ENOMEM; 244 ret = -ENOMEM;
222 item = nfs_dns_lookup(cd, &key); 245 item = nfs_dns_lookup(cd, &key);
@@ -278,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd,
278 goto out_err; 301 goto out_err;
279 ret = -ETIMEDOUT; 302 ret = -ETIMEDOUT;
280 if (!test_bit(CACHE_VALID, &(*item)->h.flags) 303 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
281 || (*item)->h.expiry_time < get_seconds() 304 || (*item)->h.expiry_time < seconds_since_boot()
282 || cd->flush_time > (*item)->h.last_refresh) 305 || cd->flush_time > (*item)->h.last_refresh)
283 goto out_put; 306 goto out_put;
284 ret = -ENOENT; 307 ret = -ENOENT;
@@ -346,3 +369,4 @@ void nfs_dns_resolver_destroy(void)
346 nfs_cache_unregister(&nfs_dns_resolve); 369 nfs_cache_unregister(&nfs_dns_resolve);
347} 370}
348 371
372#endif
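
With CONFIG_NFS_USE_KERNEL_DNS, nfs_dns_resolve_name() above asks the kernel DNS resolver (dns_query()) for a text form of the address and converts it with rpc_pton(); without it, the existing cache and upcall path remains. Purely as a userspace analogue of the same "name in, sockaddr out" contract, getaddrinfo() plays both roles in this sketch:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>

static ssize_t resolve_name(const char *name, struct sockaddr_storage *sa)
{
	struct addrinfo hints = { .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM };
	struct addrinfo *res;

	if (getaddrinfo(name, NULL, &hints, &res) != 0)
		return -1;			/* the kernel helper returns -ESRCH */
	memcpy(sa, res->ai_addr, res->ai_addrlen);
	ssize_t len = res->ai_addrlen;
	freeaddrinfo(res);
	return len;
}

int main(void)
{
	struct sockaddr_storage ss;
	char buf[INET6_ADDRSTRLEN];
	ssize_t len = resolve_name("localhost", &ss);

	if (len > 0 && getnameinfo((struct sockaddr *)&ss, len, buf, sizeof(buf),
				   NULL, 0, NI_NUMERICHOST) == 0)
		printf("%s\n", buf);
	return 0;
}
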
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
index a3f0938babf7..199bb5543a91 100644
--- a/fs/nfs/dns_resolve.h
+++ b/fs/nfs/dns_resolve.h
@@ -6,8 +6,20 @@
6 6
7#define NFS_DNS_HOSTNAME_MAXLEN (128) 7#define NFS_DNS_HOSTNAME_MAXLEN (128)
8 8
9
10#ifdef CONFIG_NFS_USE_KERNEL_DNS
11static inline int nfs_dns_resolver_init(void)
12{
13 return 0;
14}
15
16static inline void nfs_dns_resolver_destroy(void)
17{}
18#else
9extern int nfs_dns_resolver_init(void); 19extern int nfs_dns_resolver_init(void);
10extern void nfs_dns_resolver_destroy(void); 20extern void nfs_dns_resolver_destroy(void);
21#endif
22
11extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, 23extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
12 struct sockaddr *sa, size_t salen); 24 struct sockaddr *sa, size_t salen);
13 25
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f036153d9f50..e756075637b0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -36,6 +36,7 @@
36#include "internal.h" 36#include "internal.h"
37#include "iostat.h" 37#include "iostat.h"
38#include "fscache.h" 38#include "fscache.h"
39#include "pnfs.h"
39 40
40#define NFSDBG_FACILITY NFSDBG_FILE 41#define NFSDBG_FACILITY NFSDBG_FILE
41 42
@@ -203,37 +204,11 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
203} 204}
204 205
205/* 206/*
206 * Helper for nfs_file_flush() and nfs_file_fsync()
207 *
208 * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
209 * disk, but it retrieves and clears ctx->error after synching, despite
210 * the two being set at the same time in nfs_context_set_write_error().
211 * This is because the former is used to notify the _next_ call to
212 * nfs_file_write() that a write error occurred, and hence cause it to
213 * fall back to doing a synchronous write.
214 */
215static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
216{
217 int have_error, status;
218 int ret = 0;
219
220 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
221 status = nfs_wb_all(inode);
222 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
223 if (have_error)
224 ret = xchg(&ctx->error, 0);
225 if (!ret)
226 ret = status;
227 return ret;
228}
229
230/*
231 * Flush all dirty pages, and check for write errors. 207 * Flush all dirty pages, and check for write errors.
232 */ 208 */
233static int 209static int
234nfs_file_flush(struct file *file, fl_owner_t id) 210nfs_file_flush(struct file *file, fl_owner_t id)
235{ 211{
236 struct nfs_open_context *ctx = nfs_file_open_context(file);
237 struct dentry *dentry = file->f_path.dentry; 212 struct dentry *dentry = file->f_path.dentry;
238 struct inode *inode = dentry->d_inode; 213 struct inode *inode = dentry->d_inode;
239 214
@@ -246,7 +221,7 @@ nfs_file_flush(struct file *file, fl_owner_t id)
246 return 0; 221 return 0;
247 222
248 /* Flush writes to the server and return any errors */ 223 /* Flush writes to the server and return any errors */
249 return nfs_do_fsync(ctx, inode); 224 return vfs_fsync(file, 0);
250} 225}
251 226
252static ssize_t 227static ssize_t
@@ -321,6 +296,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
321 * Flush any dirty pages for this process, and check for write errors. 296 * Flush any dirty pages for this process, and check for write errors.
322 * The return status from this call provides a reliable indication of 297 * The return status from this call provides a reliable indication of
323 * whether any write errors occurred for this process. 298 * whether any write errors occurred for this process.
299 *
300 * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
301 * disk, but it retrieves and clears ctx->error after synching, despite
302 * the two being set at the same time in nfs_context_set_write_error().
303 * This is because the former is used to notify the _next_ call to
 304 * nfs_file_write() that a write error occurred, and hence cause it to
305 * fall back to doing a synchronous write.
324 */ 306 */
325static int 307static int
326nfs_file_fsync(struct file *file, int datasync) 308nfs_file_fsync(struct file *file, int datasync)
@@ -328,13 +310,23 @@ nfs_file_fsync(struct file *file, int datasync)
328 struct dentry *dentry = file->f_path.dentry; 310 struct dentry *dentry = file->f_path.dentry;
329 struct nfs_open_context *ctx = nfs_file_open_context(file); 311 struct nfs_open_context *ctx = nfs_file_open_context(file);
330 struct inode *inode = dentry->d_inode; 312 struct inode *inode = dentry->d_inode;
313 int have_error, status;
314 int ret = 0;
315
331 316
332 dprintk("NFS: fsync file(%s/%s) datasync %d\n", 317 dprintk("NFS: fsync file(%s/%s) datasync %d\n",
333 dentry->d_parent->d_name.name, dentry->d_name.name, 318 dentry->d_parent->d_name.name, dentry->d_name.name,
334 datasync); 319 datasync);
335 320
336 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 321 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
337 return nfs_do_fsync(ctx, inode); 322 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
323 status = nfs_commit_inode(inode, FLUSH_SYNC);
324 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
325 if (have_error)
326 ret = xchg(&ctx->error, 0);
327 if (!ret && status < 0)
328 ret = status;
329 return ret;
338} 330}
339 331
340/* 332/*
@@ -395,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
395 file->f_path.dentry->d_name.name, 387 file->f_path.dentry->d_name.name,
396 mapping->host->i_ino, len, (long long) pos); 388 mapping->host->i_ino, len, (long long) pos);
397 389
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
398start: 394start:
399 /* 395 /*
400 * Prevent starvation issues if someone is doing a consistency 396 * Prevent starvation issues if someone is doing a consistency
@@ -560,7 +556,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
560 struct file *filp = vma->vm_file; 556 struct file *filp = vma->vm_file;
561 struct dentry *dentry = filp->f_path.dentry; 557 struct dentry *dentry = filp->f_path.dentry;
562 unsigned pagelen; 558 unsigned pagelen;
563 int ret = -EINVAL; 559 int ret = VM_FAULT_NOPAGE;
564 struct address_space *mapping; 560 struct address_space *mapping;
565 561
566 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", 562 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
@@ -576,21 +572,20 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
576 if (mapping != dentry->d_inode->i_mapping) 572 if (mapping != dentry->d_inode->i_mapping)
577 goto out_unlock; 573 goto out_unlock;
578 574
579 ret = 0;
580 pagelen = nfs_page_length(page); 575 pagelen = nfs_page_length(page);
581 if (pagelen == 0) 576 if (pagelen == 0)
582 goto out_unlock; 577 goto out_unlock;
583 578
584 ret = nfs_flush_incompatible(filp, page); 579 ret = VM_FAULT_LOCKED;
585 if (ret != 0) 580 if (nfs_flush_incompatible(filp, page) == 0 &&
586 goto out_unlock; 581 nfs_updatepage(filp, page, 0, pagelen) == 0)
582 goto out;
587 583
588 ret = nfs_updatepage(filp, page, 0, pagelen); 584 ret = VM_FAULT_SIGBUS;
589out_unlock: 585out_unlock:
590 if (!ret)
591 return VM_FAULT_LOCKED;
592 unlock_page(page); 586 unlock_page(page);
593 return VM_FAULT_SIGBUS; 587out:
588 return ret;
594} 589}
595 590
596static const struct vm_operations_struct nfs_file_vm_ops = { 591static const struct vm_operations_struct nfs_file_vm_ops = {
@@ -648,7 +643,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
648 643
649 /* Return error values for O_DSYNC and IS_SYNC() */ 644 /* Return error values for O_DSYNC and IS_SYNC() */
650 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { 645 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
651 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); 646 int err = vfs_fsync(iocb->ki_filp, 0);
652 if (err < 0) 647 if (err < 0)
653 result = err; 648 result = err;
654 } 649 }
@@ -684,7 +679,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
684 written = ret; 679 written = ret;
685 680
686 if (ret >= 0 && nfs_need_sync_write(filp, inode)) { 681 if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
687 int err = nfs_do_fsync(nfs_file_open_context(filp), inode); 682 int err = vfs_fsync(filp, 0);
688 if (err < 0) 683 if (err < 0)
689 ret = err; 684 ret = err;
690 } 685 }
@@ -693,7 +688,8 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
693 return ret; 688 return ret;
694} 689}
695 690
696static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) 691static int
692do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
697{ 693{
698 struct inode *inode = filp->f_mapping->host; 694 struct inode *inode = filp->f_mapping->host;
699 int status = 0; 695 int status = 0;
@@ -708,7 +704,7 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
708 if (nfs_have_delegation(inode, FMODE_READ)) 704 if (nfs_have_delegation(inode, FMODE_READ))
709 goto out_noconflict; 705 goto out_noconflict;
710 706
711 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) 707 if (is_local)
712 goto out_noconflict; 708 goto out_noconflict;
713 709
714 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 710 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
@@ -732,14 +728,11 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
732 default: 728 default:
733 BUG(); 729 BUG();
734 } 730 }
735 if (res < 0)
736 dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
737 " - error %d!\n",
738 __func__, res);
739 return res; 731 return res;
740} 732}
741 733
742static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) 734static int
735do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
743{ 736{
744 struct inode *inode = filp->f_mapping->host; 737 struct inode *inode = filp->f_mapping->host;
745 int status; 738 int status;
@@ -754,15 +747,24 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
754 * If we're signalled while cleaning up locks on process exit, we 747 * If we're signalled while cleaning up locks on process exit, we
755 * still need to complete the unlock. 748 * still need to complete the unlock.
756 */ 749 */
757 /* Use local locking if mounted with "-onolock" */ 750 /*
758 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 751 * Use local locking if mounted with "-onolock" or with appropriate
752 * "-olocal_lock="
753 */
754 if (!is_local)
759 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 755 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
760 else 756 else
761 status = do_vfs_lock(filp, fl); 757 status = do_vfs_lock(filp, fl);
762 return status; 758 return status;
763} 759}
764 760
765static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) 761static int
762is_time_granular(struct timespec *ts) {
763 return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
764}
765
766static int
767do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
766{ 768{
767 struct inode *inode = filp->f_mapping->host; 769 struct inode *inode = filp->f_mapping->host;
768 int status; 770 int status;
@@ -775,20 +777,31 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
775 if (status != 0) 777 if (status != 0)
776 goto out; 778 goto out;
777 779
778 /* Use local locking if mounted with "-onolock" */ 780 /*
779 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 781 * Use local locking if mounted with "-onolock" or with appropriate
782 * "-olocal_lock="
783 */
784 if (!is_local)
780 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 785 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
781 else 786 else
782 status = do_vfs_lock(filp, fl); 787 status = do_vfs_lock(filp, fl);
783 if (status < 0) 788 if (status < 0)
784 goto out; 789 goto out;
790
785 /* 791 /*
786 * Make sure we clear the cache whenever we try to get the lock. 792 * Revalidate the cache if the server has time stamps granular
793 * enough to detect subsecond changes. Otherwise, clear the
794 * cache to prevent missing any changes.
795 *
787 * This makes locking act as a cache coherency point. 796 * This makes locking act as a cache coherency point.
788 */ 797 */
789 nfs_sync_mapping(filp->f_mapping); 798 nfs_sync_mapping(filp->f_mapping);
790 if (!nfs_have_delegation(inode, FMODE_READ)) 799 if (!nfs_have_delegation(inode, FMODE_READ)) {
791 nfs_zap_caches(inode); 800 if (is_time_granular(&NFS_SERVER(inode)->time_delta))
801 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
802 else
803 nfs_zap_caches(inode);
804 }
792out: 805out:
793 return status; 806 return status;
794} 807}
@@ -800,6 +813,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
800{ 813{
801 struct inode *inode = filp->f_mapping->host; 814 struct inode *inode = filp->f_mapping->host;
802 int ret = -ENOLCK; 815 int ret = -ENOLCK;
816 int is_local = 0;
803 817
804 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", 818 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
805 filp->f_path.dentry->d_parent->d_name.name, 819 filp->f_path.dentry->d_parent->d_name.name,
@@ -813,6 +827,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
813 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) 827 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
814 goto out_err; 828 goto out_err;
815 829
830 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
831 is_local = 1;
832
816 if (NFS_PROTO(inode)->lock_check_bounds != NULL) { 833 if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
817 ret = NFS_PROTO(inode)->lock_check_bounds(fl); 834 ret = NFS_PROTO(inode)->lock_check_bounds(fl);
818 if (ret < 0) 835 if (ret < 0)
@@ -820,11 +837,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
820 } 837 }
821 838
822 if (IS_GETLK(cmd)) 839 if (IS_GETLK(cmd))
823 ret = do_getlk(filp, cmd, fl); 840 ret = do_getlk(filp, cmd, fl, is_local);
824 else if (fl->fl_type == F_UNLCK) 841 else if (fl->fl_type == F_UNLCK)
825 ret = do_unlk(filp, cmd, fl); 842 ret = do_unlk(filp, cmd, fl, is_local);
826 else 843 else
827 ret = do_setlk(filp, cmd, fl); 844 ret = do_setlk(filp, cmd, fl, is_local);
828out_err: 845out_err:
829 return ret; 846 return ret;
830} 847}
@@ -834,6 +851,9 @@ out_err:
834 */ 851 */
835static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) 852static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
836{ 853{
854 struct inode *inode = filp->f_mapping->host;
855 int is_local = 0;
856
837 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", 857 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
838 filp->f_path.dentry->d_parent->d_name.name, 858 filp->f_path.dentry->d_parent->d_name.name,
839 filp->f_path.dentry->d_name.name, 859 filp->f_path.dentry->d_name.name,
@@ -842,14 +862,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
842 if (!(fl->fl_flags & FL_FLOCK)) 862 if (!(fl->fl_flags & FL_FLOCK))
843 return -ENOLCK; 863 return -ENOLCK;
844 864
865 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
866 is_local = 1;
867
845 /* We're simulating flock() locks using posix locks on the server */ 868 /* We're simulating flock() locks using posix locks on the server */
846 fl->fl_owner = (fl_owner_t)filp; 869 fl->fl_owner = (fl_owner_t)filp;
847 fl->fl_start = 0; 870 fl->fl_start = 0;
848 fl->fl_end = OFFSET_MAX; 871 fl->fl_end = OFFSET_MAX;
849 872
850 if (fl->fl_type == F_UNLCK) 873 if (fl->fl_type == F_UNLCK)
851 return do_unlk(filp, cmd, fl); 874 return do_unlk(filp, cmd, fl, is_local);
852 return do_setlk(filp, cmd, fl); 875 return do_setlk(filp, cmd, fl, is_local);
853} 876}
854 877
855/* 878/*
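
The locking changes above pick local versus server locking per mount via NFS_MOUNT_LOCAL_FCNTL and NFS_MOUNT_LOCAL_FLOCK, and nfs_flock() keeps simulating flock() with a whole-file POSIX lock (fl_start = 0, fl_end = OFFSET_MAX). A userspace sketch of such a whole-file lock; the lock file path is made up:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/demo.lock", O_RDWR | O_CREAT, 0600);
	struct flock fl = {
		.l_type   = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start  = 0,
		.l_len    = 0,		/* 0 means "to EOF and beyond", i.e. the whole file */
	};

	if (fd < 0)
		return 1;
	if (fcntl(fd, F_SETLKW, &fl) == 0)	/* blocks until the lock is granted */
		printf("whole-file write lock acquired\n");
	fl.l_type = F_UNLCK;
	fcntl(fd, F_SETLK, &fl);
	close(fd);
	return 0;
}
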
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index a70e446e1605..ac7b814ce162 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
54 iput(inode); 54 iput(inode);
55 return -ENOMEM; 55 return -ENOMEM;
56 } 56 }
57 /* Circumvent igrab(): we know the inode is not being freed */ 57 ihold(inode);
58 atomic_inc(&inode->i_count);
59 /* 58 /*
60 * Ensure that this dentry is invisible to d_find_alias(). 59 * Ensure that this dentry is invisible to d_find_alias().
61 * Otherwise, it may be spliced into the tree by 60 * Otherwise, it may be spliced into the tree by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 21a84d45916f..4e2d9b6b1380 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -34,6 +34,212 @@
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36 36
37#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
38
39#include <linux/slab.h>
40#include <linux/cred.h>
41#include <linux/nfs_idmap.h>
42#include <linux/keyctl.h>
43#include <linux/key-type.h>
44#include <linux/rcupdate.h>
45#include <linux/kernel.h>
46#include <linux/err.h>
47
48#include <keys/user-type.h>
49
50#define NFS_UINT_MAXLEN 11
51
52const struct cred *id_resolver_cache;
53
54struct key_type key_type_id_resolver = {
55 .name = "id_resolver",
56 .instantiate = user_instantiate,
57 .match = user_match,
58 .revoke = user_revoke,
59 .destroy = user_destroy,
60 .describe = user_describe,
61 .read = user_read,
62};
63
64int nfs_idmap_init(void)
65{
66 struct cred *cred;
67 struct key *keyring;
68 int ret = 0;
69
70 printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name);
71
72 cred = prepare_kernel_cred(NULL);
73 if (!cred)
74 return -ENOMEM;
75
76 keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred,
77 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
78 KEY_USR_VIEW | KEY_USR_READ,
79 KEY_ALLOC_NOT_IN_QUOTA);
80 if (IS_ERR(keyring)) {
81 ret = PTR_ERR(keyring);
82 goto failed_put_cred;
83 }
84
85 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
86 if (ret < 0)
87 goto failed_put_key;
88
89 ret = register_key_type(&key_type_id_resolver);
90 if (ret < 0)
91 goto failed_put_key;
92
93 cred->thread_keyring = keyring;
94 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
95 id_resolver_cache = cred;
96 return 0;
97
98failed_put_key:
99 key_put(keyring);
100failed_put_cred:
101 put_cred(cred);
102 return ret;
103}
104
105void nfs_idmap_quit(void)
106{
107 key_revoke(id_resolver_cache->thread_keyring);
108 unregister_key_type(&key_type_id_resolver);
109 put_cred(id_resolver_cache);
110}
111
112/*
113 * Assemble the description to pass to request_key()
114 * This function will allocate a new string and update dest to point
115 * at it. The caller is responsible for freeing dest.
116 *
117 * On error a negative errno is returned. Otherwise, the length of dest is returned.
118 */
119static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
120 const char *type, size_t typelen, char **desc)
121{
122 char *cp;
123 size_t desclen = typelen + namelen + 2;
124
125 *desc = kmalloc(desclen, GFP_KERNEL);
126 if (!*desc)
127 return -ENOMEM;
128
129 cp = *desc;
130 memcpy(cp, type, typelen);
131 cp += typelen;
132 *cp++ = ':';
133
134 memcpy(cp, name, namelen);
135 cp += namelen;
136 *cp = '\0';
137 return desclen;
138}
139
140static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
141 const char *type, void *data, size_t data_size)
142{
143 const struct cred *saved_cred;
144 struct key *rkey;
145 char *desc;
146 struct user_key_payload *payload;
147 ssize_t ret;
148
149 ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
150 if (ret <= 0)
151 goto out;
152
153 saved_cred = override_creds(id_resolver_cache);
154 rkey = request_key(&key_type_id_resolver, desc, "");
155 revert_creds(saved_cred);
156 kfree(desc);
157 if (IS_ERR(rkey)) {
158 ret = PTR_ERR(rkey);
159 goto out;
160 }
161
162 rcu_read_lock();
163 rkey->perm |= KEY_USR_VIEW;
164
165 ret = key_validate(rkey);
166 if (ret < 0)
167 goto out_up;
168
169 payload = rcu_dereference(rkey->payload.data);
170 if (IS_ERR_OR_NULL(payload)) {
171 ret = PTR_ERR(payload);
172 goto out_up;
173 }
174
175 ret = payload->datalen;
176 if (ret > 0 && ret <= data_size)
177 memcpy(data, payload->data, ret);
178 else
179 ret = -EINVAL;
180
181out_up:
182 rcu_read_unlock();
183 key_put(rkey);
184out:
185 return ret;
186}
187
188
189/* ID -> Name */
190static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen)
191{
192 char id_str[NFS_UINT_MAXLEN];
193 int id_len;
194 ssize_t ret;
195
196 id_len = snprintf(id_str, sizeof(id_str), "%u", id);
197 ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen);
198 if (ret < 0)
199 return -EINVAL;
200 return ret;
201}
202
203/* Name -> ID */
204static int nfs_idmap_lookup_id(const char *name, size_t namelen,
205 const char *type, __u32 *id)
206{
207 char id_str[NFS_UINT_MAXLEN];
208 long id_long;
209 ssize_t data_size;
210 int ret = 0;
211
212 data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN);
213 if (data_size <= 0) {
214 ret = -EINVAL;
215 } else {
216 ret = strict_strtol(id_str, 10, &id_long);
217 *id = (__u32)id_long;
218 }
219 return ret;
220}
221
222int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
223{
224 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
225}
226
227int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid)
228{
229 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
230}
231
232int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
233{
234 return nfs_idmap_lookup_name(uid, "user", buf, buflen);
235}
236int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen)
237{
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen);
239}
240
241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
242
37#include <linux/module.h> 243#include <linux/module.h>
38#include <linux/mutex.h> 244#include <linux/mutex.h>
39#include <linux/init.h> 245#include <linux/init.h>
@@ -503,16 +709,17 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele
503 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 709 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
504} 710}
505 711
506int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) 712int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
507{ 713{
508 struct idmap *idmap = clp->cl_idmap; 714 struct idmap *idmap = clp->cl_idmap;
509 715
510 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 716 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
511} 717}
512int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) 718int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
513{ 719{
514 struct idmap *idmap = clp->cl_idmap; 720 struct idmap *idmap = clp->cl_idmap;
515 721
516 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 722 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
517} 723}
518 724
725#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
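
The new keyring-based idmapper above builds request_key() descriptions of the form "<type>:<name>", for example "uid:alice" when mapping a name to an id or "user:500" when mapping an id back to a name, and relies on a userspace helper (typically request-key(8) driving an idmap program) to instantiate the keys. A small userspace sketch of the description construction mirrored from nfs_idmap_get_desc(); the example names are invented:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *idmap_desc(const char *type, const char *name)
{
	size_t len = strlen(type) + strlen(name) + 2;	/* ':' plus trailing NUL */
	char *desc = malloc(len);

	if (desc != NULL)
		snprintf(desc, len, "%s:%s", type, name);
	return desc;
}

int main(void)
{
	char *d1 = idmap_desc("uid", "alice");	/* name -> id lookup */
	char *d2 = idmap_desc("user", "500");	/* id -> name lookup */

	printf("%s\n%s\n", d1 ? d1 : "-", d2 ? d2 : "-");
	free(d1);
	free(d2);
	return 0;
}
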
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 099b3518feea..314f57164602 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -48,6 +48,7 @@
48#include "internal.h" 48#include "internal.h"
49#include "fscache.h" 49#include "fscache.h"
50#include "dns_resolve.h" 50#include "dns_resolve.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_VFS 53#define NFSDBG_FACILITY NFSDBG_VFS
53 54
@@ -98,7 +99,7 @@ u64 nfs_compat_user_ino64(u64 fileid)
98 return ino; 99 return ino;
99} 100}
100 101
101void nfs_clear_inode(struct inode *inode) 102static void nfs_clear_inode(struct inode *inode)
102{ 103{
103 /* 104 /*
104 * The following should never happen... 105 * The following should never happen...
@@ -110,6 +111,13 @@ void nfs_clear_inode(struct inode *inode)
110 nfs_fscache_release_inode_cookie(inode); 111 nfs_fscache_release_inode_cookie(inode);
111} 112}
112 113
114void nfs_evict_inode(struct inode *inode)
115{
116 truncate_inode_pages(&inode->i_data, 0);
117 end_writeback(inode);
118 nfs_clear_inode(inode);
119}
120
113/** 121/**
114 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk 122 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
115 */ 123 */
@@ -227,9 +235,6 @@ nfs_init_locked(struct inode *inode, void *opaque)
227 return 0; 235 return 0;
228} 236}
229 237
230/* Don't use READDIRPLUS on directories that we believe are too large */
231#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
232
233/* 238/*
234 * This is our front-end to iget that looks up inodes by file handle 239 * This is our front-end to iget that looks up inodes by file handle
235 * instead of inode number. 240 * instead of inode number.
@@ -284,8 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
284 } else if (S_ISDIR(inode->i_mode)) { 289 } else if (S_ISDIR(inode->i_mode)) {
285 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 290 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
286 inode->i_fop = &nfs_dir_operations; 291 inode->i_fop = &nfs_dir_operations;
287 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) 292 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
288 && fattr->size <= NFS_LIMIT_READDIRPLUS)
289 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 293 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
290 /* Deal with crossing mountpoints */ 294 /* Deal with crossing mountpoints */
291 if ((fattr->valid & NFS_ATTR_FATTR_FSID) 295 if ((fattr->valid & NFS_ATTR_FATTR_FSID)
@@ -413,10 +417,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
413 return 0; 417 return 0;
414 418
415 /* Write all dirty data */ 419 /* Write all dirty data */
416 if (S_ISREG(inode->i_mode)) { 420 if (S_ISREG(inode->i_mode))
417 filemap_write_and_wait(inode->i_mapping);
418 nfs_wb_all(inode); 421 nfs_wb_all(inode);
419 }
420 422
421 fattr = nfs_alloc_fattr(); 423 fattr = nfs_alloc_fattr();
422 if (fattr == NULL) 424 if (fattr == NULL)
@@ -530,6 +532,68 @@ out:
530 return err; 532 return err;
531} 533}
532 534
535static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
536{
537 atomic_set(&l_ctx->count, 1);
538 l_ctx->lockowner = current->files;
539 l_ctx->pid = current->tgid;
540 INIT_LIST_HEAD(&l_ctx->list);
541}
542
543static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
544{
545 struct nfs_lock_context *pos;
546
547 list_for_each_entry(pos, &ctx->lock_context.list, list) {
548 if (pos->lockowner != current->files)
549 continue;
550 if (pos->pid != current->tgid)
551 continue;
552 atomic_inc(&pos->count);
553 return pos;
554 }
555 return NULL;
556}
557
558struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
559{
560 struct nfs_lock_context *res, *new = NULL;
561 struct inode *inode = ctx->path.dentry->d_inode;
562
563 spin_lock(&inode->i_lock);
564 res = __nfs_find_lock_context(ctx);
565 if (res == NULL) {
566 spin_unlock(&inode->i_lock);
567 new = kmalloc(sizeof(*new), GFP_KERNEL);
568 if (new == NULL)
569 return NULL;
570 nfs_init_lock_context(new);
571 spin_lock(&inode->i_lock);
572 res = __nfs_find_lock_context(ctx);
573 if (res == NULL) {
574 list_add_tail(&new->list, &ctx->lock_context.list);
575 new->open_context = ctx;
576 res = new;
577 new = NULL;
578 }
579 }
580 spin_unlock(&inode->i_lock);
581 kfree(new);
582 return res;
583}
584
585void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
586{
587 struct nfs_open_context *ctx = l_ctx->open_context;
588 struct inode *inode = ctx->path.dentry->d_inode;
589
590 if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
591 return;
592 list_del(&l_ctx->list);
593 spin_unlock(&inode->i_lock);
594 kfree(l_ctx);
595}
596
533/** 597/**
534 * nfs_close_context - Common close_context() routine NFSv2/v3 598 * nfs_close_context - Common close_context() routine NFSv2/v3
535 * @ctx: pointer to context 599 * @ctx: pointer to context
@@ -556,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
556 nfs_revalidate_inode(server, inode); 620 nfs_revalidate_inode(server, inode);
557} 621}
558 622
559static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) 623struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode)
560{ 624{
561 struct nfs_open_context *ctx; 625 struct nfs_open_context *ctx;
562 626
@@ -566,11 +630,13 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
566 path_get(&ctx->path); 630 path_get(&ctx->path);
567 ctx->cred = get_rpccred(cred); 631 ctx->cred = get_rpccred(cred);
568 ctx->state = NULL; 632 ctx->state = NULL;
569 ctx->lockowner = current->files; 633 ctx->mode = f_mode;
570 ctx->flags = 0; 634 ctx->flags = 0;
571 ctx->error = 0; 635 ctx->error = 0;
572 ctx->dir_cookie = 0; 636 ctx->dir_cookie = 0;
573 atomic_set(&ctx->count, 1); 637 nfs_init_lock_context(&ctx->lock_context);
638 ctx->lock_context.open_context = ctx;
639 INIT_LIST_HEAD(&ctx->list);
574 } 640 }
575 return ctx; 641 return ctx;
576} 642}
@@ -578,7 +644,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
578struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) 644struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
579{ 645{
580 if (ctx != NULL) 646 if (ctx != NULL)
581 atomic_inc(&ctx->count); 647 atomic_inc(&ctx->lock_context.count);
582 return ctx; 648 return ctx;
583} 649}
584 650
@@ -586,11 +652,15 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
586{ 652{
587 struct inode *inode = ctx->path.dentry->d_inode; 653 struct inode *inode = ctx->path.dentry->d_inode;
588 654
589 if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) 655 if (!list_empty(&ctx->list)) {
656 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
657 return;
658 list_del(&ctx->list);
659 spin_unlock(&inode->i_lock);
660 } else if (!atomic_dec_and_test(&ctx->lock_context.count))
590 return; 661 return;
591 list_del(&ctx->list); 662 if (inode != NULL)
592 spin_unlock(&inode->i_lock); 663 NFS_PROTO(inode)->close_context(ctx, is_sync);
593 NFS_PROTO(inode)->close_context(ctx, is_sync);
594 if (ctx->cred != NULL) 664 if (ctx->cred != NULL)
595 put_rpccred(ctx->cred); 665 put_rpccred(ctx->cred);
596 path_put(&ctx->path); 666 path_put(&ctx->path);
@@ -606,7 +676,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
606 * Ensure that mmap has a recent RPC credential for use when writing out 676 * Ensure that mmap has a recent RPC credential for use when writing out
607 * shared pages 677 * shared pages
608 */ 678 */
609static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) 679void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
610{ 680{
611 struct inode *inode = filp->f_path.dentry->d_inode; 681 struct inode *inode = filp->f_path.dentry->d_inode;
612 struct nfs_inode *nfsi = NFS_I(inode); 682 struct nfs_inode *nfsi = NFS_I(inode);
@@ -663,11 +733,10 @@ int nfs_open(struct inode *inode, struct file *filp)
663 cred = rpc_lookup_cred(); 733 cred = rpc_lookup_cred();
664 if (IS_ERR(cred)) 734 if (IS_ERR(cred))
665 return PTR_ERR(cred); 735 return PTR_ERR(cred);
666 ctx = alloc_nfs_open_context(&filp->f_path, cred); 736 ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode);
667 put_rpccred(cred); 737 put_rpccred(cred);
668 if (ctx == NULL) 738 if (ctx == NULL)
669 return -ENOMEM; 739 return -ENOMEM;
670 ctx->mode = filp->f_mode;
671 nfs_file_set_open_context(filp, ctx); 740 nfs_file_set_open_context(filp, ctx);
672 put_nfs_open_context(ctx); 741 put_nfs_open_context(ctx);
673 nfs_fscache_set_inode_cookie(inode, filp); 742 nfs_fscache_set_inode_cookie(inode, filp);
@@ -1338,8 +1407,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1338 * to open() calls that passed nfs_atomic_lookup, but failed to call 1407 * to open() calls that passed nfs_atomic_lookup, but failed to call
1339 * nfs_open(). 1408 * nfs_open().
1340 */ 1409 */
1341void nfs4_clear_inode(struct inode *inode) 1410void nfs4_evict_inode(struct inode *inode)
1342{ 1411{
1412 truncate_inode_pages(&inode->i_data, 0);
1413 end_writeback(inode);
1414 pnfs_destroy_layout(NFS_I(inode));
1343 /* If we are holding a delegation, return it! */ 1415 /* If we are holding a delegation, return it! */
1344 nfs_inode_return_delegation_noreclaim(inode); 1416 nfs_inode_return_delegation_noreclaim(inode);
1345 /* First call standard NFS clear_inode() code */ 1417 /* First call standard NFS clear_inode() code */
@@ -1377,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1377 nfsi->delegation = NULL; 1449 nfsi->delegation = NULL;
1378 nfsi->delegation_state = 0; 1450 nfsi->delegation_state = 0;
1379 init_rwsem(&nfsi->rwsem); 1451 init_rwsem(&nfsi->rwsem);
1452 nfsi->layout = NULL;
1380#endif 1453#endif
1381} 1454}
1382 1455
@@ -1424,7 +1497,7 @@ static int nfsiod_start(void)
1424{ 1497{
1425 struct workqueue_struct *wq; 1498 struct workqueue_struct *wq;
1426 dprintk("RPC: creating workqueue nfsiod\n"); 1499 dprintk("RPC: creating workqueue nfsiod\n");
1427 wq = create_singlethread_workqueue("nfsiod"); 1500 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
1428 if (wq == NULL) 1501 if (wq == NULL)
1429 return -ENOMEM; 1502 return -ENOMEM;
1430 nfsiod_workqueue = wq; 1503 nfsiod_workqueue = wq;
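The nfsiod change above is a conversion to the concurrency-managed workqueue API: WQ_RESCUER (since renamed WQ_MEM_RECLAIM) asks for a dedicated rescuer thread, so queued NFS work can still make forward progress when new worker threads cannot be spawned under memory pressure.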
@@ -1452,6 +1525,10 @@ static int __init init_nfs_fs(void)
1452{ 1525{
1453 int err; 1526 int err;
1454 1527
1528 err = nfs_idmap_init();
1529 if (err < 0)
1530 goto out9;
1531
1455 err = nfs_dns_resolver_init(); 1532 err = nfs_dns_resolver_init();
1456 if (err < 0) 1533 if (err < 0)
1457 goto out8; 1534 goto out8;
@@ -1516,6 +1593,8 @@ out6:
1516out7: 1593out7:
1517 nfs_dns_resolver_destroy(); 1594 nfs_dns_resolver_destroy();
1518out8: 1595out8:
1596 nfs_idmap_quit();
1597out9:
1519 return err; 1598 return err;
1520} 1599}
1521 1600
@@ -1528,6 +1607,7 @@ static void __exit exit_nfs_fs(void)
1528 nfs_destroy_nfspagecache(); 1607 nfs_destroy_nfspagecache();
1529 nfs_fscache_unregister(); 1608 nfs_fscache_unregister();
1530 nfs_dns_resolver_destroy(); 1609 nfs_dns_resolver_destroy();
1610 nfs_idmap_quit();
1531#ifdef CONFIG_PROC_FS 1611#ifdef CONFIG_PROC_FS
1532 rpc_proc_unregister("nfs"); 1612 rpc_proc_unregister("nfs");
1533#endif 1613#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e70f44b9b3f4..db08ff3ff454 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -63,6 +63,12 @@ struct nfs_clone_mount {
63#define NFS_UNSPEC_PORT (-1) 63#define NFS_UNSPEC_PORT (-1)
64 64
65/* 65/*
66 * Maximum number of pages that readdir can use for creating
67 * a vmapped array of pages.
68 */
69#define NFS_MAX_READDIR_PAGES 8
70
71/*
66 * In-kernel mount arguments 72 * In-kernel mount arguments
67 */ 73 */
68struct nfs_parsed_mount_data { 74struct nfs_parsed_mount_data {
@@ -181,15 +187,15 @@ extern void nfs_destroy_directcache(void);
181/* nfs2xdr.c */ 187/* nfs2xdr.c */
182extern int nfs_stat_to_errno(int); 188extern int nfs_stat_to_errno(int);
183extern struct rpc_procinfo nfs_procedures[]; 189extern struct rpc_procinfo nfs_procedures[];
184extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); 190extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
185 191
186/* nfs3xdr.c */ 192/* nfs3xdr.c */
187extern struct rpc_procinfo nfs3_procedures[]; 193extern struct rpc_procinfo nfs3_procedures[];
188extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); 194extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
189 195
190/* nfs4xdr.c */ 196/* nfs4xdr.c */
191#ifdef CONFIG_NFS_V4 197#ifdef CONFIG_NFS_V4
192extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 198extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
193#endif 199#endif
194#ifdef CONFIG_NFS_V4_1 200#ifdef CONFIG_NFS_V4_1
195extern const u32 nfs41_maxread_overhead; 201extern const u32 nfs41_maxread_overhead;
@@ -213,9 +219,9 @@ extern struct workqueue_struct *nfsiod_workqueue;
213extern struct inode *nfs_alloc_inode(struct super_block *sb); 219extern struct inode *nfs_alloc_inode(struct super_block *sb);
214extern void nfs_destroy_inode(struct inode *); 220extern void nfs_destroy_inode(struct inode *);
215extern int nfs_write_inode(struct inode *, struct writeback_control *); 221extern int nfs_write_inode(struct inode *, struct writeback_control *);
216extern void nfs_clear_inode(struct inode *); 222extern void nfs_evict_inode(struct inode *);
217#ifdef CONFIG_NFS_V4 223#ifdef CONFIG_NFS_V4
218extern void nfs4_clear_inode(struct inode *); 224extern void nfs4_evict_inode(struct inode *);
219#endif 225#endif
220void nfs_zap_acl_cache(struct inode *inode); 226void nfs_zap_acl_cache(struct inode *inode);
221extern int nfs_wait_bit_killable(void *word); 227extern int nfs_wait_bit_killable(void *word);
@@ -370,10 +376,9 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
370 * Helper for restarting RPC calls in the possible presence of NFSv4.1 376 * Helper for restarting RPC calls in the possible presence of NFSv4.1
371 * sessions. 377 * sessions.
372 */ 378 */
373static inline void nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) 379static inline int nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp)
374{ 380{
375 if (nfs4_has_session(clp)) 381 if (nfs4_has_session(clp))
376 rpc_restart_call_prepare(task); 382 return rpc_restart_call_prepare(task);
377 else 383 return rpc_restart_call(task);
378 rpc_restart_call(task);
379} 384}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 59047f8d7d72..eceafe74f473 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
153 .rpc_resp = &result, 153 .rpc_resp = &result,
154 }; 154 };
155 struct rpc_create_args args = { 155 struct rpc_create_args args = {
156 .net = &init_net,
156 .protocol = info->protocol, 157 .protocol = info->protocol,
157 .address = info->sap, 158 .address = info->sap,
158 .addrsize = info->salen, 159 .addrsize = info->salen,
@@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
224 .to_retries = 2, 225 .to_retries = 2,
225 }; 226 };
226 struct rpc_create_args args = { 227 struct rpc_create_args args = {
228 .net = &init_net,
227 .protocol = IPPROTO_UDP, 229 .protocol = IPPROTO_UDP,
228 .address = info->sap, 230 .address = info->sap,
229 .addrsize = info->salen, 231 .addrsize = info->salen,
@@ -436,7 +438,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
436 438
437 for (i = 0; i < entries; i++) { 439 for (i = 0; i < entries; i++) {
438 flavors[i] = ntohl(*p++); 440 flavors[i] = ntohl(*p++);
439 dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); 441 dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
440 } 442 }
441 *count = i; 443 *count = i;
442 444
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 81cf14257916..e6bf45710cc7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -233,7 +233,7 @@ nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs
233static int 233static int
234nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 234nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
235{ 235{
236 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 236 struct rpc_auth *auth = req->rq_cred->cr_auth;
237 unsigned int replen; 237 unsigned int replen;
238 u32 offset = (u32)args->offset; 238 u32 offset = (u32)args->offset;
239 u32 count = args->count; 239 u32 count = args->count;
@@ -337,10 +337,10 @@ nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
337static int 337static int
338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) 338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
339{ 339{
340 p = xdr_encode_fhandle(p, args->fromfh); 340 p = xdr_encode_fhandle(p, args->old_dir);
341 p = xdr_encode_array(p, args->fromname, args->fromlen); 341 p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
342 p = xdr_encode_fhandle(p, args->tofh); 342 p = xdr_encode_fhandle(p, args->new_dir);
343 p = xdr_encode_array(p, args->toname, args->tolen); 343 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
345 return 0; 345 return 0;
346} 346}
@@ -393,8 +393,7 @@ nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *arg
393static int 393static int
394nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) 394nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
395{ 395{
396 struct rpc_task *task = req->rq_task; 396 struct rpc_auth *auth = req->rq_cred->cr_auth;
397 struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth;
398 unsigned int replen; 397 unsigned int replen;
399 u32 count = args->count; 398 u32 count = args->count;
400 399
@@ -424,9 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
424 struct page **page; 423 struct page **page;
425 size_t hdrlen; 424 size_t hdrlen;
426 unsigned int pglen, recvd; 425 unsigned int pglen, recvd;
427 u32 len;
428 int status, nr = 0; 426 int status, nr = 0;
429 __be32 *end, *entry, *kaddr;
430 427
431 if ((status = ntohl(*p++))) 428 if ((status = ntohl(*p++)))
432 return nfs_stat_to_errno(status); 429 return nfs_stat_to_errno(status);
@@ -446,80 +443,59 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
446 if (pglen > recvd) 443 if (pglen > recvd)
447 pglen = recvd; 444 pglen = recvd;
448 page = rcvbuf->pages; 445 page = rcvbuf->pages;
449 kaddr = p = kmap_atomic(*page, KM_USER0);
450 end = (__be32 *)((char *)p + pglen);
451 entry = p;
452
453 /* Make sure the packet actually has a value_follows and EOF entry */
454 if ((entry + 1) > end)
455 goto short_pkt;
456
457 for (; *p++; nr++) {
458 if (p + 2 > end)
459 goto short_pkt;
460 p++; /* fileid */
461 len = ntohl(*p++);
462 p += XDR_QUADLEN(len) + 1; /* name plus cookie */
463 if (len > NFS2_MAXNAMLEN) {
464 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
465 len);
466 goto err_unmap;
467 }
468 if (p + 2 > end)
469 goto short_pkt;
470 entry = p;
471 }
472
473 /*
474 * Apparently some server sends responses that are a valid size, but
475 * contain no entries, and have value_follows==0 and EOF==0. For
476 * those, just set the EOF marker.
477 */
478 if (!nr && entry[1] == 0) {
479 dprintk("NFS: readdir reply truncated!\n");
480 entry[1] = 1;
481 }
482 out:
483 kunmap_atomic(kaddr, KM_USER0);
484 return nr; 446 return nr;
485 short_pkt: 447}
486 /* 448
487 * When we get a short packet there are 2 possibilities. We can 449static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
488 * return an error, or fix up the response to look like a valid 450{
489 * response and return what we have so far. If there are no 451 dprintk("nfs: %s: prematurely hit end of receive buffer. "
490 * entries and the packet was short, then return -EIO. If there 452 "Remaining buffer length is %tu words.\n",
491 * are valid entries in the response, return them and pretend that 453 func, xdr->end - xdr->p);
492 * the call was successful, but incomplete. The caller can retry the
493 * readdir starting at the last cookie.
494 */
495 entry[0] = entry[1] = 0;
496 if (!nr)
497 nr = -errno_NFSERR_IO;
498 goto out;
499err_unmap:
500 nr = -errno_NFSERR_IO;
501 goto out;
502} 454}
503 455
504__be32 * 456__be32 *
505nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 457nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
506{ 458{
507 if (!*p++) { 459 __be32 *p;
508 if (!*p) 460 p = xdr_inline_decode(xdr, 4);
461 if (unlikely(!p))
462 goto out_overflow;
463 if (!ntohl(*p++)) {
464 p = xdr_inline_decode(xdr, 4);
465 if (unlikely(!p))
466 goto out_overflow;
467 if (!ntohl(*p++))
509 return ERR_PTR(-EAGAIN); 468 return ERR_PTR(-EAGAIN);
510 entry->eof = 1; 469 entry->eof = 1;
511 return ERR_PTR(-EBADCOOKIE); 470 return ERR_PTR(-EBADCOOKIE);
512 } 471 }
513 472
473 p = xdr_inline_decode(xdr, 8);
474 if (unlikely(!p))
475 goto out_overflow;
476
514 entry->ino = ntohl(*p++); 477 entry->ino = ntohl(*p++);
515 entry->len = ntohl(*p++); 478 entry->len = ntohl(*p++);
479
480 p = xdr_inline_decode(xdr, entry->len + 4);
481 if (unlikely(!p))
482 goto out_overflow;
516 entry->name = (const char *) p; 483 entry->name = (const char *) p;
517 p += XDR_QUADLEN(entry->len); 484 p += XDR_QUADLEN(entry->len);
518 entry->prev_cookie = entry->cookie; 485 entry->prev_cookie = entry->cookie;
519 entry->cookie = ntohl(*p++); 486 entry->cookie = ntohl(*p++);
520 entry->eof = !p[0] && p[1]; 487
488 p = xdr_inline_peek(xdr, 8);
489 if (p != NULL)
490 entry->eof = !p[0] && p[1];
491 else
492 entry->eof = 0;
521 493
522 return p; 494 return p;
495
496out_overflow:
497 print_overflow_msg(__func__, xdr);
498 return ERR_PTR(-EIO);
523} 499}
524 500
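The rewritten nfs_decode_dirent() above replaces open-coded pointer walking over a kmap'ed page with xdr_stream accessors, so every read is length-checked against the end of the receive buffer before it happens. A minimal userspace sketch of that pattern (illustrative helper and type names, not kernel code):

#include <arpa/inet.h>
#include <stddef.h>
#include <stdint.h>

struct xdr_cursor {
        const uint32_t *p;      /* next word to decode */
        const uint32_t *end;    /* one past the last valid word */
};

/* Reserve nbytes (rounded up to XDR words); NULL means the buffer is short. */
static const uint32_t *cursor_decode(struct xdr_cursor *c, size_t nbytes)
{
        size_t nwords = (nbytes + 3) >> 2;

        if ((size_t)(c->end - c->p) < nwords)
                return NULL;
        c->p += nwords;
        return c->p - nwords;
}

/* Decode one NFSv2-style entry: value_follows, fileid, name, cookie. */
static int decode_entry(struct xdr_cursor *c, uint32_t *fileid,
                        const char **name, uint32_t *namelen)
{
        const uint32_t *p;

        p = cursor_decode(c, 4);
        if (p == NULL || ntohl(*p) == 0)
                return -1;                      /* short buffer or no more entries */
        p = cursor_decode(c, 8);
        if (p == NULL)
                return -1;
        *fileid  = ntohl(p[0]);
        *namelen = ntohl(p[1]);
        p = cursor_decode(c, *namelen + 4);     /* name plus trailing cookie */
        if (p == NULL)
                return -1;
        *name = (const char *)p;
        return 0;
}

int main(void)
{
        /* value_follows=1, fileid=2, namelen=1, name="a", cookie=3, value_follows=0 */
        uint32_t buf[] = {
                htonl(1), htonl(2), htonl(1), htonl('a' << 24), htonl(3), htonl(0),
        };
        struct xdr_cursor c = { buf, buf + sizeof(buf) / sizeof(buf[0]) };
        uint32_t fileid, namelen;
        const char *name;

        return decode_entry(&c, &fileid, &name, &namelen) == 0 ? 0 : 1;
}

The point of the pattern shows in the error paths: a truncated reply now yields -EIO (the out_overflow labels above) instead of a walk past the end of the mapped page.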
525/* 501/*
@@ -575,7 +551,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
575static int 551static int
576nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) 552nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
577{ 553{
578 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 554 struct rpc_auth *auth = req->rq_cred->cr_auth;
579 unsigned int replen; 555 unsigned int replen;
580 556
581 p = xdr_encode_fhandle(p, args->fh); 557 p = xdr_encode_fhandle(p, args->fh);
@@ -597,7 +573,6 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
597 struct kvec *iov = rcvbuf->head; 573 struct kvec *iov = rcvbuf->head;
598 size_t hdrlen; 574 size_t hdrlen;
599 u32 len, recvd; 575 u32 len, recvd;
600 char *kaddr;
601 int status; 576 int status;
602 577
603 if ((status = ntohl(*p++))) 578 if ((status = ntohl(*p++)))
@@ -624,10 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
624 return -EIO; 599 return -EIO;
625 } 600 }
626 601
627 /* NULL terminate the string we got */ 602 xdr_terminate_string(rcvbuf, len);
628 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
629 kaddr[len+rcvbuf->page_base] = '\0';
630 kunmap_atomic(kaddr, KM_USER0);
631 return 0; 603 return 0;
632} 604}
633 605
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index fabb4f2849a1..ce939c062a52 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -313,7 +313,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
313 */ 313 */
314static int 314static int
315nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 315nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
316 int flags, struct nameidata *nd) 316 int flags, struct nfs_open_context *ctx)
317{ 317{
318 struct nfs3_createdata *data; 318 struct nfs3_createdata *data;
319 mode_t mode = sattr->ia_mode; 319 mode_t mode = sattr->ia_mode;
@@ -438,19 +438,38 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
438 return 1; 438 return 1;
439} 439}
440 440
441static void
442nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
443{
444 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
445}
446
447static int
448nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
449 struct inode *new_dir)
450{
451 struct nfs_renameres *res;
452
453 if (nfs3_async_handle_jukebox(task, old_dir))
454 return 0;
455 res = task->tk_msg.rpc_resp;
456
457 nfs_post_op_update_inode(old_dir, res->old_fattr);
458 nfs_post_op_update_inode(new_dir, res->new_fattr);
459 return 1;
460}
461
441static int 462static int
442nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, 463nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
443 struct inode *new_dir, struct qstr *new_name) 464 struct inode *new_dir, struct qstr *new_name)
444{ 465{
445 struct nfs3_renameargs arg = { 466 struct nfs_renameargs arg = {
446 .fromfh = NFS_FH(old_dir), 467 .old_dir = NFS_FH(old_dir),
447 .fromname = old_name->name, 468 .old_name = old_name,
448 .fromlen = old_name->len, 469 .new_dir = NFS_FH(new_dir),
449 .tofh = NFS_FH(new_dir), 470 .new_name = new_name,
450 .toname = new_name->name,
451 .tolen = new_name->len
452 }; 471 };
453 struct nfs3_renameres res; 472 struct nfs_renameres res;
454 struct rpc_message msg = { 473 struct rpc_message msg = {
455 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], 474 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
456 .rpc_argp = &arg, 475 .rpc_argp = &arg,
@@ -460,17 +479,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
460 479
461 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 480 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
462 481
463 res.fromattr = nfs_alloc_fattr(); 482 res.old_fattr = nfs_alloc_fattr();
464 res.toattr = nfs_alloc_fattr(); 483 res.new_fattr = nfs_alloc_fattr();
465 if (res.fromattr == NULL || res.toattr == NULL) 484 if (res.old_fattr == NULL || res.new_fattr == NULL)
466 goto out; 485 goto out;
467 486
468 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); 487 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
469 nfs_post_op_update_inode(old_dir, res.fromattr); 488 nfs_post_op_update_inode(old_dir, res.old_fattr);
470 nfs_post_op_update_inode(new_dir, res.toattr); 489 nfs_post_op_update_inode(new_dir, res.new_fattr);
471out: 490out:
472 nfs_free_fattr(res.toattr); 491 nfs_free_fattr(res.old_fattr);
473 nfs_free_fattr(res.fromattr); 492 nfs_free_fattr(res.new_fattr);
474 dprintk("NFS reply rename: %d\n", status); 493 dprintk("NFS reply rename: %d\n", status);
475 return status; 494 return status;
476} 495}
@@ -611,7 +630,7 @@ out:
611 */ 630 */
612static int 631static int
613nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 632nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
614 u64 cookie, struct page *page, unsigned int count, int plus) 633 u64 cookie, struct page **pages, unsigned int count, int plus)
615{ 634{
616 struct inode *dir = dentry->d_inode; 635 struct inode *dir = dentry->d_inode;
617 __be32 *verf = NFS_COOKIEVERF(dir); 636 __be32 *verf = NFS_COOKIEVERF(dir);
@@ -621,7 +640,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
621 .verf = {verf[0], verf[1]}, 640 .verf = {verf[0], verf[1]},
622 .plus = plus, 641 .plus = plus,
623 .count = count, 642 .count = count,
624 .pages = &page 643 .pages = pages
625 }; 644 };
626 struct nfs3_readdirres res = { 645 struct nfs3_readdirres res = {
627 .verf = verf, 646 .verf = verf,
@@ -652,7 +671,8 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
652 671
653 nfs_free_fattr(res.dir_attr); 672 nfs_free_fattr(res.dir_attr);
654out: 673out:
655 dprintk("NFS reply readdir: %d\n", status); 674 dprintk("NFS reply readdir%s: %d\n",
675 plus? "plus" : "", status);
656 return status; 676 return status;
657} 677}
658 678
@@ -722,7 +742,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
722 dprintk("NFS call fsstat\n"); 742 dprintk("NFS call fsstat\n");
723 nfs_fattr_init(stat->fattr); 743 nfs_fattr_init(stat->fattr);
724 status = rpc_call_sync(server->client, &msg, 0); 744 status = rpc_call_sync(server->client, &msg, 0);
725 dprintk("NFS reply statfs: %d\n", status); 745 dprintk("NFS reply fsstat: %d\n", status);
726 return status; 746 return status;
727} 747}
728 748
@@ -844,6 +864,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
844 .unlink_setup = nfs3_proc_unlink_setup, 864 .unlink_setup = nfs3_proc_unlink_setup,
845 .unlink_done = nfs3_proc_unlink_done, 865 .unlink_done = nfs3_proc_unlink_done,
846 .rename = nfs3_proc_rename, 866 .rename = nfs3_proc_rename,
867 .rename_setup = nfs3_proc_rename_setup,
868 .rename_done = nfs3_proc_rename_done,
847 .link = nfs3_proc_link, 869 .link = nfs3_proc_link,
848 .symlink = nfs3_proc_symlink, 870 .symlink = nfs3_proc_symlink,
849 .mkdir = nfs3_proc_mkdir, 871 .mkdir = nfs3_proc_mkdir,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 75dcfc7da365..d9a5e832c257 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -100,6 +100,13 @@ static const umode_t nfs_type2fmt[] = {
100 [NF3FIFO] = S_IFIFO, 100 [NF3FIFO] = S_IFIFO,
101}; 101};
102 102
103static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
104{
105 dprintk("nfs: %s: prematurely hit end of receive buffer. "
106 "Remaining buffer length is %tu words.\n",
107 func, xdr->end - xdr->p);
108}
109
103/* 110/*
104 * Common NFS XDR functions as inlines 111 * Common NFS XDR functions as inlines
105 */ 112 */
@@ -119,6 +126,29 @@ xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
119 return NULL; 126 return NULL;
120} 127}
121 128
129static inline __be32 *
130xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh)
131{
132 __be32 *p;
133 p = xdr_inline_decode(xdr, 4);
134 if (unlikely(!p))
135 goto out_overflow;
136 fh->size = ntohl(*p++);
137
138 if (fh->size <= NFS3_FHSIZE) {
139 p = xdr_inline_decode(xdr, fh->size);
140 if (unlikely(!p))
141 goto out_overflow;
142 memcpy(fh->data, p, fh->size);
143 return p + XDR_QUADLEN(fh->size);
144 }
145 return NULL;
146
147out_overflow:
148 print_overflow_msg(__func__, xdr);
149 return ERR_PTR(-EIO);
150}
151
122/* 152/*
123 * Encode/decode time. 153 * Encode/decode time.
124 */ 154 */
@@ -241,6 +271,26 @@ xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
241} 271}
242 272
243static inline __be32 * 273static inline __be32 *
274xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr)
275{
276 __be32 *p;
277
278 p = xdr_inline_decode(xdr, 4);
279 if (unlikely(!p))
280 goto out_overflow;
281 if (ntohl(*p++)) {
282 p = xdr_inline_decode(xdr, 84);
283 if (unlikely(!p))
284 goto out_overflow;
285 p = xdr_decode_fattr(p, fattr);
286 }
287 return p;
288out_overflow:
289 print_overflow_msg(__func__, xdr);
290 return ERR_PTR(-EIO);
291}
292
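The fixed 84-byte pull in xdr_decode_post_op_attr_stream() above is the wire size of a complete NFSv3 fattr3: five 32-bit fields (type, mode, nlink, uid, gid) plus eight 64-bit quantities (size, used, rdev, fsid, fileid and the three nfstime3 stamps), i.e. 5 * 4 + 8 * 8 = 84 bytes.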
293static inline __be32 *
244xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) 294xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
245{ 295{
246 if (*p++) 296 if (*p++)
@@ -330,7 +380,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
330static int 380static int
331nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 381nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
332{ 382{
333 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 383 struct rpc_auth *auth = req->rq_cred->cr_auth;
334 unsigned int replen; 384 unsigned int replen;
335 u32 count = args->count; 385 u32 count = args->count;
336 386
@@ -442,12 +492,12 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
442 * Encode RENAME arguments 492 * Encode RENAME arguments
443 */ 493 */
444static int 494static int
445nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args) 495nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
446{ 496{
447 p = xdr_encode_fhandle(p, args->fromfh); 497 p = xdr_encode_fhandle(p, args->old_dir);
448 p = xdr_encode_array(p, args->fromname, args->fromlen); 498 p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
449 p = xdr_encode_fhandle(p, args->tofh); 499 p = xdr_encode_fhandle(p, args->new_dir);
450 p = xdr_encode_array(p, args->toname, args->tolen); 500 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
451 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 501 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
452 return 0; 502 return 0;
453} 503}
@@ -471,7 +521,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
471static int 521static int
472nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) 522nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
473{ 523{
474 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 524 struct rpc_auth *auth = req->rq_cred->cr_auth;
475 unsigned int replen; 525 unsigned int replen;
476 u32 count = args->count; 526 u32 count = args->count;
477 527
@@ -504,9 +554,8 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
504 struct kvec *iov = rcvbuf->head; 554 struct kvec *iov = rcvbuf->head;
505 struct page **page; 555 struct page **page;
506 size_t hdrlen; 556 size_t hdrlen;
507 u32 len, recvd, pglen; 557 u32 recvd, pglen;
508 int status, nr = 0; 558 int status, nr = 0;
509 __be32 *entry, *end, *kaddr;
510 559
511 status = ntohl(*p++); 560 status = ntohl(*p++);
512 /* Decode post_op_attrs */ 561 /* Decode post_op_attrs */
@@ -536,99 +585,38 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
536 if (pglen > recvd) 585 if (pglen > recvd)
537 pglen = recvd; 586 pglen = recvd;
538 page = rcvbuf->pages; 587 page = rcvbuf->pages;
539 kaddr = p = kmap_atomic(*page, KM_USER0);
540 end = (__be32 *)((char *)p + pglen);
541 entry = p;
542
543 /* Make sure the packet actually has a value_follows and EOF entry */
544 if ((entry + 1) > end)
545 goto short_pkt;
546
547 for (; *p++; nr++) {
548 if (p + 3 > end)
549 goto short_pkt;
550 p += 2; /* inode # */
551 len = ntohl(*p++); /* string length */
552 p += XDR_QUADLEN(len) + 2; /* name + cookie */
553 if (len > NFS3_MAXNAMLEN) {
554 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
555 len);
556 goto err_unmap;
557 }
558 588
559 if (res->plus) {
560 /* post_op_attr */
561 if (p + 2 > end)
562 goto short_pkt;
563 if (*p++) {
564 p += 21;
565 if (p + 1 > end)
566 goto short_pkt;
567 }
568 /* post_op_fh3 */
569 if (*p++) {
570 if (p + 1 > end)
571 goto short_pkt;
572 len = ntohl(*p++);
573 if (len > NFS3_FHSIZE) {
574 dprintk("NFS: giant filehandle in "
575 "readdir (len 0x%x)!\n", len);
576 goto err_unmap;
577 }
578 p += XDR_QUADLEN(len);
579 }
580 }
581
582 if (p + 2 > end)
583 goto short_pkt;
584 entry = p;
585 }
586
587 /*
588 * Apparently some server sends responses that are a valid size, but
589 * contain no entries, and have value_follows==0 and EOF==0. For
590 * those, just set the EOF marker.
591 */
592 if (!nr && entry[1] == 0) {
593 dprintk("NFS: readdir reply truncated!\n");
594 entry[1] = 1;
595 }
596 out:
597 kunmap_atomic(kaddr, KM_USER0);
598 return nr; 589 return nr;
599 short_pkt:
600 /*
601 * When we get a short packet there are 2 possibilities. We can
602 * return an error, or fix up the response to look like a valid
603 * response and return what we have so far. If there are no
604 * entries and the packet was short, then return -EIO. If there
605 * are valid entries in the response, return them and pretend that
606 * the call was successful, but incomplete. The caller can retry the
607 * readdir starting at the last cookie.
608 */
609 entry[0] = entry[1] = 0;
610 if (!nr)
611 nr = -errno_NFSERR_IO;
612 goto out;
613err_unmap:
614 nr = -errno_NFSERR_IO;
615 goto out;
616} 590}
617 591
618__be32 * 592__be32 *
619nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 593nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
620{ 594{
595 __be32 *p;
621 struct nfs_entry old = *entry; 596 struct nfs_entry old = *entry;
622 597
623 if (!*p++) { 598 p = xdr_inline_decode(xdr, 4);
624 if (!*p) 599 if (unlikely(!p))
600 goto out_overflow;
601 if (!ntohl(*p++)) {
602 p = xdr_inline_decode(xdr, 4);
603 if (unlikely(!p))
604 goto out_overflow;
605 if (!ntohl(*p++))
625 return ERR_PTR(-EAGAIN); 606 return ERR_PTR(-EAGAIN);
626 entry->eof = 1; 607 entry->eof = 1;
627 return ERR_PTR(-EBADCOOKIE); 608 return ERR_PTR(-EBADCOOKIE);
628 } 609 }
629 610
611 p = xdr_inline_decode(xdr, 12);
612 if (unlikely(!p))
613 goto out_overflow;
630 p = xdr_decode_hyper(p, &entry->ino); 614 p = xdr_decode_hyper(p, &entry->ino);
631 entry->len = ntohl(*p++); 615 entry->len = ntohl(*p++);
616
617 p = xdr_inline_decode(xdr, entry->len + 8);
618 if (unlikely(!p))
619 goto out_overflow;
632 entry->name = (const char *) p; 620 entry->name = (const char *) p;
633 p += XDR_QUADLEN(entry->len); 621 p += XDR_QUADLEN(entry->len);
634 entry->prev_cookie = entry->cookie; 622 entry->prev_cookie = entry->cookie;
@@ -636,10 +624,17 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
636 624
637 if (plus) { 625 if (plus) {
638 entry->fattr->valid = 0; 626 entry->fattr->valid = 0;
639 p = xdr_decode_post_op_attr(p, entry->fattr); 627 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
628 if (IS_ERR(p))
629 goto out_overflow_exit;
640 /* In fact, a post_op_fh3: */ 630 /* In fact, a post_op_fh3: */
631 p = xdr_inline_decode(xdr, 4);
632 if (unlikely(!p))
633 goto out_overflow;
641 if (*p++) { 634 if (*p++) {
642 p = xdr_decode_fhandle(p, entry->fh); 635 p = xdr_decode_fhandle_stream(xdr, entry->fh);
636 if (IS_ERR(p))
637 goto out_overflow_exit;
643 /* Ugh -- server reply was truncated */ 638 /* Ugh -- server reply was truncated */
644 if (p == NULL) { 639 if (p == NULL) {
645 dprintk("NFS: FH truncated\n"); 640 dprintk("NFS: FH truncated\n");
@@ -650,8 +645,18 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
650 memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); 645 memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
651 } 646 }
652 647
653 entry->eof = !p[0] && p[1]; 648 p = xdr_inline_peek(xdr, 8);
649 if (p != NULL)
650 entry->eof = !p[0] && p[1];
651 else
652 entry->eof = 0;
653
654 return p; 654 return p;
655
656out_overflow:
657 print_overflow_msg(__func__, xdr);
658out_overflow_exit:
659 return ERR_PTR(-EIO);
655} 660}
656 661
657/* 662/*
@@ -675,7 +680,7 @@ static int
675nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, 680nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
676 struct nfs3_getaclargs *args) 681 struct nfs3_getaclargs *args)
677{ 682{
678 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 683 struct rpc_auth *auth = req->rq_cred->cr_auth;
679 unsigned int replen; 684 unsigned int replen;
680 685
681 p = xdr_encode_fhandle(p, args->fh); 686 p = xdr_encode_fhandle(p, args->fh);
@@ -802,7 +807,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
802static int 807static int
803nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) 808nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
804{ 809{
805 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 810 struct rpc_auth *auth = req->rq_cred->cr_auth;
806 unsigned int replen; 811 unsigned int replen;
807 812
808 p = xdr_encode_fhandle(p, args->fh); 813 p = xdr_encode_fhandle(p, args->fh);
@@ -824,7 +829,6 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
824 struct kvec *iov = rcvbuf->head; 829 struct kvec *iov = rcvbuf->head;
825 size_t hdrlen; 830 size_t hdrlen;
826 u32 len, recvd; 831 u32 len, recvd;
827 char *kaddr;
828 int status; 832 int status;
829 833
830 status = ntohl(*p++); 834 status = ntohl(*p++);
@@ -857,10 +861,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
857 return -EIO; 861 return -EIO;
858 } 862 }
859 863
860 /* NULL terminate the string we got */ 864 xdr_terminate_string(rcvbuf, len);
861 kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0);
862 kaddr[len+rcvbuf->page_base] = '\0';
863 kunmap_atomic(kaddr, KM_USER0);
864 return 0; 865 return 0;
865} 866}
866 867
@@ -970,14 +971,14 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
970 * Decode RENAME reply 971 * Decode RENAME reply
971 */ 972 */
972static int 973static int
973nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) 974nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
974{ 975{
975 int status; 976 int status;
976 977
977 if ((status = ntohl(*p++)) != 0) 978 if ((status = ntohl(*p++)) != 0)
978 status = nfs_stat_to_errno(status); 979 status = nfs_stat_to_errno(status);
979 p = xdr_decode_wcc_data(p, res->fromattr); 980 p = xdr_decode_wcc_data(p, res->old_fattr);
980 p = xdr_decode_wcc_data(p, res->toattr); 981 p = xdr_decode_wcc_data(p, res->new_fattr);
981 return status; 982 return status;
982} 983}
983 984
@@ -1043,8 +1044,9 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
1043 res->wtmult = ntohl(*p++); 1044 res->wtmult = ntohl(*p++);
1044 res->dtpref = ntohl(*p++); 1045 res->dtpref = ntohl(*p++);
1045 p = xdr_decode_hyper(p, &res->maxfilesize); 1046 p = xdr_decode_hyper(p, &res->maxfilesize);
1047 p = xdr_decode_time3(p, &res->time_delta);
1046 1048
1047 /* ignore time_delta and properties */ 1049 /* ignore properties */
1048 res->lease_time = 0; 1050 res->lease_time = 0;
1049 return 0; 1051 return 0;
1050} 1052}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c538c6106e16..9fa496387fdf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -45,10 +45,29 @@ enum nfs4_client_state {
45 NFS4CLNT_RECLAIM_NOGRACE, 45 NFS4CLNT_RECLAIM_NOGRACE,
46 NFS4CLNT_DELEGRETURN, 46 NFS4CLNT_DELEGRETURN,
47 NFS4CLNT_SESSION_RESET, 47 NFS4CLNT_SESSION_RESET,
48 NFS4CLNT_SESSION_DRAINING,
49 NFS4CLNT_RECALL_SLOT, 48 NFS4CLNT_RECALL_SLOT,
50}; 49};
51 50
51enum nfs4_session_state {
52 NFS4_SESSION_INITING,
53 NFS4_SESSION_DRAINING,
54};
55
56struct nfs4_minor_version_ops {
57 u32 minor_version;
58
59 int (*call_sync)(struct nfs_server *server,
60 struct rpc_message *msg,
61 struct nfs4_sequence_args *args,
62 struct nfs4_sequence_res *res,
63 int cache_reply);
64 int (*validate_stateid)(struct nfs_delegation *,
65 const nfs4_stateid *);
66 const struct nfs4_state_recovery_ops *reboot_recovery_ops;
67 const struct nfs4_state_recovery_ops *nograce_recovery_ops;
68 const struct nfs4_state_maintenance_ops *state_renewal_ops;
69};
70
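The nfs4_minor_version_ops structure introduced above collects the behaviour that differs between NFSv4.0 and NFSv4.1 (synchronous call path, stateid validation, recovery and renewal ops) behind one table selected per client. A standalone model of that dispatch style (purely illustrative; the real tables and their kernel types live in fs/nfs/nfs4proc.c):

#include <stdio.h>

struct minor_version_ops {
        unsigned int minor_version;
        int (*call_sync)(const char *op);
};

static int v40_call_sync(const char *op)
{
        printf("v4.0: %s sent as a plain compound\n", op);
        return 0;
}

static int v41_call_sync(const char *op)
{
        printf("v4.1: %s sent under a session SEQUENCE op\n", op);
        return 0;
}

static const struct minor_version_ops v40_ops = { 0, v40_call_sync };
static const struct minor_version_ops v41_ops = { 1, v41_call_sync };
static const struct minor_version_ops *minor_ops[] = { &v40_ops, &v41_ops };

int main(void)
{
        /* A mount negotiated to minor version 1 dispatches through one table;
         * callers never test the minor version themselves. */
        return minor_ops[1]->call_sync("GETATTR");
}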
52/* 71/*
53 * struct rpc_sequence ensures that RPC calls are sent in the exact 72 * struct rpc_sequence ensures that RPC calls are sent in the exact
54 * order that they appear on the list. 73 * order that they appear on the list.
@@ -89,7 +108,6 @@ struct nfs_unique_id {
89 */ 108 */
90struct nfs4_state_owner { 109struct nfs4_state_owner {
91 struct nfs_unique_id so_owner_id; 110 struct nfs_unique_id so_owner_id;
92 struct nfs_client *so_client;
93 struct nfs_server *so_server; 111 struct nfs_server *so_server;
94 struct rb_node so_client_node; 112 struct rb_node so_client_node;
95 113
@@ -99,7 +117,6 @@ struct nfs4_state_owner {
99 atomic_t so_count; 117 atomic_t so_count;
100 unsigned long so_flags; 118 unsigned long so_flags;
101 struct list_head so_states; 119 struct list_head so_states;
102 struct list_head so_delegations;
103 struct nfs_seqid_counter so_seqid; 120 struct nfs_seqid_counter so_seqid;
104 struct rpc_sequence so_sequence; 121 struct rpc_sequence so_sequence;
105}; 122};
@@ -125,10 +142,20 @@ enum {
125 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) 142 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
126 */ 143 */
127 144
145struct nfs4_lock_owner {
146 unsigned int lo_type;
147#define NFS4_ANY_LOCK_TYPE (0U)
148#define NFS4_FLOCK_LOCK_TYPE (1U << 0)
149#define NFS4_POSIX_LOCK_TYPE (1U << 1)
150 union {
151 fl_owner_t posix_owner;
152 pid_t flock_owner;
153 } lo_u;
154};
155
128struct nfs4_lock_state { 156struct nfs4_lock_state {
129 struct list_head ls_locks; /* Other lock stateids */ 157 struct list_head ls_locks; /* Other lock stateids */
130 struct nfs4_state * ls_state; /* Pointer to open state */ 158 struct nfs4_state * ls_state; /* Pointer to open state */
131 fl_owner_t ls_owner; /* POSIX lock owner */
132#define NFS_LOCK_INITIALIZED 1 159#define NFS_LOCK_INITIALIZED 1
133 int ls_flags; 160 int ls_flags;
134 struct nfs_seqid_counter ls_seqid; 161 struct nfs_seqid_counter ls_seqid;
@@ -136,6 +163,7 @@ struct nfs4_lock_state {
136 struct nfs_unique_id ls_id; 163 struct nfs_unique_id ls_id;
137 nfs4_stateid ls_stateid; 164 nfs4_stateid ls_stateid;
138 atomic_t ls_count; 165 atomic_t ls_count;
166 struct nfs4_lock_owner ls_owner;
139}; 167};
140 168
141/* bits for nfs4_state->flags */ 169/* bits for nfs4_state->flags */
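The nfs4_lock_owner added here turns the old bare fl_owner_t into a tagged union, so POSIX byte-range locks (keyed by the owner's files_struct pointer) and flock()-style locks (keyed by pid) can share the lock-state machinery. A small standalone model of matching such tagged owners (field names are stand-ins, not the kernel's):

#include <stdbool.h>

#define ANY_LOCK_TYPE   (0U)
#define FLOCK_LOCK_TYPE (1U << 0)
#define POSIX_LOCK_TYPE (1U << 1)

struct lock_owner {
        unsigned int type;
        union {
                void *posix_owner;      /* stands in for fl_owner_t */
                int flock_owner;        /* stands in for pid_t */
        } u;
};

static bool lock_owner_matches(const struct lock_owner *a,
                               const struct lock_owner *b)
{
        if (a->type != b->type)
                return false;
        if (a->type == POSIX_LOCK_TYPE)
                return a->u.posix_owner == b->u.posix_owner;
        if (a->type == FLOCK_LOCK_TYPE)
                return a->u.flock_owner == b->u.flock_owner;
        return false;
}

int main(void)
{
        struct lock_owner a = { .type = FLOCK_LOCK_TYPE, .u.flock_owner = 42 };
        struct lock_owner b = { .type = FLOCK_LOCK_TYPE, .u.flock_owner = 42 };

        return lock_owner_matches(&a, &b) ? 0 : 1;
}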
@@ -214,16 +242,18 @@ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
214extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
215extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
216extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); 244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
217extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
218extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
219extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 245extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
220extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 246extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
221 struct nfs4_fs_locations *fs_locations, struct page *page); 247 struct nfs4_fs_locations *fs_locations, struct page *page);
248extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
222 249
223extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
224extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
225#if defined(CONFIG_NFS_V4_1) 250#if defined(CONFIG_NFS_V4_1)
226extern int nfs4_setup_sequence(struct nfs_client *clp, 251static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
252{
253 return server->nfs_client->cl_session;
254}
255
256extern int nfs4_setup_sequence(const struct nfs_server *server,
227 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 257 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
228 int cache_reply, struct rpc_task *task); 258 int cache_reply, struct rpc_task *task);
229extern void nfs4_destroy_session(struct nfs4_session *session); 259extern void nfs4_destroy_session(struct nfs4_session *session);
@@ -234,7 +264,12 @@ extern int nfs4_init_session(struct nfs_server *server);
234extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 264extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
235 struct nfs_fsinfo *fsinfo); 265 struct nfs_fsinfo *fsinfo);
236#else /* CONFIG_NFS_v4_1 */ 266#else /* CONFIG_NFS_v4_1 */
237static inline int nfs4_setup_sequence(struct nfs_client *clp, 267static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
268{
269 return NULL;
270}
271
272static inline int nfs4_setup_sequence(const struct nfs_server *server,
238 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 273 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
239 int cache_reply, struct rpc_task *task) 274 int cache_reply, struct rpc_task *task)
240{ 275{
@@ -247,7 +282,7 @@ static inline int nfs4_init_session(struct nfs_server *server)
247} 282}
248#endif /* CONFIG_NFS_V4_1 */ 283#endif /* CONFIG_NFS_V4_1 */
249 284
250extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[]; 285extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
251 286
252extern const u32 nfs4_fattr_bitmap[2]; 287extern const u32 nfs4_fattr_bitmap[2];
253extern const u32 nfs4_statfs_bitmap[2]; 288extern const u32 nfs4_statfs_bitmap[2];
@@ -284,7 +319,7 @@ extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
284extern void nfs41_handle_recall_slot(struct nfs_client *clp); 319extern void nfs41_handle_recall_slot(struct nfs_client *clp);
285extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 320extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
286extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 321extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
287extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 322extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
288 323
289extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); 324extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
290extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 325extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -296,7 +331,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
296extern const nfs4_stateid zero_stateid; 331extern const nfs4_stateid zero_stateid;
297 332
298/* nfs4xdr.c */ 333/* nfs4xdr.c */
299extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 334extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
300extern struct rpc_procinfo nfs4_procedures[]; 335extern struct rpc_procinfo nfs4_procedures[];
301 336
302struct nfs4_mount_data; 337struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
new file mode 100644
index 000000000000..2e92f0d8d654
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.c
@@ -0,0 +1,280 @@
1/*
2 * Module for the pnfs nfs4 file layout driver.
3 * Defines all I/O and Policy interface operations, plus code
4 * to register itself with the pNFS client.
5 *
6 * Copyright (c) 2002
7 * The Regents of the University of Michigan
8 * All Rights Reserved
9 *
10 * Dean Hildebrand <dhildebz@umich.edu>
11 *
12 * Permission is granted to use, copy, create derivative works, and
13 * redistribute this software and such derivative works for any purpose,
14 * so long as the name of the University of Michigan is not used in
15 * any advertising or publicity pertaining to the use or distribution
16 * of this software without specific, written prior authorization. If
17 * the above copyright notice or any other identification of the
18 * University of Michigan is included in any copy of any portion of
19 * this software, then the disclaimer below must also be included.
20 *
21 * This software is provided as is, without representation or warranty
22 * of any kind either express or implied, including without limitation
23 * the implied warranties of merchantability, fitness for a particular
24 * purpose, or noninfringement. The Regents of the University of
25 * Michigan shall not be liable for any damages, including special,
26 * indirect, incidental, or consequential damages, with respect to any
27 * claim arising out of or in connection with the use of the software,
28 * even if it has been or is hereafter advised of the possibility of
29 * such damages.
30 */
31
32#include <linux/nfs_fs.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42
43static int
44filelayout_set_layoutdriver(struct nfs_server *nfss)
45{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
47 nfs4_fl_free_deviceid_callback);
48 if (status) {
49 printk(KERN_WARNING "%s: deviceid cache could not be "
50 "initialized\n", __func__);
51 return status;
52 }
53 dprintk("%s: deviceid cache has been initialized successfully\n",
54 __func__);
55 return 0;
56}
57
58/* Clear out the layout by destroying its device list */
59static int
60filelayout_clear_layoutdriver(struct nfs_server *nfss)
61{
62 dprintk("--> %s\n", __func__);
63
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0;
67}
68
69/*
70 * filelayout_check_layout()
71 *
72 * Make sure layout segment parameters are sane WRT the device.
73 * At this point no generic layer initialization of the lseg has occurred,
74 * and nothing has been added to the layout_hdr cache.
75 *
76 */
77static int
78filelayout_check_layout(struct pnfs_layout_hdr *lo,
79 struct nfs4_filelayout_segment *fl,
80 struct nfs4_layoutget_res *lgr,
81 struct nfs4_deviceid *id)
82{
83 struct nfs4_file_layout_dsaddr *dsaddr;
84 int status = -EINVAL;
85 struct nfs_server *nfss = NFS_SERVER(lo->inode);
86
87 dprintk("--> %s\n", __func__);
88
89 if (fl->pattern_offset > lgr->range.offset) {
 90 dprintk("%s pattern_offset %lld too large\n",
 90 dprintk("%s pattern_offset %lld too large\n",
91 __func__, fl->pattern_offset);
92 goto out;
93 }
94
95 if (fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n",
97 __func__, fl->stripe_unit);
98 goto out;
99 }
100
101 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
103 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->inode, id);
105 if (dsaddr == NULL)
106 goto out;
107 }
108 fl->dsaddr = dsaddr;
109
110 if (fl->first_stripe_index < 0 ||
111 fl->first_stripe_index >= dsaddr->stripe_count) {
112 dprintk("%s Bad first_stripe_index %d\n",
113 __func__, fl->first_stripe_index);
114 goto out_put;
115 }
116
117 if ((fl->stripe_type == STRIPE_SPARSE &&
118 fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
119 (fl->stripe_type == STRIPE_DENSE &&
120 fl->num_fh != dsaddr->stripe_count)) {
121 dprintk("%s num_fh %u not valid for given packing\n",
122 __func__, fl->num_fh);
123 goto out_put;
124 }
125
126 if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
127 dprintk("%s Stripe unit (%u) not aligned with rsize %u "
128 "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
129 nfss->wsize);
130 }
131
132 status = 0;
133out:
134 dprintk("--> %s returns %d\n", __func__, status);
135 return status;
136out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
138 goto out;
139}
140
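The num_fh check above encodes the two RFC 5661 file-layout packing schemes: with STRIPE_SPARSE the data servers serve the file at its original offsets, so the layout needs either one shared filehandle or one per data server, while STRIPE_DENSE stores each stripe remapped on its data server and therefore needs one filehandle per stripe index. A condensed reading of that rule (an interpretation of the check, not copied kernel code):

/* Returns non-zero when num_fh is acceptable for the given packing. */
int num_fh_is_valid(int sparse, unsigned int num_fh,
                    unsigned int ds_num, unsigned int stripe_count)
{
        if (sparse)
                return num_fh <= 1 || num_fh == ds_num;  /* STRIPE_SPARSE */
        return num_fh == stripe_count;                   /* STRIPE_DENSE */
}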
141static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
142{
143 int i;
144
145 for (i = 0; i < fl->num_fh; i++) {
146 if (!fl->fh_array[i])
147 break;
148 kfree(fl->fh_array[i]);
149 }
150 kfree(fl->fh_array);
151 fl->fh_array = NULL;
152}
153
154static void
155_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
156{
157 filelayout_free_fh_array(fl);
158 kfree(fl);
159}
160
161static int
162filelayout_decode_layout(struct pnfs_layout_hdr *flo,
163 struct nfs4_filelayout_segment *fl,
164 struct nfs4_layoutget_res *lgr,
165 struct nfs4_deviceid *id)
166{
167 uint32_t *p = (uint32_t *)lgr->layout.buf;
168 uint32_t nfl_util;
169 int i;
170
171 dprintk("%s: set_layout_map Begin\n", __func__);
172
173 memcpy(id, p, sizeof(*id));
174 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
175 print_deviceid(id);
176
177 nfl_util = be32_to_cpup(p++);
178 if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
179 fl->commit_through_mds = 1;
180 if (nfl_util & NFL4_UFLG_DENSE)
181 fl->stripe_type = STRIPE_DENSE;
182 else
183 fl->stripe_type = STRIPE_SPARSE;
184 fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
185
186 fl->first_stripe_index = be32_to_cpup(p++);
187 p = xdr_decode_hyper(p, &fl->pattern_offset);
188 fl->num_fh = be32_to_cpup(p++);
189
190 dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
191 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
192 fl->pattern_offset);
193
194 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
195 GFP_KERNEL);
196 if (!fl->fh_array)
197 return -ENOMEM;
198
199 for (i = 0; i < fl->num_fh; i++) {
200 /* Do we want to use a mempool here? */
201 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
202 if (!fl->fh_array[i]) {
203 filelayout_free_fh_array(fl);
204 return -ENOMEM;
205 }
206 fl->fh_array[i]->size = be32_to_cpup(p++);
207 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
208 printk(KERN_ERR "Too big fh %d received %d\n",
209 i, fl->fh_array[i]->size);
210 filelayout_free_fh_array(fl);
211 return -EIO;
212 }
213 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
214 p += XDR_QUADLEN(fl->fh_array[i]->size);
215 dprintk("DEBUG: %s: fh len %d\n", __func__,
216 fl->fh_array[i]->size);
217 }
218
219 return 0;
220}
221
222static struct pnfs_layout_segment *
223filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
224 struct nfs4_layoutget_res *lgr)
225{
226 struct nfs4_filelayout_segment *fl;
227 int rc;
228 struct nfs4_deviceid id;
229
230 dprintk("--> %s\n", __func__);
231 fl = kzalloc(sizeof(*fl), GFP_KERNEL);
232 if (!fl)
233 return NULL;
234
235 rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
236 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
237 _filelayout_free_lseg(fl);
238 return NULL;
239 }
240 return &fl->generic_hdr;
241}
242
243static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{
246 struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248
249 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache,
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl);
253}
254
255static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver,
260 .clear_layoutdriver = filelayout_clear_layoutdriver,
261 .alloc_lseg = filelayout_alloc_lseg,
262 .free_lseg = filelayout_free_lseg,
263};
264
265static int __init nfs4filelayout_init(void)
266{
267 printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
268 __func__);
269 return pnfs_register_layoutdriver(&filelayout_type);
270}
271
272static void __exit nfs4filelayout_exit(void)
273{
274 printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
275 __func__);
276 pnfs_unregister_layoutdriver(&filelayout_type);
277}
278
279module_init(nfs4filelayout_init);
280module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
new file mode 100644
index 000000000000..bbf60dd2ab9d
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.h
@@ -0,0 +1,94 @@
1/*
2 * NFSv4 file layout driver data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_NFS4FILELAYOUT_H
31#define FS_NFS_NFS4FILELAYOUT_H
32
33#include "pnfs.h"
34
35/*
 36 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures.
40 */
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43
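The numbers follow directly from the u8 choice: a 4096-entry stripe_indices array costs 4 KiB instead of the 16 KiB it would take as u32, and since a u8 can only name 256 distinct values, at most 256 multipath_list4 entries (data servers) can be referenced per device.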
44enum stripetype4 {
45 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2
47};
48
49/* Individual ip address */
50struct nfs4_pnfs_ds {
51 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
52 u32 ds_ip_addr;
53 u32 ds_port;
54 struct nfs_client *ds_clp;
55 atomic_t ds_count;
56};
57
58struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid;
60 u32 stripe_count;
61 u8 *stripe_indices;
62 u32 ds_num;
63 struct nfs4_pnfs_ds *ds_list[1];
64};
65
66struct nfs4_filelayout_segment {
67 struct pnfs_layout_segment generic_hdr;
68 u32 stripe_type;
69 u32 commit_through_mds;
70 u32 stripe_unit;
71 u32 first_stripe_index;
72 u64 pattern_offset;
73 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
74 unsigned int num_fh;
75 struct nfs_fh **fh_array;
76};
77
78static inline struct nfs4_filelayout_segment *
79FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
80{
81 return container_of(lseg,
82 struct nfs4_filelayout_segment,
83 generic_hdr);
84}
85
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
87extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id);
89extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
91struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93
94#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
new file mode 100644
index 000000000000..51fe64ace55a
--- /dev/null
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -0,0 +1,448 @@
1/*
2 * Device operations for the pnfs nfs4 file layout driver.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 * Garth Goodson <Garth.Goodson@netapp.com>
10 *
11 * Permission is granted to use, copy, create derivative works, and
12 * redistribute this software and such derivative works for any purpose,
13 * so long as the name of the University of Michigan is not used in
14 * any advertising or publicity pertaining to the use or distribution
15 * of this software without specific, written prior authorization. If
16 * the above copyright notice or any other identification of the
17 * University of Michigan is included in any copy of any portion of
18 * this software, then the disclaimer below must also be included.
19 *
20 * This software is provided as is, without representation or warranty
21 * of any kind either express or implied, including without limitation
22 * the implied warranties of merchantability, fitness for a particular
23 * purpose, or noninfringement. The Regents of the University of
24 * Michigan shall not be liable for any damages, including special,
25 * indirect, incidental, or consequential damages, with respect to any
26 * claim arising out of or in connection with the use of the software,
27 * even if it has been or is hereafter advised of the possibility of
28 * such damages.
29 */
30
31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39/*
40 * Data server cache
41 *
42 * Data servers can be mapped to different device ids.
43 * nfs4_pnfs_ds reference counting
44 * - set to 1 on allocation
45 * - incremented when a device id maps a data server already in the cache.
46 * - decremented when deviceid is removed from the cache.
47 */
48DEFINE_SPINLOCK(nfs4_ds_cache_lock);
49static LIST_HEAD(nfs4_data_server_cache);
50
51/* Debug routines */
52void
53print_ds(struct nfs4_pnfs_ds *ds)
54{
55 if (ds == NULL) {
56 printk("%s NULL device\n", __func__);
57 return;
58 }
59 printk(" ip_addr %x port %hu\n"
60 " ref count %d\n"
61 " client %p\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66}
67
68void
69print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
70{
71 int i;
72
73 ifdebug(FACILITY) {
74 printk("%s dsaddr->ds_num %d\n", __func__,
75 dsaddr->ds_num);
76 for (i = 0; i < dsaddr->ds_num; i++)
77 print_ds(dsaddr->ds_list[i]);
78 }
79}
80
81void print_deviceid(struct nfs4_deviceid *id)
82{
83 u32 *p = (u32 *)id;
84
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__,
86 p[0], p[1], p[2], p[3]);
87}
88
89/* nfs4_ds_cache_lock is held */
90static struct nfs4_pnfs_ds *
91_data_server_lookup_locked(u32 ip_addr, u32 port)
92{
93 struct nfs4_pnfs_ds *ds;
94
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr), ntohs(port));
97
98 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
99 if (ds->ds_ip_addr == ip_addr &&
100 ds->ds_port == port) {
101 return ds;
102 }
103 }
104 return NULL;
105}
106
107static void
108destroy_ds(struct nfs4_pnfs_ds *ds)
109{
110 dprintk("--> %s\n", __func__);
111 ifdebug(FACILITY)
112 print_ds(ds);
113
114 if (ds->ds_clp)
115 nfs_put_client(ds->ds_clp);
116 kfree(ds);
117}
118
119static void
120nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
121{
122 struct nfs4_pnfs_ds *ds;
123 int i;
124
125 print_deviceid(&dsaddr->deviceid.de_id);
126
127 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i];
129 if (ds != NULL) {
130 if (atomic_dec_and_lock(&ds->ds_count,
131 &nfs4_ds_cache_lock)) {
132 list_del_init(&ds->ds_node);
133 spin_unlock(&nfs4_ds_cache_lock);
134 destroy_ds(ds);
135 }
136 }
137 }
138 kfree(dsaddr->stripe_indices);
139 kfree(dsaddr);
140}
141
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
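/*
 * Illustrative sketch, not part of this patch: what the container_of()
 * pattern used in nfs4_fl_free_deviceid_callback() does, modelled in user
 * space with offsetof().  The type and macro names (model_*) are made up.
 */
#include <stdio.h>
#include <stddef.h>

struct model_devid_node { int generation; };

struct model_dsaddr {
        int ds_num;
        struct model_devid_node deviceid;       /* embedded, like the deviceid member above */
};

#define model_container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
        struct model_dsaddr dsaddr = { .ds_num = 3 };
        struct model_devid_node *node = &dsaddr.deviceid;
        struct model_dsaddr *back;

        /* recover the enclosing structure from a pointer to its member */
        back = model_container_of(node, struct model_dsaddr, deviceid);
        printf("ds_num recovered: %d\n", back->ds_num); /* prints 3 */
        return 0;
}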
150
151static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{
154 struct nfs4_pnfs_ds *tmp_ds, *ds;
155
156 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
157 if (!ds)
158 goto out;
159
160 spin_lock(&nfs4_ds_cache_lock);
161 tmp_ds = _data_server_lookup_locked(ip_addr, port);
162 if (tmp_ds == NULL) {
163 ds->ds_ip_addr = ip_addr;
164 ds->ds_port = port;
165 atomic_set(&ds->ds_count, 1);
166 INIT_LIST_HEAD(&ds->ds_node);
167 ds->ds_clp = NULL;
168 list_add(&ds->ds_node, &nfs4_data_server_cache);
169 dprintk("%s add new data server ip 0x%x\n", __func__,
170 ds->ds_ip_addr);
171 } else {
172 kfree(ds);
173 atomic_inc(&tmp_ds->ds_count);
174 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
175 __func__, tmp_ds->ds_ip_addr,
176 atomic_read(&tmp_ds->ds_count));
177 ds = tmp_ds;
178 }
179 spin_unlock(&nfs4_ds_cache_lock);
180out:
181 return ds;
182}
183
184/*
185 * Currently only supports IPv4 and a single multipath address.
186 */
187static struct nfs4_pnfs_ds *
188decode_and_add_ds(__be32 **pp, struct inode *inode)
189{
190 struct nfs4_pnfs_ds *ds = NULL;
191 char *buf;
192 const char *ipend, *pstr;
193 u32 ip_addr, port;
194 int nlen, rlen, i;
195 int tmp[2];
196 __be32 *r_netid, *r_addr, *p = *pp;
197
198 /* r_netid */
199 nlen = be32_to_cpup(p++);
200 r_netid = p;
201 p += XDR_QUADLEN(nlen);
202
203 /* r_addr */
204 rlen = be32_to_cpup(p++);
205 r_addr = p;
206 p += XDR_QUADLEN(rlen);
207 *pp = p;
208
209 /* Check that netid is "tcp" */
210 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
212 goto out_err;
213 }
214
215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s Invalid address, length %d\n", __func__,
218 rlen);
219 goto out_err;
220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen);
224
225 /* Replace the port dots with dashes for the in4_pton() delimiter */
226 for (i = 0; i < 2; i++) {
227 char *res = strrchr(buf, '.');
228 *res = '-';
229 }
230
231 /* Currently only IPv4 addresses are supported */
232 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
233 dprintk("%s: Only ipv4 addresses supported\n", __func__);
234 goto out_free;
235 }
236
237 /* port */
238 pstr = ipend;
239 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
240 port = htons((tmp[0] << 8) | (tmp[1]));
241
242 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
243 dprintk("%s Decoded address and port %s\n", __func__, buf);
244out_free:
245 kfree(buf);
246out_err:
247 return ds;
248}
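/*
 * Illustrative sketch, not part of this patch: how the r_addr "universal
 * address" string handled above encodes the port.  The sample string is made
 * up; the real code hands the IP part to in4_pton() after swapping the last
 * two dots for dashes, but the port arithmetic is the same.
 */
#include <stdio.h>

int main(void)
{
        /* four IPv4 octets followed by the two port octets */
        const char *r_addr = "10.1.2.3.8.1";
        unsigned int b[4], p_hi, p_lo;

        if (sscanf(r_addr, "%u.%u.%u.%u.%u.%u",
                   &b[0], &b[1], &b[2], &b[3], &p_hi, &p_lo) != 6)
                return 1;

        /* same arithmetic as decode_and_add_ds(): port = (p1 << 8) | p2 */
        printf("port = %u\n", (p_hi << 8) | p_lo);      /* prints 2049 */
        return 0;
}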
249
250/* Decode opaque device data and return the result */
251static struct nfs4_file_layout_dsaddr*
252decode_device(struct inode *ino, struct pnfs_device *pdev)
253{
254 int i, dummy;
255 u32 cnt, num;
256 u8 *indexp;
257 __be32 *p = (__be32 *)pdev->area, *indicesp;
258 struct nfs4_file_layout_dsaddr *dsaddr;
259
260 /* Get the stripe count (number of stripe indices) */
261 cnt = be32_to_cpup(p++);
262 dprintk("%s stripe count %d\n", __func__, cnt);
263 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
264 printk(KERN_WARNING "%s: stripe count %d greater than "
265 "supported maximum %d\n", __func__,
266 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
267 goto out_err;
268 }
269
270 /* Check the multipath list count */
271 indicesp = p;
272 p += XDR_QUADLEN(cnt << 2);
273 num = be32_to_cpup(p++);
274 dprintk("%s ds_num %u\n", __func__, num);
275 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
276 printk(KERN_WARNING "%s: multipath count %d greater than "
277 "supported maximum %d\n", __func__,
278 num, NFS4_PNFS_MAX_MULTI_CNT);
279 goto out_err;
280 }
281 dsaddr = kzalloc(sizeof(*dsaddr) +
282 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
283 GFP_KERNEL);
284 if (!dsaddr)
285 goto out_err;
286
287 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
288 if (!dsaddr->stripe_indices)
289 goto out_err_free;
290
291 dsaddr->stripe_count = cnt;
292 dsaddr->ds_num = num;
293
294 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
295
296 /* Go back and read the stripe indices */
297 p = indicesp;
298 indexp = &dsaddr->stripe_indices[0];
299 for (i = 0; i < dsaddr->stripe_count; i++) {
300 *indexp = be32_to_cpup(p++);
301 if (*indexp >= num)
302 goto out_err_free;
303 indexp++;
304 }
305 /* Skip the multipath list count, which was already read above */
306 p++;
307
308 for (i = 0; i < dsaddr->ds_num; i++) {
309 int j;
310
311 dummy = be32_to_cpup(p++); /* multipath count */
312 if (dummy > 1) {
313 printk(KERN_WARNING
314 "%s: Multipath count %d not supported, "
315 "skipping all greater than 1\n", __func__,
316 dummy);
317 }
318 for (j = 0; j < dummy; j++) {
319 if (j == 0) {
320 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
321 if (dsaddr->ds_list[i] == NULL)
322 goto out_err_free;
323 } else {
324 u32 len;
325 /* skip extra multipath */
326 len = be32_to_cpup(p++);
327 p += XDR_QUADLEN(len);
328 len = be32_to_cpup(p++);
329 p += XDR_QUADLEN(len);
330 continue;
331 }
332 }
333 }
334 return dsaddr;
335
336out_err_free:
337 nfs4_fl_free_deviceid(dsaddr);
338out_err:
339 dprintk("%s ERROR: returning NULL\n", __func__);
340 return NULL;
341}
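/*
 * Illustrative sketch, not part of this patch: how the dsaddr decoded above
 * is intended to be used.  For stripe unit n the file layout picks
 * ds_list[stripe_indices[n % stripe_count]], which is why every stripe index
 * is checked against ds_num during decoding.  The array contents are made up.
 */
#include <stdio.h>

int main(void)
{
        const unsigned char stripe_indices[] = { 0, 1, 1, 0 }; /* stripe_count == 4 */
        const char *ds_list[] = { "ds-A", "ds-B" };            /* ds_num == 2 */
        unsigned int n;

        for (n = 0; n < 8; n++)                                 /* first eight stripe units */
                printf("stripe unit %u -> %s\n",
                       n, ds_list[stripe_indices[n % 4]]);
        return 0;
}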
342
343/*
344 * Decode the opaque device specified in 'dev'
345 * and add it to the list of available devices.
346 * If the deviceid is already cached, pnfs_add_deviceid will return
347 * a pointer to the cached struct and throw away the new one.
348 */
349static struct nfs4_file_layout_dsaddr*
350decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
351{
352 struct nfs4_file_layout_dsaddr *dsaddr;
353 struct pnfs_deviceid_node *d;
354
355 dsaddr = decode_device(inode, dev);
356 if (!dsaddr) {
357 printk(KERN_WARNING "%s: Could not decode or add device\n",
358 __func__);
359 return NULL;
360 }
361
362 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
363 &dsaddr->deviceid);
364
365 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
366}
367
368/*
369 * Retrieve the information for dev_id, add it to the list
370 * of available devices, and return it.
371 */
372struct nfs4_file_layout_dsaddr *
373get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
374{
375 struct pnfs_device *pdev = NULL;
376 u32 max_resp_sz;
377 int max_pages;
378 struct page **pages = NULL;
379 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
380 int rc, i;
381 struct nfs_server *server = NFS_SERVER(inode);
382
383 /*
384 * Use the session max response size as the basis for setting
385 * GETDEVICEINFO's maxcount
386 */
387 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
388 max_pages = max_resp_sz >> PAGE_SHIFT;
389 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
390 __func__, inode, max_resp_sz, max_pages);
391
392 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
393 if (pdev == NULL)
394 return NULL;
395
396 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
397 if (pages == NULL) {
398 kfree(pdev);
399 return NULL;
400 }
401 for (i = 0; i < max_pages; i++) {
402 pages[i] = alloc_page(GFP_KERNEL);
403 if (!pages[i])
404 goto out_free;
405 }
406
407 /* set pdev->area */
408 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
409 if (!pdev->area)
410 goto out_free;
411
412 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
413 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
414 pdev->pages = pages;
415 pdev->pgbase = 0;
416 pdev->pglen = PAGE_SIZE * max_pages;
417 pdev->mincount = 0;
418
419 rc = nfs4_proc_getdeviceinfo(server, pdev);
420 dprintk("%s getdevice info returns %d\n", __func__, rc);
421 if (rc)
422 goto out_free;
423
424 /*
425 * Found new device, need to decode it and then add it to the
426 * list of known devices for this mountpoint.
427 */
428 dsaddr = decode_and_add_device(inode, pdev);
429out_free:
430 if (pdev->area != NULL)
431 vunmap(pdev->area);
432 for (i = 0; i < max_pages; i++)
433 __free_page(pages[i]);
434 kfree(pages);
435 kfree(pdev);
436 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
437 return dsaddr;
438}
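/*
 * Illustrative sketch, not part of this patch: the GETDEVICEINFO buffer
 * sizing done in get_device_info() above, with a made-up max_resp_sz and
 * the assumption of 4 KiB pages.
 */
#include <stdio.h>

#define MODEL_PAGE_SHIFT 12                     /* assumes PAGE_SIZE == 4096 */
#define MODEL_PAGE_SIZE  (1u << MODEL_PAGE_SHIFT)

int main(void)
{
        unsigned int max_resp_sz = 65536;       /* stands in for fc_attrs.max_resp_sz */
        unsigned int max_pages = max_resp_sz >> MODEL_PAGE_SHIFT;

        /* the reply pages are vmap()ed into one contiguous pdev->area */
        printf("max_pages = %u, pglen = %u\n",
               max_pages, max_pages * MODEL_PAGE_SIZE); /* prints 16, 65536 */
        return 0;
}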
439
440struct nfs4_file_layout_dsaddr *
441nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
442{
443 struct pnfs_deviceid_node *d;
444
445 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
446 return (d == NULL) ? NULL :
447 container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
448}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 70015dd60a98..0f24cdf2cb13 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
55#include "internal.h" 55#include "internal.h"
56#include "iostat.h" 56#include "iostat.h"
57#include "callback.h" 57#include "callback.h"
58#include "pnfs.h"
58 59
59#define NFSDBG_FACILITY NFSDBG_PROC 60#define NFSDBG_FACILITY NFSDBG_PROC
60 61
@@ -129,7 +130,8 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
129 | FATTR4_WORD0_MAXREAD 130 | FATTR4_WORD0_MAXREAD
130 | FATTR4_WORD0_MAXWRITE 131 | FATTR4_WORD0_MAXWRITE
131 | FATTR4_WORD0_LEASE_TIME, 132 | FATTR4_WORD0_LEASE_TIME,
132 0 133 FATTR4_WORD1_TIME_DELTA
134 | FATTR4_WORD1_FS_LAYOUT_TYPES
133}; 135};
134 136
135const u32 nfs4_fs_locations_bitmap[2] = { 137const u32 nfs4_fs_locations_bitmap[2] = {
@@ -255,9 +257,6 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
255 nfs4_state_mark_reclaim_nograce(clp, state); 257 nfs4_state_mark_reclaim_nograce(clp, state);
256 goto do_state_recovery; 258 goto do_state_recovery;
257 case -NFS4ERR_STALE_STATEID: 259 case -NFS4ERR_STALE_STATEID:
258 if (state == NULL)
259 break;
260 nfs4_state_mark_reclaim_reboot(clp, state);
261 case -NFS4ERR_STALE_CLIENTID: 260 case -NFS4ERR_STALE_CLIENTID:
262 case -NFS4ERR_EXPIRED: 261 case -NFS4ERR_EXPIRED:
263 goto do_state_recovery; 262 goto do_state_recovery;
@@ -303,15 +302,19 @@ do_state_recovery:
303} 302}
304 303
305 304
306static void renew_lease(const struct nfs_server *server, unsigned long timestamp) 305static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp)
307{ 306{
308 struct nfs_client *clp = server->nfs_client;
309 spin_lock(&clp->cl_lock); 307 spin_lock(&clp->cl_lock);
310 if (time_before(clp->cl_last_renewal,timestamp)) 308 if (time_before(clp->cl_last_renewal,timestamp))
311 clp->cl_last_renewal = timestamp; 309 clp->cl_last_renewal = timestamp;
312 spin_unlock(&clp->cl_lock); 310 spin_unlock(&clp->cl_lock);
313} 311}
314 312
313static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
314{
315 do_renew_lease(server->nfs_client, timestamp);
316}
317
315#if defined(CONFIG_NFS_V4_1) 318#if defined(CONFIG_NFS_V4_1)
316 319
317/* 320/*
@@ -330,10 +333,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
330 * Must be called while holding tbl->slot_tbl_lock 333 * Must be called while holding tbl->slot_tbl_lock
331 */ 334 */
332static void 335static void
333nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) 336nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
334{ 337{
338 int free_slotid = free_slot - tbl->slots;
335 int slotid = free_slotid; 339 int slotid = free_slotid;
336 340
341 BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
337 /* clear used bit in bitmap */ 342 /* clear used bit in bitmap */
338 __clear_bit(slotid, tbl->used_slots); 343 __clear_bit(slotid, tbl->used_slots);
339 344
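/*
 * Illustrative sketch, not part of this patch: the pointer arithmetic used
 * by nfs4_free_slot() above to turn a slot pointer back into a slot index.
 * The slot type here is a stand-in, not the real struct nfs4_slot.
 */
#include <stdio.h>

struct model_slot { unsigned int seq_nr; };

int main(void)
{
        struct model_slot slots[8];             /* models tbl->slots */
        struct model_slot *free_slot = &slots[5];

        /* free_slot - slots == index of the slot inside the table */
        printf("slotid = %td\n", free_slot - slots);    /* prints 5 */
        return 0;
}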
@@ -356,7 +361,7 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
356{ 361{
357 struct rpc_task *task; 362 struct rpc_task *task;
358 363
359 if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) { 364 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
360 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); 365 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
361 if (task) 366 if (task)
362 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); 367 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
@@ -370,13 +375,12 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
370 complete(&ses->complete); 375 complete(&ses->complete);
371} 376}
372 377
373static void nfs41_sequence_free_slot(const struct nfs_client *clp, 378static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
374 struct nfs4_sequence_res *res)
375{ 379{
376 struct nfs4_slot_table *tbl; 380 struct nfs4_slot_table *tbl;
377 381
378 tbl = &clp->cl_session->fc_slot_table; 382 tbl = &res->sr_session->fc_slot_table;
379 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { 383 if (!res->sr_slot) {
380 /* just wake up the next guy waiting since 384 /* just wake up the next guy waiting since
381 * we may have not consumed a slot after all */ 385 * we may have not consumed a slot after all */
382 dprintk("%s: No slot\n", __func__); 386 dprintk("%s: No slot\n", __func__);
@@ -384,19 +388,16 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp,
384 } 388 }
385 389
386 spin_lock(&tbl->slot_tbl_lock); 390 spin_lock(&tbl->slot_tbl_lock);
387 nfs4_free_slot(tbl, res->sr_slotid); 391 nfs4_free_slot(tbl, res->sr_slot);
388 nfs41_check_drain_session_complete(clp->cl_session); 392 nfs41_check_drain_session_complete(res->sr_session);
389 spin_unlock(&tbl->slot_tbl_lock); 393 spin_unlock(&tbl->slot_tbl_lock);
390 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 394 res->sr_slot = NULL;
391} 395}
392 396
393static void nfs41_sequence_done(struct nfs_client *clp, 397static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
394 struct nfs4_sequence_res *res,
395 int rpc_status)
396{ 398{
397 unsigned long timestamp; 399 unsigned long timestamp;
398 struct nfs4_slot_table *tbl; 400 struct nfs_client *clp;
399 struct nfs4_slot *slot;
400 401
401 /* 402 /*
402 * sr_status remains 1 if an RPC level error occurred. The server 403 * sr_status remains 1 if an RPC level error occurred. The server
@@ -408,28 +409,53 @@ static void nfs41_sequence_done(struct nfs_client *clp,
408 res->sr_status = NFS_OK; 409 res->sr_status = NFS_OK;
409 410
410 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ 411 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
411 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) 412 if (!res->sr_slot)
412 goto out; 413 goto out;
413 414
414 /* Check the SEQUENCE operation status */ 415 /* Check the SEQUENCE operation status */
415 if (res->sr_status == 0) { 416 switch (res->sr_status) {
416 tbl = &clp->cl_session->fc_slot_table; 417 case 0:
417 slot = tbl->slots + res->sr_slotid;
418 /* Update the slot's sequence and clientid lease timer */ 418 /* Update the slot's sequence and clientid lease timer */
419 ++slot->seq_nr; 419 ++res->sr_slot->seq_nr;
420 timestamp = res->sr_renewal_time; 420 timestamp = res->sr_renewal_time;
421 spin_lock(&clp->cl_lock); 421 clp = res->sr_session->clp;
422 if (time_before(clp->cl_last_renewal, timestamp)) 422 do_renew_lease(clp, timestamp);
423 clp->cl_last_renewal = timestamp;
424 spin_unlock(&clp->cl_lock);
425 /* Check sequence flags */ 423 /* Check sequence flags */
426 if (atomic_read(&clp->cl_count) > 1) 424 if (atomic_read(&clp->cl_count) > 1)
427 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 425 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
426 break;
427 case -NFS4ERR_DELAY:
428 /* The server detected a resend of the RPC call and
429 * returned NFS4ERR_DELAY as per Section 2.10.6.2
430 * of RFC5661.
431 */
432 dprintk("%s: slot=%td seq=%d: Operation in progress\n",
433 __func__,
434 res->sr_slot - res->sr_session->fc_slot_table.slots,
435 res->sr_slot->seq_nr);
436 goto out_retry;
437 default:
438 /* Just update the slot sequence no. */
439 ++res->sr_slot->seq_nr;
428 } 440 }
429out: 441out:
430 /* The session may be reset by one of the error handlers. */ 442 /* The session may be reset by one of the error handlers. */
431 dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); 443 dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
432 nfs41_sequence_free_slot(clp, res); 444 nfs41_sequence_free_slot(res);
445 return 1;
446out_retry:
447 if (!rpc_restart_call(task))
448 goto out;
449 rpc_delay(task, NFS4_POLL_RETRY_MAX);
450 return 0;
451}
452
453static int nfs4_sequence_done(struct rpc_task *task,
454 struct nfs4_sequence_res *res)
455{
456 if (res->sr_session == NULL)
457 return 1;
458 return nfs41_sequence_done(task, res);
433} 459}
434 460
435/* 461/*
@@ -477,15 +503,13 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
477 503
478 dprintk("--> %s\n", __func__); 504 dprintk("--> %s\n", __func__);
479 /* slot already allocated? */ 505 /* slot already allocated? */
480 if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) 506 if (res->sr_slot != NULL)
481 return 0; 507 return 0;
482 508
483 memset(res, 0, sizeof(*res));
484 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
485 tbl = &session->fc_slot_table; 509 tbl = &session->fc_slot_table;
486 510
487 spin_lock(&tbl->slot_tbl_lock); 511 spin_lock(&tbl->slot_tbl_lock);
488 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) && 512 if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
489 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { 513 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
490 /* 514 /*
491 * The state manager will wait until the slot table is empty. 515 * The state manager will wait until the slot table is empty.
@@ -523,8 +547,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
523 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); 547 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
524 548
525 res->sr_session = session; 549 res->sr_session = session;
526 res->sr_slotid = slotid; 550 res->sr_slot = slot;
527 res->sr_renewal_time = jiffies; 551 res->sr_renewal_time = jiffies;
552 res->sr_status_flags = 0;
528 /* 553 /*
529 * sr_status is only set in decode_sequence, and so will remain 554 * sr_status is only set in decode_sequence, and so will remain
530 * set to 1 if an rpc level failure occurs. 555 * set to 1 if an rpc level failure occurs.
@@ -533,33 +558,34 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
533 return 0; 558 return 0;
534} 559}
535 560
536int nfs4_setup_sequence(struct nfs_client *clp, 561int nfs4_setup_sequence(const struct nfs_server *server,
537 struct nfs4_sequence_args *args, 562 struct nfs4_sequence_args *args,
538 struct nfs4_sequence_res *res, 563 struct nfs4_sequence_res *res,
539 int cache_reply, 564 int cache_reply,
540 struct rpc_task *task) 565 struct rpc_task *task)
541{ 566{
567 struct nfs4_session *session = nfs4_get_session(server);
542 int ret = 0; 568 int ret = 0;
543 569
544 dprintk("--> %s clp %p session %p sr_slotid %d\n", 570 if (session == NULL) {
545 __func__, clp, clp->cl_session, res->sr_slotid); 571 args->sa_session = NULL;
546 572 res->sr_session = NULL;
547 if (!nfs4_has_session(clp))
548 goto out; 573 goto out;
549 ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
550 task);
551 if (ret && ret != -EAGAIN) {
552 /* terminate rpc task */
553 task->tk_status = ret;
554 task->tk_action = NULL;
555 } 574 }
575
576 dprintk("--> %s clp %p session %p sr_slot %td\n",
577 __func__, session->clp, session, res->sr_slot ?
578 res->sr_slot - session->fc_slot_table.slots : -1);
579
580 ret = nfs41_setup_sequence(session, args, res, cache_reply,
581 task);
556out: 582out:
557 dprintk("<-- %s status=%d\n", __func__, ret); 583 dprintk("<-- %s status=%d\n", __func__, ret);
558 return ret; 584 return ret;
559} 585}
560 586
561struct nfs41_call_sync_data { 587struct nfs41_call_sync_data {
562 struct nfs_client *clp; 588 const struct nfs_server *seq_server;
563 struct nfs4_sequence_args *seq_args; 589 struct nfs4_sequence_args *seq_args;
564 struct nfs4_sequence_res *seq_res; 590 struct nfs4_sequence_res *seq_res;
565 int cache_reply; 591 int cache_reply;
@@ -569,9 +595,9 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
569{ 595{
570 struct nfs41_call_sync_data *data = calldata; 596 struct nfs41_call_sync_data *data = calldata;
571 597
572 dprintk("--> %s data->clp->cl_session %p\n", __func__, 598 dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
573 data->clp->cl_session); 599
574 if (nfs4_setup_sequence(data->clp, data->seq_args, 600 if (nfs4_setup_sequence(data->seq_server, data->seq_args,
575 data->seq_res, data->cache_reply, task)) 601 data->seq_res, data->cache_reply, task))
576 return; 602 return;
577 rpc_call_start(task); 603 rpc_call_start(task);
@@ -587,7 +613,7 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
587{ 613{
588 struct nfs41_call_sync_data *data = calldata; 614 struct nfs41_call_sync_data *data = calldata;
589 615
590 nfs41_sequence_done(data->clp, data->seq_res, task->tk_status); 616 nfs41_sequence_done(task, data->seq_res);
591} 617}
592 618
593struct rpc_call_ops nfs41_call_sync_ops = { 619struct rpc_call_ops nfs41_call_sync_ops = {
@@ -600,8 +626,7 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = {
600 .rpc_call_done = nfs41_call_sync_done, 626 .rpc_call_done = nfs41_call_sync_done,
601}; 627};
602 628
603static int nfs4_call_sync_sequence(struct nfs_client *clp, 629static int nfs4_call_sync_sequence(struct nfs_server *server,
604 struct rpc_clnt *clnt,
605 struct rpc_message *msg, 630 struct rpc_message *msg,
606 struct nfs4_sequence_args *args, 631 struct nfs4_sequence_args *args,
607 struct nfs4_sequence_res *res, 632 struct nfs4_sequence_res *res,
@@ -611,19 +636,19 @@ static int nfs4_call_sync_sequence(struct nfs_client *clp,
611 int ret; 636 int ret;
612 struct rpc_task *task; 637 struct rpc_task *task;
613 struct nfs41_call_sync_data data = { 638 struct nfs41_call_sync_data data = {
614 .clp = clp, 639 .seq_server = server,
615 .seq_args = args, 640 .seq_args = args,
616 .seq_res = res, 641 .seq_res = res,
617 .cache_reply = cache_reply, 642 .cache_reply = cache_reply,
618 }; 643 };
619 struct rpc_task_setup task_setup = { 644 struct rpc_task_setup task_setup = {
620 .rpc_client = clnt, 645 .rpc_client = server->client,
621 .rpc_message = msg, 646 .rpc_message = msg,
622 .callback_ops = &nfs41_call_sync_ops, 647 .callback_ops = &nfs41_call_sync_ops,
623 .callback_data = &data 648 .callback_data = &data
624 }; 649 };
625 650
626 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 651 res->sr_slot = NULL;
627 if (privileged) 652 if (privileged)
628 task_setup.callback_ops = &nfs41_call_priv_sync_ops; 653 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
629 task = rpc_run_task(&task_setup); 654 task = rpc_run_task(&task_setup);
@@ -642,10 +667,15 @@ int _nfs4_call_sync_session(struct nfs_server *server,
642 struct nfs4_sequence_res *res, 667 struct nfs4_sequence_res *res,
643 int cache_reply) 668 int cache_reply)
644{ 669{
645 return nfs4_call_sync_sequence(server->nfs_client, server->client, 670 return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
646 msg, args, res, cache_reply, 0);
647} 671}
648 672
673#else
674static int nfs4_sequence_done(struct rpc_task *task,
675 struct nfs4_sequence_res *res)
676{
677 return 1;
678}
649#endif /* CONFIG_NFS_V4_1 */ 679#endif /* CONFIG_NFS_V4_1 */
650 680
651int _nfs4_call_sync(struct nfs_server *server, 681int _nfs4_call_sync(struct nfs_server *server,
@@ -659,18 +689,9 @@ int _nfs4_call_sync(struct nfs_server *server,
659} 689}
660 690
661#define nfs4_call_sync(server, msg, args, res, cache_reply) \ 691#define nfs4_call_sync(server, msg, args, res, cache_reply) \
662 (server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \ 692 (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
663 &(res)->seq_res, (cache_reply)) 693 &(res)->seq_res, (cache_reply))
664 694
665static void nfs4_sequence_done(const struct nfs_server *server,
666 struct nfs4_sequence_res *res, int rpc_status)
667{
668#ifdef CONFIG_NFS_V4_1
669 if (nfs4_has_session(server->nfs_client))
670 nfs41_sequence_done(server->nfs_client, res, rpc_status);
671#endif /* CONFIG_NFS_V4_1 */
672}
673
674static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) 695static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
675{ 696{
676 struct nfs_inode *nfsi = NFS_I(dir); 697 struct nfs_inode *nfsi = NFS_I(dir);
@@ -712,7 +733,6 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
712 p->o_res.server = p->o_arg.server; 733 p->o_res.server = p->o_arg.server;
713 nfs_fattr_init(&p->f_attr); 734 nfs_fattr_init(&p->f_attr);
714 nfs_fattr_init(&p->dir_attr); 735 nfs_fattr_init(&p->dir_attr);
715 p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
716} 736}
717 737
718static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 738static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
@@ -745,19 +765,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
745 p->o_arg.server = server; 765 p->o_arg.server = server;
746 p->o_arg.bitmask = server->attr_bitmask; 766 p->o_arg.bitmask = server->attr_bitmask;
747 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 767 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
748 if (flags & O_EXCL) { 768 if (flags & O_CREAT) {
749 if (nfs4_has_persistent_session(server->nfs_client)) { 769 u32 *s;
750 /* GUARDED */ 770
751 p->o_arg.u.attrs = &p->attrs;
752 memcpy(&p->attrs, attrs, sizeof(p->attrs));
753 } else { /* EXCLUSIVE4_1 */
754 u32 *s = (u32 *) p->o_arg.u.verifier.data;
755 s[0] = jiffies;
756 s[1] = current->pid;
757 }
758 } else if (flags & O_CREAT) {
759 p->o_arg.u.attrs = &p->attrs; 771 p->o_arg.u.attrs = &p->attrs;
760 memcpy(&p->attrs, attrs, sizeof(p->attrs)); 772 memcpy(&p->attrs, attrs, sizeof(p->attrs));
773 s = (u32 *) p->o_arg.u.verifier.data;
774 s[0] = jiffies;
775 s[1] = current->pid;
761 } 776 }
762 p->c_arg.fh = &p->o_res.fh; 777 p->c_arg.fh = &p->o_res.fh;
763 p->c_arg.stateid = &p->o_res.stateid; 778 p->c_arg.stateid = &p->o_res.stateid;
@@ -1102,6 +1117,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1102 clear_bit(NFS_DELEGATED_STATE, &state->flags); 1117 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1103 smp_rmb(); 1118 smp_rmb();
1104 if (state->n_rdwr != 0) { 1119 if (state->n_rdwr != 0) {
1120 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1105 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); 1121 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
1106 if (ret != 0) 1122 if (ret != 0)
1107 return ret; 1123 return ret;
@@ -1109,6 +1125,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1109 return -ESTALE; 1125 return -ESTALE;
1110 } 1126 }
1111 if (state->n_wronly != 0) { 1127 if (state->n_wronly != 0) {
1128 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1112 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); 1129 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
1113 if (ret != 0) 1130 if (ret != 0)
1114 return ret; 1131 return ret;
@@ -1116,6 +1133,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1116 return -ESTALE; 1133 return -ESTALE;
1117 } 1134 }
1118 if (state->n_rdonly != 0) { 1135 if (state->n_rdonly != 0) {
1136 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1119 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); 1137 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
1120 if (ret != 0) 1138 if (ret != 0)
1121 return ret; 1139 return ret;
@@ -1170,7 +1188,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
1170 int err; 1188 int err;
1171 do { 1189 do {
1172 err = _nfs4_do_open_reclaim(ctx, state); 1190 err = _nfs4_do_open_reclaim(ctx, state);
1173 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) 1191 if (err != -NFS4ERR_DELAY)
1174 break; 1192 break;
1175 nfs4_handle_exception(server, err, &exception); 1193 nfs4_handle_exception(server, err, &exception);
1176 } while (exception.retry); 1194 } while (exception.retry);
@@ -1240,6 +1258,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1240 case -NFS4ERR_ADMIN_REVOKED: 1258 case -NFS4ERR_ADMIN_REVOKED:
1241 case -NFS4ERR_BAD_STATEID: 1259 case -NFS4ERR_BAD_STATEID:
1242 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1260 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
1261 case -EKEYEXPIRED:
1262 /*
1263 * User RPCSEC_GSS context has expired.
1264 * We cannot recover this stateid now, so
1265 * skip it and allow recovery thread to
1266 * proceed.
1267 */
1243 case -ENOMEM: 1268 case -ENOMEM:
1244 err = 0; 1269 err = 0;
1245 goto out; 1270 goto out;
@@ -1255,8 +1280,6 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
1255 struct nfs4_opendata *data = calldata; 1280 struct nfs4_opendata *data = calldata;
1256 1281
1257 data->rpc_status = task->tk_status; 1282 data->rpc_status = task->tk_status;
1258 if (RPC_ASSASSINATED(task))
1259 return;
1260 if (data->rpc_status == 0) { 1283 if (data->rpc_status == 0) {
1261 memcpy(data->o_res.stateid.data, data->c_res.stateid.data, 1284 memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
1262 sizeof(data->o_res.stateid.data)); 1285 sizeof(data->o_res.stateid.data));
@@ -1356,13 +1379,13 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1356 } 1379 }
1357 /* Update sequence id. */ 1380 /* Update sequence id. */
1358 data->o_arg.id = sp->so_owner_id.id; 1381 data->o_arg.id = sp->so_owner_id.id;
1359 data->o_arg.clientid = sp->so_client->cl_clientid; 1382 data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
1360 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { 1383 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
1361 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; 1384 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
1362 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); 1385 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
1363 } 1386 }
1364 data->timestamp = jiffies; 1387 data->timestamp = jiffies;
1365 if (nfs4_setup_sequence(data->o_arg.server->nfs_client, 1388 if (nfs4_setup_sequence(data->o_arg.server,
1366 &data->o_arg.seq_args, 1389 &data->o_arg.seq_args,
1367 &data->o_res.seq_res, 1, task)) 1390 &data->o_res.seq_res, 1, task))
1368 return; 1391 return;
@@ -1385,11 +1408,9 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
1385 1408
1386 data->rpc_status = task->tk_status; 1409 data->rpc_status = task->tk_status;
1387 1410
1388 nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res, 1411 if (!nfs4_sequence_done(task, &data->o_res.seq_res))
1389 task->tk_status);
1390
1391 if (RPC_ASSASSINATED(task))
1392 return; 1412 return;
1413
1393 if (task->tk_status == 0) { 1414 if (task->tk_status == 0) {
1394 switch (data->o_res.f_attr->mode & S_IFMT) { 1415 switch (data->o_res.f_attr->mode & S_IFMT) {
1395 case S_IFREG: 1416 case S_IFREG:
@@ -1591,7 +1612,6 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
1591 goto out; 1612 goto out;
1592 case -NFS4ERR_GRACE: 1613 case -NFS4ERR_GRACE:
1593 case -NFS4ERR_DELAY: 1614 case -NFS4ERR_DELAY:
1594 case -EKEYEXPIRED:
1595 nfs4_handle_exception(server, err, &exception); 1615 nfs4_handle_exception(server, err, &exception);
1596 err = 0; 1616 err = 0;
1597 } 1617 }
@@ -1773,7 +1793,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1773 if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { 1793 if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
1774 /* Use that stateid */ 1794 /* Use that stateid */
1775 } else if (state != NULL) { 1795 } else if (state != NULL) {
1776 nfs4_copy_stateid(&arg.stateid, state, current->files); 1796 nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
1777 } else 1797 } else
1778 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1798 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
1779 1799
@@ -1838,8 +1858,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1838 struct nfs4_state *state = calldata->state; 1858 struct nfs4_state *state = calldata->state;
1839 struct nfs_server *server = NFS_SERVER(calldata->inode); 1859 struct nfs_server *server = NFS_SERVER(calldata->inode);
1840 1860
1841 nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status); 1861 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
1842 if (RPC_ASSASSINATED(task))
1843 return; 1862 return;
1844 /* hmm. we are done with the inode, and in the process of freeing 1863 /* hmm. we are done with the inode, and in the process of freeing
1845 * the state_owner. we keep this around to process errors 1864 * the state_owner. we keep this around to process errors
@@ -1903,7 +1922,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1903 1922
1904 nfs_fattr_init(calldata->res.fattr); 1923 nfs_fattr_init(calldata->res.fattr);
1905 calldata->timestamp = jiffies; 1924 calldata->timestamp = jiffies;
1906 if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client, 1925 if (nfs4_setup_sequence(NFS_SERVER(calldata->inode),
1907 &calldata->arg.seq_args, &calldata->res.seq_res, 1926 &calldata->arg.seq_args, &calldata->res.seq_res,
1908 1, task)) 1927 1, task))
1909 return; 1928 return;
@@ -1962,7 +1981,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1962 calldata->res.fattr = &calldata->fattr; 1981 calldata->res.fattr = &calldata->fattr;
1963 calldata->res.seqid = calldata->arg.seqid; 1982 calldata->res.seqid = calldata->arg.seqid;
1964 calldata->res.server = server; 1983 calldata->res.server = server;
1965 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
1966 path_get(path); 1984 path_get(path);
1967 calldata->path = *path; 1985 calldata->path = *path;
1968 1986
@@ -1985,118 +2003,17 @@ out:
1985 return status; 2003 return status;
1986} 2004}
1987 2005
1988static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) 2006static struct inode *
2007nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr)
1989{ 2008{
1990 struct file *filp;
1991 int ret;
1992
1993 /* If the open_intent is for execute, we have an extra check to make */
1994 if (fmode & FMODE_EXEC) {
1995 ret = nfs_may_open(state->inode,
1996 state->owner->so_cred,
1997 nd->intent.open.flags);
1998 if (ret < 0)
1999 goto out_close;
2000 }
2001 filp = lookup_instantiate_filp(nd, path->dentry, NULL);
2002 if (!IS_ERR(filp)) {
2003 struct nfs_open_context *ctx;
2004 ctx = nfs_file_open_context(filp);
2005 ctx->state = state;
2006 return 0;
2007 }
2008 ret = PTR_ERR(filp);
2009out_close:
2010 nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
2011 return ret;
2012}
2013
2014struct dentry *
2015nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
2016{
2017 struct path path = {
2018 .mnt = nd->path.mnt,
2019 .dentry = dentry,
2020 };
2021 struct dentry *parent;
2022 struct iattr attr;
2023 struct rpc_cred *cred;
2024 struct nfs4_state *state; 2009 struct nfs4_state *state;
2025 struct dentry *res;
2026 fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
2027
2028 if (nd->flags & LOOKUP_CREATE) {
2029 attr.ia_mode = nd->intent.open.create_mode;
2030 attr.ia_valid = ATTR_MODE;
2031 if (!IS_POSIXACL(dir))
2032 attr.ia_mode &= ~current_umask();
2033 } else {
2034 attr.ia_valid = 0;
2035 BUG_ON(nd->intent.open.flags & O_CREAT);
2036 }
2037 2010
2038 cred = rpc_lookup_cred();
2039 if (IS_ERR(cred))
2040 return (struct dentry *)cred;
2041 parent = dentry->d_parent;
2042 /* Protect against concurrent sillydeletes */ 2011 /* Protect against concurrent sillydeletes */
2043 nfs_block_sillyrename(parent); 2012 state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred);
2044 state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); 2013 if (IS_ERR(state))
2045 put_rpccred(cred); 2014 return ERR_CAST(state);
2046 if (IS_ERR(state)) { 2015 ctx->state = state;
2047 if (PTR_ERR(state) == -ENOENT) { 2016 return igrab(state->inode);
2048 d_add(dentry, NULL);
2049 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2050 }
2051 nfs_unblock_sillyrename(parent);
2052 return (struct dentry *)state;
2053 }
2054 res = d_add_unique(dentry, igrab(state->inode));
2055 if (res != NULL)
2056 path.dentry = res;
2057 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
2058 nfs_unblock_sillyrename(parent);
2059 nfs4_intent_set_file(nd, &path, state, fmode);
2060 return res;
2061}
2062
2063int
2064nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
2065{
2066 struct path path = {
2067 .mnt = nd->path.mnt,
2068 .dentry = dentry,
2069 };
2070 struct rpc_cred *cred;
2071 struct nfs4_state *state;
2072 fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
2073
2074 cred = rpc_lookup_cred();
2075 if (IS_ERR(cred))
2076 return PTR_ERR(cred);
2077 state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
2078 put_rpccred(cred);
2079 if (IS_ERR(state)) {
2080 switch (PTR_ERR(state)) {
2081 case -EPERM:
2082 case -EACCES:
2083 case -EDQUOT:
2084 case -ENOSPC:
2085 case -EROFS:
2086 return PTR_ERR(state);
2087 default:
2088 goto out_drop;
2089 }
2090 }
2091 if (state->inode == dentry->d_inode) {
2092 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2093 nfs4_intent_set_file(nd, &path, state, fmode);
2094 return 1;
2095 }
2096 nfs4_close_sync(&path, state, fmode);
2097out_drop:
2098 d_drop(dentry);
2099 return 0;
2100} 2017}
2101 2018
2102static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) 2019static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
@@ -2260,8 +2177,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct
2260out: 2177out:
2261 if (page) 2178 if (page)
2262 __free_page(page); 2179 __free_page(page);
2263 if (locations) 2180 kfree(locations);
2264 kfree(locations);
2265 return status; 2181 return status;
2266} 2182}
2267 2183
@@ -2554,36 +2470,34 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
2554 2470
2555static int 2471static int
2556nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 2472nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2557 int flags, struct nameidata *nd) 2473 int flags, struct nfs_open_context *ctx)
2558{ 2474{
2559 struct path path = { 2475 struct path my_path = {
2560 .mnt = nd->path.mnt,
2561 .dentry = dentry, 2476 .dentry = dentry,
2562 }; 2477 };
2478 struct path *path = &my_path;
2563 struct nfs4_state *state; 2479 struct nfs4_state *state;
2564 struct rpc_cred *cred; 2480 struct rpc_cred *cred = NULL;
2565 fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); 2481 fmode_t fmode = 0;
2566 int status = 0; 2482 int status = 0;
2567 2483
2568 cred = rpc_lookup_cred(); 2484 if (ctx != NULL) {
2569 if (IS_ERR(cred)) { 2485 cred = ctx->cred;
2570 status = PTR_ERR(cred); 2486 path = &ctx->path;
2571 goto out; 2487 fmode = ctx->mode;
2572 } 2488 }
2573 state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); 2489 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
2574 d_drop(dentry); 2490 d_drop(dentry);
2575 if (IS_ERR(state)) { 2491 if (IS_ERR(state)) {
2576 status = PTR_ERR(state); 2492 status = PTR_ERR(state);
2577 goto out_putcred; 2493 goto out;
2578 } 2494 }
2579 d_add(dentry, igrab(state->inode)); 2495 d_add(dentry, igrab(state->inode));
2580 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 2496 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2581 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) 2497 if (ctx != NULL)
2582 status = nfs4_intent_set_file(nd, &path, state, fmode); 2498 ctx->state = state;
2583 else 2499 else
2584 nfs4_close_sync(&path, state, fmode); 2500 nfs4_close_sync(path, state, fmode);
2585out_putcred:
2586 put_rpccred(cred);
2587out: 2501out:
2588 return status; 2502 return status;
2589} 2503}
@@ -2641,6 +2555,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2641 2555
2642 args->bitmask = server->cache_consistency_bitmask; 2556 args->bitmask = server->cache_consistency_bitmask;
2643 res->server = server; 2557 res->server = server;
2558 res->seq_res.sr_slot = NULL;
2644 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2559 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2645} 2560}
2646 2561
@@ -2648,7 +2563,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2648{ 2563{
2649 struct nfs_removeres *res = task->tk_msg.rpc_resp; 2564 struct nfs_removeres *res = task->tk_msg.rpc_resp;
2650 2565
2651 nfs4_sequence_done(res->server, &res->seq_res, task->tk_status); 2566 if (!nfs4_sequence_done(task, &res->seq_res))
2567 return 0;
2652 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2568 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2653 return 0; 2569 return 0;
2654 update_changeattr(dir, &res->cinfo); 2570 update_changeattr(dir, &res->cinfo);
@@ -2656,18 +2572,46 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2656 return 1; 2572 return 1;
2657} 2573}
2658 2574
2575static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2576{
2577 struct nfs_server *server = NFS_SERVER(dir);
2578 struct nfs_renameargs *arg = msg->rpc_argp;
2579 struct nfs_renameres *res = msg->rpc_resp;
2580
2581 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2582 arg->bitmask = server->attr_bitmask;
2583 res->server = server;
2584}
2585
2586static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
2587 struct inode *new_dir)
2588{
2589 struct nfs_renameres *res = task->tk_msg.rpc_resp;
2590
2591 if (!nfs4_sequence_done(task, &res->seq_res))
2592 return 0;
2593 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2594 return 0;
2595
2596 update_changeattr(old_dir, &res->old_cinfo);
2597 nfs_post_op_update_inode(old_dir, res->old_fattr);
2598 update_changeattr(new_dir, &res->new_cinfo);
2599 nfs_post_op_update_inode(new_dir, res->new_fattr);
2600 return 1;
2601}
2602
2659static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, 2603static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2660 struct inode *new_dir, struct qstr *new_name) 2604 struct inode *new_dir, struct qstr *new_name)
2661{ 2605{
2662 struct nfs_server *server = NFS_SERVER(old_dir); 2606 struct nfs_server *server = NFS_SERVER(old_dir);
2663 struct nfs4_rename_arg arg = { 2607 struct nfs_renameargs arg = {
2664 .old_dir = NFS_FH(old_dir), 2608 .old_dir = NFS_FH(old_dir),
2665 .new_dir = NFS_FH(new_dir), 2609 .new_dir = NFS_FH(new_dir),
2666 .old_name = old_name, 2610 .old_name = old_name,
2667 .new_name = new_name, 2611 .new_name = new_name,
2668 .bitmask = server->attr_bitmask, 2612 .bitmask = server->attr_bitmask,
2669 }; 2613 };
2670 struct nfs4_rename_res res = { 2614 struct nfs_renameres res = {
2671 .server = server, 2615 .server = server,
2672 }; 2616 };
2673 struct rpc_message msg = { 2617 struct rpc_message msg = {
@@ -2881,15 +2825,16 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
2881} 2825}
2882 2826
2883static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 2827static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2884 u64 cookie, struct page *page, unsigned int count, int plus) 2828 u64 cookie, struct page **pages, unsigned int count, int plus)
2885{ 2829{
2886 struct inode *dir = dentry->d_inode; 2830 struct inode *dir = dentry->d_inode;
2887 struct nfs4_readdir_arg args = { 2831 struct nfs4_readdir_arg args = {
2888 .fh = NFS_FH(dir), 2832 .fh = NFS_FH(dir),
2889 .pages = &page, 2833 .pages = pages,
2890 .pgbase = 0, 2834 .pgbase = 0,
2891 .count = count, 2835 .count = count,
2892 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, 2836 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
2837 .plus = plus,
2893 }; 2838 };
2894 struct nfs4_readdir_res res; 2839 struct nfs4_readdir_res res;
2895 struct rpc_message msg = { 2840 struct rpc_message msg = {
@@ -2917,14 +2862,14 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2917} 2862}
2918 2863
2919static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 2864static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2920 u64 cookie, struct page *page, unsigned int count, int plus) 2865 u64 cookie, struct page **pages, unsigned int count, int plus)
2921{ 2866{
2922 struct nfs4_exception exception = { }; 2867 struct nfs4_exception exception = { };
2923 int err; 2868 int err;
2924 do { 2869 do {
2925 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), 2870 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
2926 _nfs4_proc_readdir(dentry, cred, cookie, 2871 _nfs4_proc_readdir(dentry, cred, cookie,
2927 page, count, plus), 2872 pages, count, plus),
2928 &exception); 2873 &exception);
2929 } while (exception.retry); 2874 } while (exception.retry);
2930 return err; 2875 return err;
@@ -3093,7 +3038,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3093 3038
3094 dprintk("--> %s\n", __func__); 3039 dprintk("--> %s\n", __func__);
3095 3040
3096 nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); 3041 if (!nfs4_sequence_done(task, &data->res.seq_res))
3042 return -EAGAIN;
3097 3043
3098 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3044 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3099 nfs_restart_rpc(task, server->nfs_client); 3045 nfs_restart_rpc(task, server->nfs_client);
@@ -3116,8 +3062,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3116{ 3062{
3117 struct inode *inode = data->inode; 3063 struct inode *inode = data->inode;
3118 3064
3119 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, 3065 if (!nfs4_sequence_done(task, &data->res.seq_res))
3120 task->tk_status); 3066 return -EAGAIN;
3121 3067
3122 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3068 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3123 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3069 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
@@ -3145,8 +3091,9 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3145{ 3091{
3146 struct inode *inode = data->inode; 3092 struct inode *inode = data->inode;
3147 3093
3148 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, 3094 if (!nfs4_sequence_done(task, &data->res.seq_res))
3149 task->tk_status); 3095 return -EAGAIN;
3096
3150 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3097 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3151 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3098 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
3152 return -EAGAIN; 3099 return -EAGAIN;
@@ -3196,10 +3143,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3196 nfs4_schedule_state_recovery(clp); 3143 nfs4_schedule_state_recovery(clp);
3197 return; 3144 return;
3198 } 3145 }
3199 spin_lock(&clp->cl_lock); 3146 do_renew_lease(clp, timestamp);
3200 if (time_before(clp->cl_last_renewal,timestamp))
3201 clp->cl_last_renewal = timestamp;
3202 spin_unlock(&clp->cl_lock);
3203} 3147}
3204 3148
3205static const struct rpc_call_ops nfs4_renew_ops = { 3149static const struct rpc_call_ops nfs4_renew_ops = {
@@ -3240,10 +3184,7 @@ int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
3240 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3184 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
3241 if (status < 0) 3185 if (status < 0)
3242 return status; 3186 return status;
3243 spin_lock(&clp->cl_lock); 3187 do_renew_lease(clp, now);
3244 if (time_before(clp->cl_last_renewal,now))
3245 clp->cl_last_renewal = now;
3246 spin_unlock(&clp->cl_lock);
3247 return 0; 3188 return 0;
3248} 3189}
3249 3190
@@ -3464,9 +3405,11 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
3464} 3405}
3465 3406
3466static int 3407static int
3467_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state) 3408nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
3468{ 3409{
3469 if (!clp || task->tk_status >= 0) 3410 struct nfs_client *clp = server->nfs_client;
3411
3412 if (task->tk_status >= 0)
3470 return 0; 3413 return 0;
3471 switch(task->tk_status) { 3414 switch(task->tk_status) {
3472 case -NFS4ERR_ADMIN_REVOKED: 3415 case -NFS4ERR_ADMIN_REVOKED:
@@ -3477,9 +3420,6 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3477 nfs4_state_mark_reclaim_nograce(clp, state); 3420 nfs4_state_mark_reclaim_nograce(clp, state);
3478 goto do_state_recovery; 3421 goto do_state_recovery;
3479 case -NFS4ERR_STALE_STATEID: 3422 case -NFS4ERR_STALE_STATEID:
3480 if (state == NULL)
3481 break;
3482 nfs4_state_mark_reclaim_reboot(clp, state);
3483 case -NFS4ERR_STALE_CLIENTID: 3423 case -NFS4ERR_STALE_CLIENTID:
3484 case -NFS4ERR_EXPIRED: 3424 case -NFS4ERR_EXPIRED:
3485 goto do_state_recovery; 3425 goto do_state_recovery;
@@ -3498,8 +3438,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3498 return -EAGAIN; 3438 return -EAGAIN;
3499#endif /* CONFIG_NFS_V4_1 */ 3439#endif /* CONFIG_NFS_V4_1 */
3500 case -NFS4ERR_DELAY: 3440 case -NFS4ERR_DELAY:
3501 if (server) 3441 nfs_inc_server_stats(server, NFSIOS_DELAY);
3502 nfs_inc_server_stats(server, NFSIOS_DELAY);
3503 case -NFS4ERR_GRACE: 3442 case -NFS4ERR_GRACE:
3504 case -EKEYEXPIRED: 3443 case -EKEYEXPIRED:
3505 rpc_delay(task, NFS4_POLL_RETRY_MAX); 3444 rpc_delay(task, NFS4_POLL_RETRY_MAX);
@@ -3520,12 +3459,6 @@ do_state_recovery:
3520 return -EAGAIN; 3459 return -EAGAIN;
3521} 3460}
3522 3461
3523static int
3524nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
3525{
3526 return _nfs4_async_handle_error(task, server, server->nfs_client, state);
3527}
3528
3529int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, 3462int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3530 unsigned short port, struct rpc_cred *cred, 3463 unsigned short port, struct rpc_cred *cred,
3531 struct nfs4_setclientid_res *res) 3464 struct nfs4_setclientid_res *res)
@@ -3620,7 +3553,6 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3620 case -NFS4ERR_RESOURCE: 3553 case -NFS4ERR_RESOURCE:
3621 /* The IBM lawyers misread another document! */ 3554 /* The IBM lawyers misread another document! */
3622 case -NFS4ERR_DELAY: 3555 case -NFS4ERR_DELAY:
3623 case -EKEYEXPIRED:
3624 err = nfs4_delay(clp->cl_rpcclient, &timeout); 3556 err = nfs4_delay(clp->cl_rpcclient, &timeout);
3625 } 3557 }
3626 } while (err == 0); 3558 } while (err == 0);
@@ -3641,8 +3573,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
3641{ 3573{
3642 struct nfs4_delegreturndata *data = calldata; 3574 struct nfs4_delegreturndata *data = calldata;
3643 3575
3644 nfs4_sequence_done(data->res.server, &data->res.seq_res, 3576 if (!nfs4_sequence_done(task, &data->res.seq_res))
3645 task->tk_status); 3577 return;
3646 3578
3647 switch (task->tk_status) { 3579 switch (task->tk_status) {
3648 case -NFS4ERR_STALE_STATEID: 3580 case -NFS4ERR_STALE_STATEID:
@@ -3672,7 +3604,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
3672 3604
3673 d_data = (struct nfs4_delegreturndata *)data; 3605 d_data = (struct nfs4_delegreturndata *)data;
3674 3606
3675 if (nfs4_setup_sequence(d_data->res.server->nfs_client, 3607 if (nfs4_setup_sequence(d_data->res.server,
3676 &d_data->args.seq_args, 3608 &d_data->args.seq_args,
3677 &d_data->res.seq_res, 1, task)) 3609 &d_data->res.seq_res, 1, task))
3678 return; 3610 return;
@@ -3715,7 +3647,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3715 memcpy(&data->stateid, stateid, sizeof(data->stateid)); 3647 memcpy(&data->stateid, stateid, sizeof(data->stateid));
3716 data->res.fattr = &data->fattr; 3648 data->res.fattr = &data->fattr;
3717 data->res.server = server; 3649 data->res.server = server;
3718 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3719 nfs_fattr_init(data->res.fattr); 3650 nfs_fattr_init(data->res.fattr);
3720 data->timestamp = jiffies; 3651 data->timestamp = jiffies;
3721 data->rpc_status = 0; 3652 data->rpc_status = 0;
@@ -3868,7 +3799,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
3868 p->arg.fl = &p->fl; 3799 p->arg.fl = &p->fl;
3869 p->arg.seqid = seqid; 3800 p->arg.seqid = seqid;
3870 p->res.seqid = seqid; 3801 p->res.seqid = seqid;
3871 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3872 p->arg.stateid = &lsp->ls_stateid; 3802 p->arg.stateid = &lsp->ls_stateid;
3873 p->lsp = lsp; 3803 p->lsp = lsp;
3874 atomic_inc(&lsp->ls_count); 3804 atomic_inc(&lsp->ls_count);
@@ -3892,9 +3822,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3892{ 3822{
3893 struct nfs4_unlockdata *calldata = data; 3823 struct nfs4_unlockdata *calldata = data;
3894 3824
3895 nfs4_sequence_done(calldata->server, &calldata->res.seq_res, 3825 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
3896 task->tk_status);
3897 if (RPC_ASSASSINATED(task))
3898 return; 3826 return;
3899 switch (task->tk_status) { 3827 switch (task->tk_status) {
3900 case 0: 3828 case 0:
@@ -3927,7 +3855,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
3927 return; 3855 return;
3928 } 3856 }
3929 calldata->timestamp = jiffies; 3857 calldata->timestamp = jiffies;
3930 if (nfs4_setup_sequence(calldata->server->nfs_client, 3858 if (nfs4_setup_sequence(calldata->server,
3931 &calldata->arg.seq_args, 3859 &calldata->arg.seq_args,
3932 &calldata->res.seq_res, 1, task)) 3860 &calldata->res.seq_res, 1, task))
3933 return; 3861 return;
@@ -4050,7 +3978,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
4050 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3978 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
4051 p->arg.lock_owner.id = lsp->ls_id.id; 3979 p->arg.lock_owner.id = lsp->ls_id.id;
4052 p->res.lock_seqid = p->arg.lock_seqid; 3980 p->res.lock_seqid = p->arg.lock_seqid;
4053 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4054 p->lsp = lsp; 3981 p->lsp = lsp;
4055 p->server = server; 3982 p->server = server;
4056 atomic_inc(&lsp->ls_count); 3983 atomic_inc(&lsp->ls_count);
@@ -4082,7 +4009,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4082 } else 4009 } else
4083 data->arg.new_lock_owner = 0; 4010 data->arg.new_lock_owner = 0;
4084 data->timestamp = jiffies; 4011 data->timestamp = jiffies;
4085 if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, 4012 if (nfs4_setup_sequence(data->server,
4013 &data->arg.seq_args,
4086 &data->res.seq_res, 1, task)) 4014 &data->res.seq_res, 1, task))
4087 return; 4015 return;
4088 rpc_call_start(task); 4016 rpc_call_start(task);
@@ -4101,12 +4029,10 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
4101 4029
4102 dprintk("%s: begin!\n", __func__); 4030 dprintk("%s: begin!\n", __func__);
4103 4031
4104 nfs4_sequence_done(data->server, &data->res.seq_res, 4032 if (!nfs4_sequence_done(task, &data->res.seq_res))
4105 task->tk_status); 4033 return;
4106 4034
4107 data->rpc_status = task->tk_status; 4035 data->rpc_status = task->tk_status;
4108 if (RPC_ASSASSINATED(task))
4109 goto out;
4110 if (data->arg.new_lock_owner != 0) { 4036 if (data->arg.new_lock_owner != 0) {
4111 if (data->rpc_status == 0) 4037 if (data->rpc_status == 0)
4112 nfs_confirm_seqid(&data->lsp->ls_seqid, 0); 4038 nfs_confirm_seqid(&data->lsp->ls_seqid, 0);
@@ -4238,7 +4164,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4238 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4164 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4239 return 0; 4165 return 0;
4240 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); 4166 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4241 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) 4167 if (err != -NFS4ERR_DELAY)
4242 break; 4168 break;
4243 nfs4_handle_exception(server, err, &exception); 4169 nfs4_handle_exception(server, err, &exception);
4244 } while (exception.retry); 4170 } while (exception.retry);
@@ -4263,7 +4189,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4263 goto out; 4189 goto out;
4264 case -NFS4ERR_GRACE: 4190 case -NFS4ERR_GRACE:
4265 case -NFS4ERR_DELAY: 4191 case -NFS4ERR_DELAY:
4266 case -EKEYEXPIRED:
4267 nfs4_handle_exception(server, err, &exception); 4192 nfs4_handle_exception(server, err, &exception);
4268 err = 0; 4193 err = 0;
4269 } 4194 }
@@ -4409,13 +4334,21 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4409 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4334 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
4410 err = 0; 4335 err = 0;
4411 goto out; 4336 goto out;
4337 case -EKEYEXPIRED:
4338 /*
4339 * User RPCSEC_GSS context has expired.
4340 * We cannot recover this stateid now, so
4341 * skip it and allow recovery thread to
4342 * proceed.
4343 */
4344 err = 0;
4345 goto out;
4412 case -ENOMEM: 4346 case -ENOMEM:
4413 case -NFS4ERR_DENIED: 4347 case -NFS4ERR_DENIED:
4414 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 4348 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4415 err = 0; 4349 err = 0;
4416 goto out; 4350 goto out;
4417 case -NFS4ERR_DELAY: 4351 case -NFS4ERR_DELAY:
4418 case -EKEYEXPIRED:
4419 break; 4352 break;
4420 } 4353 }
4421 err = nfs4_handle_exception(server, err, &exception); 4354 err = nfs4_handle_exception(server, err, &exception);
@@ -4424,6 +4357,34 @@ out:
4424 return err; 4357 return err;
4425} 4358}
4426 4359
4360static void nfs4_release_lockowner_release(void *calldata)
4361{
4362 kfree(calldata);
4363}
4364
4365const struct rpc_call_ops nfs4_release_lockowner_ops = {
4366 .rpc_release = nfs4_release_lockowner_release,
4367};
4368
4369void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
4370{
4371 struct nfs_server *server = lsp->ls_state->owner->so_server;
4372 struct nfs_release_lockowner_args *args;
4373 struct rpc_message msg = {
4374 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
4375 };
4376
4377 if (server->nfs_client->cl_mvops->minor_version != 0)
4378 return;
4379 args = kmalloc(sizeof(*args), GFP_NOFS);
4380 if (!args)
4381 return;
4382 args->lock_owner.clientid = server->nfs_client->cl_clientid;
4383 args->lock_owner.id = lsp->ls_id.id;
4384 msg.rpc_argp = args;
4385 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
4386}
4387
4427#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 4388#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
4428 4389
4429int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, 4390int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
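
The new nfs4_release_lockowner() above is a fire-and-forget asynchronous RPC: the caller allocates the argument structure, hands it to rpc_call_async() with a call-ops table whose only member is .rpc_release, and that release hook frees the arguments when the task finishes. A minimal userspace sketch of that ownership pattern follows; async_call, struct call_ops and release_args are illustrative names, not SUNRPC interfaces.

#include <stdio.h>
#include <stdlib.h>

/* Minimal stand-in for the rpc_call_ops table: only a release hook. */
struct call_ops {
    void (*release)(void *calldata);
};

struct lockowner_args {
    unsigned long long clientid;
    unsigned long long id;
};

/* Models rpc_call_async(): runs the "call", then lets the ops table
 * release the per-call data.  The caller never touches args again. */
static void async_call(const struct call_ops *ops, void *calldata)
{
    struct lockowner_args *args = calldata;

    printf("RELEASE_LOCKOWNER clientid=%llx id=%llu\n",
           args->clientid, args->id);
    if (ops->release)
        ops->release(calldata);
}

static void release_args(void *calldata)
{
    free(calldata);    /* mirrors nfs4_release_lockowner_release() */
}

static const struct call_ops release_lockowner_ops = {
    .release = release_args,
};

int main(void)
{
    struct lockowner_args *args = malloc(sizeof(*args));

    if (!args)
        return 1;    /* like the kernel code, give up silently */
    args->clientid = 0x1234abcdULL;
    args->id = 42;
    async_call(&release_lockowner_ops, args);    /* fire and forget */
    return 0;
}
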
@@ -4611,11 +4572,11 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4611 (struct nfs4_get_lease_time_data *)calldata; 4572 (struct nfs4_get_lease_time_data *)calldata;
4612 4573
4613 dprintk("--> %s\n", __func__); 4574 dprintk("--> %s\n", __func__);
4614 nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status); 4575 if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
4576 return;
4615 switch (task->tk_status) { 4577 switch (task->tk_status) {
4616 case -NFS4ERR_DELAY: 4578 case -NFS4ERR_DELAY:
4617 case -NFS4ERR_GRACE: 4579 case -NFS4ERR_GRACE:
4618 case -EKEYEXPIRED:
4619 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); 4580 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4620 rpc_delay(task, NFS4_POLL_RETRY_MIN); 4581 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4621 task->tk_status = 0; 4582 task->tk_status = 0;
@@ -4655,7 +4616,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4655 }; 4616 };
4656 int status; 4617 int status;
4657 4618
4658 res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4659 dprintk("--> %s\n", __func__); 4619 dprintk("--> %s\n", __func__);
4660 task = rpc_run_task(&task_setup); 4620 task = rpc_run_task(&task_setup);
4661 4621
@@ -4805,13 +4765,6 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4805 if (!session) 4765 if (!session)
4806 return NULL; 4766 return NULL;
4807 4767
4808 /*
4809 * The create session reply races with the server back
4810 * channel probe. Mark the client NFS_CS_SESSION_INITING
4811 * so that the client back channel can find the
4812 * nfs_client struct
4813 */
4814 clp->cl_cons_state = NFS_CS_SESSION_INITING;
4815 init_completion(&session->complete); 4768 init_completion(&session->complete);
4816 4769
4817 tbl = &session->fc_slot_table; 4770 tbl = &session->fc_slot_table;
@@ -4824,6 +4777,8 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4824 spin_lock_init(&tbl->slot_tbl_lock); 4777 spin_lock_init(&tbl->slot_tbl_lock);
4825 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); 4778 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
4826 4779
4780 session->session_state = 1<<NFS4_SESSION_INITING;
4781
4827 session->clp = clp; 4782 session->clp = clp;
4828 return session; 4783 return session;
4829} 4784}
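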
@@ -4887,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
4887 args->bc_attrs.max_reqs); 4842 args->bc_attrs.max_reqs);
4888} 4843}
4889 4844
4890static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) 4845static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4891{ 4846{
4892 if (rcvd <= sent) 4847 struct nfs4_channel_attrs *sent = &args->fc_attrs;
4893 return 0; 4848 struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
4894 printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " 4849
4895 "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); 4850 if (rcvd->headerpadsz > sent->headerpadsz)
4896 return -EINVAL; 4851 return -EINVAL;
4852 if (rcvd->max_resp_sz > sent->max_resp_sz)
4853 return -EINVAL;
4854 /*
4855 * Our requested max_ops is the minimum we need; we're not
4856 * prepared to break up compounds into smaller pieces than that.
4857 * So, no point even trying to continue if the server won't
4858 * cooperate:
4859 */
4860 if (rcvd->max_ops < sent->max_ops)
4861 return -EINVAL;
4862 if (rcvd->max_reqs == 0)
4863 return -EINVAL;
4864 return 0;
4897} 4865}
4898 4866
4899#define _verify_fore_channel_attr(_name_) \ 4867static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4900 _verify_channel_attr("fore", #_name_, \ 4868{
4901 args->fc_attrs._name_, \ 4869 struct nfs4_channel_attrs *sent = &args->bc_attrs;
4902 session->fc_attrs._name_) 4870 struct nfs4_channel_attrs *rcvd = &session->bc_attrs;
4903 4871
4904#define _verify_back_channel_attr(_name_) \ 4872 if (rcvd->max_rqst_sz > sent->max_rqst_sz)
4905 _verify_channel_attr("back", #_name_, \ 4873 return -EINVAL;
4906 args->bc_attrs._name_, \ 4874 if (rcvd->max_resp_sz < sent->max_resp_sz)
4907 session->bc_attrs._name_) 4875 return -EINVAL;
4876 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
4877 return -EINVAL;
4878 /* These would render the backchannel useless: */
4879 if (rcvd->max_ops == 0)
4880 return -EINVAL;
4881 if (rcvd->max_reqs == 0)
4882 return -EINVAL;
4883 return 0;
4884}
4908 4885
4909/*
4910 * The server is not allowed to increase the fore channel header pad size,
4911 * maximum response size, or maximum number of operations.
4912 *
4913 * The back channel attributes are only negotiated down: We send what the
4914 * (back channel) server insists upon.
4915 */
4916static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, 4886static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
4917 struct nfs4_session *session) 4887 struct nfs4_session *session)
4918{ 4888{
4919 int ret = 0; 4889 int ret;
4920
4921 ret |= _verify_fore_channel_attr(headerpadsz);
4922 ret |= _verify_fore_channel_attr(max_resp_sz);
4923 ret |= _verify_fore_channel_attr(max_ops);
4924
4925 ret |= _verify_back_channel_attr(headerpadsz);
4926 ret |= _verify_back_channel_attr(max_rqst_sz);
4927 ret |= _verify_back_channel_attr(max_resp_sz);
4928 ret |= _verify_back_channel_attr(max_resp_sz_cached);
4929 ret |= _verify_back_channel_attr(max_ops);
4930 ret |= _verify_back_channel_attr(max_reqs);
4931 4890
4932 return ret; 4891 ret = nfs4_verify_fore_channel_attrs(args, session);
4892 if (ret)
4893 return ret;
4894 return nfs4_verify_back_channel_attrs(args, session);
4933} 4895}
4934 4896
4935static int _nfs4_proc_create_session(struct nfs_client *clp) 4897static int _nfs4_proc_create_session(struct nfs_client *clp)
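
The rewritten verification helpers above encode one simple rule: on the fore channel the server may only lower (or match) what the client asked for, and a reply with zero slots or fewer ops than the client's stated minimum makes the session unusable. Below is a compact userspace restatement of that predicate with made-up numbers; struct chan_attrs and verify_fore_channel() are illustrative, not the kernel's nfs4_channel_attrs.

#include <stdio.h>

struct chan_attrs {
    unsigned int headerpadsz;
    unsigned int max_resp_sz;
    unsigned int max_ops;
    unsigned int max_reqs;
};

/* The server may only reduce the fore-channel limits we requested. */
static int verify_fore_channel(const struct chan_attrs *sent,
                               const struct chan_attrs *rcvd)
{
    if (rcvd->headerpadsz > sent->headerpadsz)
        return -1;
    if (rcvd->max_resp_sz > sent->max_resp_sz)
        return -1;
    if (rcvd->max_ops < sent->max_ops)    /* our request was the minimum */
        return -1;
    if (rcvd->max_reqs == 0)              /* no slots: session is useless */
        return -1;
    return 0;
}

int main(void)
{
    struct chan_attrs sent = { .headerpadsz = 0, .max_resp_sz = 65536,
                               .max_ops = 8, .max_reqs = 16 };
    struct chan_attrs ok   = { .headerpadsz = 0, .max_resp_sz = 32768,
                               .max_ops = 8, .max_reqs = 4 };
    struct chan_attrs bad  = { .headerpadsz = 0, .max_resp_sz = 32768,
                               .max_ops = 2, .max_reqs = 4 };

    printf("ok reply:  %d\n", verify_fore_channel(&sent, &ok));  /* 0 */
    printf("bad reply: %d\n", verify_fore_channel(&sent, &bad)); /* -1 */
    return 0;
}
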
@@ -5040,6 +5002,10 @@ int nfs4_init_session(struct nfs_server *server)
5040 if (!nfs4_has_session(clp)) 5002 if (!nfs4_has_session(clp))
5041 return 0; 5003 return 0;
5042 5004
5005 session = clp->cl_session;
5006 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
5007 return 0;
5008
5043 rsize = server->rsize; 5009 rsize = server->rsize;
5044 if (rsize == 0) 5010 if (rsize == 0)
5045 rsize = NFS_MAX_FILE_IO_SIZE; 5011 rsize = NFS_MAX_FILE_IO_SIZE;
@@ -5047,7 +5013,6 @@ int nfs4_init_session(struct nfs_server *server)
5047 if (wsize == 0) 5013 if (wsize == 0)
5048 wsize = NFS_MAX_FILE_IO_SIZE; 5014 wsize = NFS_MAX_FILE_IO_SIZE;
5049 5015
5050 session = clp->cl_session;
5051 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; 5016 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
5052 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; 5017 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
5053 5018
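
nfs4_init_session() now guards its work with test_and_clear_bit() on the new NFS4_SESSION_INITING flag that nfs4_alloc_session() sets, so the fore-channel sizes are programmed exactly once no matter how many mounts share the client. A userspace model of that "first caller wins" idiom using C11 atomics is sketched below; session_initing and init_session() are illustrative names only.

#include <stdatomic.h>
#include <stdio.h>
#include <stdbool.h>

/* Set at "allocation" time, consumed by the first initializer. */
static atomic_bool session_initing = true;

static int init_session(int caller)
{
    /* atomic_exchange(false) plays the role of test_and_clear_bit(): the
     * first caller reads back true and does the setup, everyone else skips. */
    if (!atomic_exchange(&session_initing, false)) {
        printf("caller %d: already initialized, nothing to do\n", caller);
        return 0;
    }
    printf("caller %d: programming fore-channel rsize/wsize\n", caller);
    return 0;
}

int main(void)
{
    init_session(1);    /* does the work */
    init_session(2);    /* no-op */
    return 0;
}
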
@@ -5060,69 +5025,69 @@ int nfs4_init_session(struct nfs_server *server)
5060/* 5025/*
5061 * Renew the cl_session lease. 5026 * Renew the cl_session lease.
5062 */ 5027 */
5063static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) 5028struct nfs4_sequence_data {
5064{ 5029 struct nfs_client *clp;
5065 struct nfs4_sequence_args args; 5030 struct nfs4_sequence_args args;
5066 struct nfs4_sequence_res res; 5031 struct nfs4_sequence_res res;
5067 5032};
5068 struct rpc_message msg = {
5069 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
5070 .rpc_argp = &args,
5071 .rpc_resp = &res,
5072 .rpc_cred = cred,
5073 };
5074
5075 args.sa_cache_this = 0;
5076
5077 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
5078 &res, args.sa_cache_this, 1);
5079}
5080 5033
5081static void nfs41_sequence_release(void *data) 5034static void nfs41_sequence_release(void *data)
5082{ 5035{
5083 struct nfs_client *clp = (struct nfs_client *)data; 5036 struct nfs4_sequence_data *calldata = data;
5037 struct nfs_client *clp = calldata->clp;
5084 5038
5085 if (atomic_read(&clp->cl_count) > 1) 5039 if (atomic_read(&clp->cl_count) > 1)
5086 nfs4_schedule_state_renewal(clp); 5040 nfs4_schedule_state_renewal(clp);
5087 nfs_put_client(clp); 5041 nfs_put_client(clp);
5042 kfree(calldata);
5043}
5044
5045static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp)
5046{
5047 switch(task->tk_status) {
5048 case -NFS4ERR_DELAY:
5049 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5050 return -EAGAIN;
5051 default:
5052 nfs4_schedule_state_recovery(clp);
5053 }
5054 return 0;
5088} 5055}
5089 5056
5090static void nfs41_sequence_call_done(struct rpc_task *task, void *data) 5057static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
5091{ 5058{
5092 struct nfs_client *clp = (struct nfs_client *)data; 5059 struct nfs4_sequence_data *calldata = data;
5060 struct nfs_client *clp = calldata->clp;
5093 5061
5094 nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status); 5062 if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
5063 return;
5095 5064
5096 if (task->tk_status < 0) { 5065 if (task->tk_status < 0) {
5097 dprintk("%s ERROR %d\n", __func__, task->tk_status); 5066 dprintk("%s ERROR %d\n", __func__, task->tk_status);
5098 if (atomic_read(&clp->cl_count) == 1) 5067 if (atomic_read(&clp->cl_count) == 1)
5099 goto out; 5068 goto out;
5100 5069
5101 if (_nfs4_async_handle_error(task, NULL, clp, NULL) 5070 if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
5102 == -EAGAIN) { 5071 rpc_restart_call_prepare(task);
5103 nfs_restart_rpc(task, clp);
5104 return; 5072 return;
5105 } 5073 }
5106 } 5074 }
5107 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); 5075 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
5108out: 5076out:
5109 kfree(task->tk_msg.rpc_argp);
5110 kfree(task->tk_msg.rpc_resp);
5111
5112 dprintk("<-- %s\n", __func__); 5077 dprintk("<-- %s\n", __func__);
5113} 5078}
5114 5079
5115static void nfs41_sequence_prepare(struct rpc_task *task, void *data) 5080static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
5116{ 5081{
5117 struct nfs_client *clp; 5082 struct nfs4_sequence_data *calldata = data;
5083 struct nfs_client *clp = calldata->clp;
5118 struct nfs4_sequence_args *args; 5084 struct nfs4_sequence_args *args;
5119 struct nfs4_sequence_res *res; 5085 struct nfs4_sequence_res *res;
5120 5086
5121 clp = (struct nfs_client *)data;
5122 args = task->tk_msg.rpc_argp; 5087 args = task->tk_msg.rpc_argp;
5123 res = task->tk_msg.rpc_resp; 5088 res = task->tk_msg.rpc_resp;
5124 5089
5125 if (nfs4_setup_sequence(clp, args, res, 0, task)) 5090 if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
5126 return; 5091 return;
5127 rpc_call_start(task); 5092 rpc_call_start(task);
5128} 5093}
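
The new nfs41_sequence_handle_errors() above collapses the renewal path's error handling to two outcomes: NFS4ERR_DELAY requeues the task after a delay (-EAGAIN to the caller), and everything else kicks off full state recovery. The toy dispatcher below restates that control flow in isolation; handle_sequence_error() and schedule_recovery() are invented helpers, not kernel functions.

#include <stdio.h>

#define NFS4ERR_DELAY      10008
#define NFS4ERR_BADSESSION 10052

static void schedule_recovery(void)
{
    printf("  -> schedule state recovery\n");
}

/* Returns -EAGAIN (-11) when the caller should delay and retry the
 * SEQUENCE call, 0 when the error has been handed off to recovery. */
static int handle_sequence_error(int status)
{
    switch (status) {
    case -NFS4ERR_DELAY:
        printf("  -> delay, then retry\n");
        return -11;        /* -EAGAIN */
    default:
        schedule_recovery();
        return 0;
    }
}

int main(void)
{
    int errs[] = { -NFS4ERR_DELAY, -NFS4ERR_BADSESSION };

    for (unsigned i = 0; i < sizeof(errs) / sizeof(errs[0]); i++) {
        printf("status %d:\n", errs[i]);
        handle_sequence_error(errs[i]);
    }
    return 0;
}
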
@@ -5133,32 +5098,66 @@ static const struct rpc_call_ops nfs41_sequence_ops = {
5133 .rpc_release = nfs41_sequence_release, 5098 .rpc_release = nfs41_sequence_release,
5134}; 5099};
5135 5100
5136static int nfs41_proc_async_sequence(struct nfs_client *clp, 5101static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5137 struct rpc_cred *cred)
5138{ 5102{
5139 struct nfs4_sequence_args *args; 5103 struct nfs4_sequence_data *calldata;
5140 struct nfs4_sequence_res *res;
5141 struct rpc_message msg = { 5104 struct rpc_message msg = {
5142 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], 5105 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
5143 .rpc_cred = cred, 5106 .rpc_cred = cred,
5144 }; 5107 };
5108 struct rpc_task_setup task_setup_data = {
5109 .rpc_client = clp->cl_rpcclient,
5110 .rpc_message = &msg,
5111 .callback_ops = &nfs41_sequence_ops,
5112 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
5113 };
5145 5114
5146 if (!atomic_inc_not_zero(&clp->cl_count)) 5115 if (!atomic_inc_not_zero(&clp->cl_count))
5147 return -EIO; 5116 return ERR_PTR(-EIO);
5148 args = kzalloc(sizeof(*args), GFP_NOFS); 5117 calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
5149 res = kzalloc(sizeof(*res), GFP_NOFS); 5118 if (calldata == NULL) {
5150 if (!args || !res) {
5151 kfree(args);
5152 kfree(res);
5153 nfs_put_client(clp); 5119 nfs_put_client(clp);
5154 return -ENOMEM; 5120 return ERR_PTR(-ENOMEM);
5155 } 5121 }
5156 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 5122 msg.rpc_argp = &calldata->args;
5157 msg.rpc_argp = args; 5123 msg.rpc_resp = &calldata->res;
5158 msg.rpc_resp = res; 5124 calldata->clp = clp;
5125 task_setup_data.callback_data = calldata;
5159 5126
5160 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, 5127 return rpc_run_task(&task_setup_data);
5161 &nfs41_sequence_ops, (void *)clp); 5128}
5129
5130static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5131{
5132 struct rpc_task *task;
5133 int ret = 0;
5134
5135 task = _nfs41_proc_sequence(clp, cred);
5136 if (IS_ERR(task))
5137 ret = PTR_ERR(task);
5138 else
5139 rpc_put_task(task);
5140 dprintk("<-- %s status=%d\n", __func__, ret);
5141 return ret;
5142}
5143
5144static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5145{
5146 struct rpc_task *task;
5147 int ret;
5148
5149 task = _nfs41_proc_sequence(clp, cred);
5150 if (IS_ERR(task)) {
5151 ret = PTR_ERR(task);
5152 goto out;
5153 }
5154 ret = rpc_wait_for_completion_task(task);
5155 if (!ret)
5156 ret = task->tk_status;
5157 rpc_put_task(task);
5158out:
5159 dprintk("<-- %s status=%d\n", __func__, ret);
5160 return ret;
5162} 5161}
5163 5162
5164struct nfs4_reclaim_complete_data { 5163struct nfs4_reclaim_complete_data {
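
After this rework, nfs4_proc_sequence() is just a synchronous wrapper around the async path: start the task, wait for it to complete, read tk_status, drop the reference. The pthread sketch below mirrors that run/wait/put shape; struct task, run_task(), wait_task() and put_task() are invented stand-ins, not SUNRPC interfaces.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* A tiny stand-in for struct rpc_task: a worker thread plus its status. */
struct task {
    pthread_t tid;
    int status;
};

static void *task_body(void *data)
{
    struct task *t = data;

    t->status = 0;    /* pretend the SEQUENCE call succeeded */
    return NULL;
}

static struct task *run_task(void)
{
    struct task *t = malloc(sizeof(*t));

    if (!t || pthread_create(&t->tid, NULL, task_body, t)) {
        free(t);
        return NULL;
    }
    return t;
}

static int wait_task(struct task *t)    /* like rpc_wait_for_completion_task() */
{
    return pthread_join(t->tid, NULL);
}

static void put_task(struct task *t)    /* like rpc_put_task() */
{
    free(t);
}

int main(void)
{
    struct task *t = run_task();
    int ret;

    if (!t)
        return 1;
    ret = wait_task(t);
    if (!ret)
        ret = t->status;    /* same shape as nfs4_proc_sequence() */
    put_task(t);
    printf("status=%d\n", ret);
    return 0;
}
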
@@ -5172,13 +5171,30 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
5172 struct nfs4_reclaim_complete_data *calldata = data; 5171 struct nfs4_reclaim_complete_data *calldata = data;
5173 5172
5174 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); 5173 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
5175 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, 5174 if (nfs41_setup_sequence(calldata->clp->cl_session,
5175 &calldata->arg.seq_args,
5176 &calldata->res.seq_res, 0, task)) 5176 &calldata->res.seq_res, 0, task))
5177 return; 5177 return;
5178 5178
5179 rpc_call_start(task); 5179 rpc_call_start(task);
5180} 5180}
5181 5181
5182static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp)
5183{
5184 switch(task->tk_status) {
5185 case 0:
5186 case -NFS4ERR_COMPLETE_ALREADY:
5187 case -NFS4ERR_WRONG_CRED: /* What to do here? */
5188 break;
5189 case -NFS4ERR_DELAY:
5190 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5191 return -EAGAIN;
5192 default:
5193 nfs4_schedule_state_recovery(clp);
5194 }
5195 return 0;
5196}
5197
5182static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) 5198static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
5183{ 5199{
5184 struct nfs4_reclaim_complete_data *calldata = data; 5200 struct nfs4_reclaim_complete_data *calldata = data;
@@ -5186,32 +5202,13 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
5186 struct nfs4_sequence_res *res = &calldata->res.seq_res; 5202 struct nfs4_sequence_res *res = &calldata->res.seq_res;
5187 5203
5188 dprintk("--> %s\n", __func__); 5204 dprintk("--> %s\n", __func__);
5189 nfs41_sequence_done(clp, res, task->tk_status); 5205 if (!nfs41_sequence_done(task, res))
5190 switch (task->tk_status) { 5206 return;
5191 case 0:
5192 case -NFS4ERR_COMPLETE_ALREADY:
5193 break;
5194 case -NFS4ERR_BADSESSION:
5195 case -NFS4ERR_DEADSESSION:
5196 /*
5197 * Handle the session error, but do not retry the operation, as
5198 * we have no way of telling whether the clientid had to be
5199 * reset before we got our reply. If reset, a new wave of
5200 * reclaim operations will follow, containing their own reclaim
5201 * complete. We don't want our retry to get in the way of
5202 * recovery by incorrectly indicating to the server that we're
5203 * done reclaiming state since the process had to be restarted.
5204 */
5205 _nfs4_async_handle_error(task, NULL, clp, NULL);
5206 break;
5207 default:
5208 if (_nfs4_async_handle_error(
5209 task, NULL, clp, NULL) == -EAGAIN) {
5210 rpc_restart_call_prepare(task);
5211 return;
5212 }
5213 }
5214 5207
5208 if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
5209 rpc_restart_call_prepare(task);
5210 return;
5211 }
5215 dprintk("<-- %s\n", __func__); 5212 dprintk("<-- %s\n", __func__);
5216} 5213}
5217 5214
@@ -5252,7 +5249,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5252 goto out; 5249 goto out;
5253 calldata->clp = clp; 5250 calldata->clp = clp;
5254 calldata->arg.one_fs = 0; 5251 calldata->arg.one_fs = 0;
5255 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5256 5252
5257 msg.rpc_argp = &calldata->arg; 5253 msg.rpc_argp = &calldata->arg;
5258 msg.rpc_resp = &calldata->res; 5254 msg.rpc_resp = &calldata->res;
@@ -5268,6 +5264,147 @@ out:
5268 dprintk("<-- %s status=%d\n", __func__, status); 5264 dprintk("<-- %s status=%d\n", __func__, status);
5269 return status; 5265 return status;
5270} 5266}
5267
5268static void
5269nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5270{
5271 struct nfs4_layoutget *lgp = calldata;
5272 struct inode *ino = lgp->args.inode;
5273 struct nfs_server *server = NFS_SERVER(ino);
5274
5275 dprintk("--> %s\n", __func__);
5276 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5277 &lgp->res.seq_res, 0, task))
5278 return;
5279 rpc_call_start(task);
5280}
5281
5282static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
5283{
5284 struct nfs4_layoutget *lgp = calldata;
5285 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5286
5287 dprintk("--> %s\n", __func__);
5288
5289 if (!nfs4_sequence_done(task, &lgp->res.seq_res))
5290 return;
5291
5292 switch (task->tk_status) {
5293 case 0:
5294 break;
5295 case -NFS4ERR_LAYOUTTRYLATER:
5296 case -NFS4ERR_RECALLCONFLICT:
5297 task->tk_status = -NFS4ERR_DELAY;
5298 /* Fall through */
5299 default:
5300 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5301 rpc_restart_call_prepare(task);
5302 return;
5303 }
5304 }
5305 lgp->status = task->tk_status;
5306 dprintk("<-- %s\n", __func__);
5307}
5308
5309static void nfs4_layoutget_release(void *calldata)
5310{
5311 struct nfs4_layoutget *lgp = calldata;
5312
5313 dprintk("--> %s\n", __func__);
5314 put_layout_hdr(lgp->args.inode);
5315 if (lgp->res.layout.buf != NULL)
5316 free_page((unsigned long) lgp->res.layout.buf);
5317 put_nfs_open_context(lgp->args.ctx);
5318 kfree(calldata);
5319 dprintk("<-- %s\n", __func__);
5320}
5321
5322static const struct rpc_call_ops nfs4_layoutget_call_ops = {
5323 .rpc_call_prepare = nfs4_layoutget_prepare,
5324 .rpc_call_done = nfs4_layoutget_done,
5325 .rpc_release = nfs4_layoutget_release,
5326};
5327
5328int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5329{
5330 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5331 struct rpc_task *task;
5332 struct rpc_message msg = {
5333 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
5334 .rpc_argp = &lgp->args,
5335 .rpc_resp = &lgp->res,
5336 };
5337 struct rpc_task_setup task_setup_data = {
5338 .rpc_client = server->client,
5339 .rpc_message = &msg,
5340 .callback_ops = &nfs4_layoutget_call_ops,
5341 .callback_data = lgp,
5342 .flags = RPC_TASK_ASYNC,
5343 };
5344 int status = 0;
5345
5346 dprintk("--> %s\n", __func__);
5347
5348 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
5349 if (lgp->res.layout.buf == NULL) {
5350 nfs4_layoutget_release(lgp);
5351 return -ENOMEM;
5352 }
5353
5354 lgp->res.seq_res.sr_slot = NULL;
5355 task = rpc_run_task(&task_setup_data);
5356 if (IS_ERR(task))
5357 return PTR_ERR(task);
5358 status = nfs4_wait_for_completion_rpc_task(task);
5359 if (status != 0)
5360 goto out;
5361 status = lgp->status;
5362 if (status != 0)
5363 goto out;
5364 status = pnfs_layout_process(lgp);
5365out:
5366 rpc_put_task(task);
5367 dprintk("<-- %s status=%d\n", __func__, status);
5368 return status;
5369}
5370
5371static int
5372_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5373{
5374 struct nfs4_getdeviceinfo_args args = {
5375 .pdev = pdev,
5376 };
5377 struct nfs4_getdeviceinfo_res res = {
5378 .pdev = pdev,
5379 };
5380 struct rpc_message msg = {
5381 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
5382 .rpc_argp = &args,
5383 .rpc_resp = &res,
5384 };
5385 int status;
5386
5387 dprintk("--> %s\n", __func__);
5388 status = nfs4_call_sync(server, &msg, &args, &res, 0);
5389 dprintk("<-- %s status=%d\n", __func__, status);
5390
5391 return status;
5392}
5393
5394int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5395{
5396 struct nfs4_exception exception = { };
5397 int err;
5398
5399 do {
5400 err = nfs4_handle_exception(server,
5401 _nfs4_proc_getdeviceinfo(server, pdev),
5402 &exception);
5403 } while (exception.retry);
5404 return err;
5405}
5406EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5407
5271#endif /* CONFIG_NFS_V4_1 */ 5408#endif /* CONFIG_NFS_V4_1 */
5272 5409
5273struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5410struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
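
nfs4_proc_getdeviceinfo() above wraps the one-shot _nfs4_proc_getdeviceinfo() in the usual NFSv4 exception loop: call, let the exception handler decide whether the error is transient, repeat while it says retry. A minimal userspace rendering of that loop follows; struct exception, handle_exception() and do_getdeviceinfo() are stand-ins, and the transient error is faked.

#include <stdio.h>

struct exception {
    int retry;
};

/* Pretend the first two calls hit a transient NFS4ERR_DELAY-like error. */
static int do_getdeviceinfo(void)
{
    static int transient_left = 2;

    if (transient_left-- > 0)
        return -1;    /* transient failure */
    return 0;         /* device info decoded */
}

/* Decide whether an error is worth retrying; clear retry on success
 * or on a permanent failure. */
static int handle_exception(int err, struct exception *exc)
{
    if (err == -1) {
        exc->retry = 1;    /* transient: back off and try again */
        return err;
    }
    exc->retry = 0;
    return err;
}

int main(void)
{
    struct exception exc = { 0 };
    int err;

    do {
        err = handle_exception(do_getdeviceinfo(), &exc);
    } while (exc.retry);
    printf("getdeviceinfo -> %d\n", err);
    return 0;
}
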
@@ -5325,28 +5462,30 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
5325}; 5462};
5326#endif 5463#endif
5327 5464
5328/* 5465static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
5329 * Per minor version reboot and network partition recovery ops 5466 .minor_version = 0,
5330 */ 5467 .call_sync = _nfs4_call_sync,
5331 5468 .validate_stateid = nfs4_validate_delegation_stateid,
5332struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = { 5469 .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
5333 &nfs40_reboot_recovery_ops, 5470 .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
5334#if defined(CONFIG_NFS_V4_1) 5471 .state_renewal_ops = &nfs40_state_renewal_ops,
5335 &nfs41_reboot_recovery_ops,
5336#endif
5337}; 5472};
5338 5473
5339struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
5340 &nfs40_nograce_recovery_ops,
5341#if defined(CONFIG_NFS_V4_1) 5474#if defined(CONFIG_NFS_V4_1)
5342 &nfs41_nograce_recovery_ops, 5475static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
5343#endif 5476 .minor_version = 1,
5477 .call_sync = _nfs4_call_sync_session,
5478 .validate_stateid = nfs41_validate_delegation_stateid,
5479 .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
5480 .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
5481 .state_renewal_ops = &nfs41_state_renewal_ops,
5344}; 5482};
5483#endif
5345 5484
5346struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = { 5485const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
5347 &nfs40_state_renewal_ops, 5486 [0] = &nfs_v4_0_minor_ops,
5348#if defined(CONFIG_NFS_V4_1) 5487#if defined(CONFIG_NFS_V4_1)
5349 &nfs41_state_renewal_ops, 5488 [1] = &nfs_v4_1_minor_ops,
5350#endif 5489#endif
5351}; 5490};
5352 5491
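
The hunk above folds the old per-feature arrays indexed by cl_minorversion into a single nfs4_minor_version_ops table, so callers dereference one pointer (clp->cl_mvops) instead of three parallel arrays. The sketch below shows the same dispatch shape in isolation; struct minor_ops, v0_ops/v1_ops and renew() are illustrative only.

#include <stdio.h>

struct minor_ops {
    unsigned int minor_version;
    void (*renew)(void);
};

static void v0_renew(void) { printf("RENEW (v4.0)\n"); }
static void v1_renew(void) { printf("SEQUENCE (v4.1)\n"); }

static const struct minor_ops v0_ops = { .minor_version = 0, .renew = v0_renew };
static const struct minor_ops v1_ops = { .minor_version = 1, .renew = v1_renew };

/* One table, indexed by minor version, instead of parallel arrays. */
static const struct minor_ops *minor_version_ops[] = {
    [0] = &v0_ops,
    [1] = &v1_ops,
};

int main(void)
{
    for (unsigned mv = 0; mv < 2; mv++) {
        const struct minor_ops *ops = minor_version_ops[mv];

        /* callers hold a single ops pointer, like clp->cl_mvops */
        ops->renew();
    }
    return 0;
}
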
@@ -5376,6 +5515,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5376 .unlink_setup = nfs4_proc_unlink_setup, 5515 .unlink_setup = nfs4_proc_unlink_setup,
5377 .unlink_done = nfs4_proc_unlink_done, 5516 .unlink_done = nfs4_proc_unlink_done,
5378 .rename = nfs4_proc_rename, 5517 .rename = nfs4_proc_rename,
5518 .rename_setup = nfs4_proc_rename_setup,
5519 .rename_done = nfs4_proc_rename_done,
5379 .link = nfs4_proc_link, 5520 .link = nfs4_proc_link,
5380 .symlink = nfs4_proc_symlink, 5521 .symlink = nfs4_proc_symlink,
5381 .mkdir = nfs4_proc_mkdir, 5522 .mkdir = nfs4_proc_mkdir,
@@ -5396,6 +5537,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5396 .lock = nfs4_proc_lock, 5537 .lock = nfs4_proc_lock,
5397 .clear_acl_cache = nfs4_zap_acl_attr, 5538 .clear_acl_cache = nfs4_zap_acl_attr,
5398 .close_context = nfs4_close_context, 5539 .close_context = nfs4_close_context,
5540 .open_context = nfs4_atomic_open,
5399}; 5541};
5400 5542
5401/* 5543/*
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index d87f10327b72..72b6c580af13 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -54,14 +54,14 @@
54void 54void
55nfs4_renew_state(struct work_struct *work) 55nfs4_renew_state(struct work_struct *work)
56{ 56{
57 struct nfs4_state_maintenance_ops *ops; 57 const struct nfs4_state_maintenance_ops *ops;
58 struct nfs_client *clp = 58 struct nfs_client *clp =
59 container_of(work, struct nfs_client, cl_renewd.work); 59 container_of(work, struct nfs_client, cl_renewd.work);
60 struct rpc_cred *cred; 60 struct rpc_cred *cred;
61 long lease; 61 long lease;
62 unsigned long last, now; 62 unsigned long last, now;
63 63
64 ops = nfs4_state_renewal_ops[clp->cl_minorversion]; 64 ops = clp->cl_mvops->state_renewal_ops;
65 dprintk("%s: start\n", __func__); 65 dprintk("%s: start\n", __func__);
66 /* Are there any active superblocks? */ 66 /* Are there any active superblocks? */
67 if (list_empty(&clp->cl_superblocks)) 67 if (list_empty(&clp->cl_superblocks))
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 34acf5926fdc..f575a3126737 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -40,12 +40,13 @@
40 40
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/smp_lock.h> 43#include <linux/fs.h>
44#include <linux/nfs_fs.h> 44#include <linux/nfs_fs.h>
45#include <linux/nfs_idmap.h> 45#include <linux/nfs_idmap.h>
46#include <linux/kthread.h> 46#include <linux/kthread.h>
47#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/random.h> 48#include <linux/random.h>
49#include <linux/ratelimit.h>
49#include <linux/workqueue.h> 50#include <linux/workqueue.h>
50#include <linux/bitops.h> 51#include <linux/bitops.h>
51 52
@@ -53,6 +54,7 @@
53#include "callback.h" 54#include "callback.h"
54#include "delegation.h" 55#include "delegation.h"
55#include "internal.h" 56#include "internal.h"
57#include "pnfs.h"
56 58
57#define OPENOWNER_POOL_SIZE 8 59#define OPENOWNER_POOL_SIZE 8
58 60
@@ -145,7 +147,9 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
145 struct nfs4_session *ses = clp->cl_session; 147 struct nfs4_session *ses = clp->cl_session;
146 int max_slots; 148 int max_slots;
147 149
148 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) { 150 if (ses == NULL)
151 return;
152 if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
149 spin_lock(&ses->fc_slot_table.slot_tbl_lock); 153 spin_lock(&ses->fc_slot_table.slot_tbl_lock);
150 max_slots = ses->fc_slot_table.max_slots; 154 max_slots = ses->fc_slot_table.max_slots;
151 while (max_slots--) { 155 while (max_slots--) {
@@ -167,7 +171,7 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
167 struct nfs4_slot_table *tbl = &ses->fc_slot_table; 171 struct nfs4_slot_table *tbl = &ses->fc_slot_table;
168 172
169 spin_lock(&tbl->slot_tbl_lock); 173 spin_lock(&tbl->slot_tbl_lock);
170 set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); 174 set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
171 if (tbl->highest_used_slotid != -1) { 175 if (tbl->highest_used_slotid != -1) {
172 INIT_COMPLETION(ses->complete); 176 INIT_COMPLETION(ses->complete);
173 spin_unlock(&tbl->slot_tbl_lock); 177 spin_unlock(&tbl->slot_tbl_lock);
@@ -371,7 +375,6 @@ nfs4_alloc_state_owner(void)
371 return NULL; 375 return NULL;
372 spin_lock_init(&sp->so_lock); 376 spin_lock_init(&sp->so_lock);
373 INIT_LIST_HEAD(&sp->so_states); 377 INIT_LIST_HEAD(&sp->so_states);
374 INIT_LIST_HEAD(&sp->so_delegations);
375 rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); 378 rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
376 sp->so_seqid.sequence = &sp->so_sequence; 379 sp->so_seqid.sequence = &sp->so_sequence;
377 spin_lock_init(&sp->so_sequence.lock); 380 spin_lock_init(&sp->so_sequence.lock);
@@ -384,7 +387,7 @@ static void
384nfs4_drop_state_owner(struct nfs4_state_owner *sp) 387nfs4_drop_state_owner(struct nfs4_state_owner *sp)
385{ 388{
386 if (!RB_EMPTY_NODE(&sp->so_client_node)) { 389 if (!RB_EMPTY_NODE(&sp->so_client_node)) {
387 struct nfs_client *clp = sp->so_client; 390 struct nfs_client *clp = sp->so_server->nfs_client;
388 391
389 spin_lock(&clp->cl_lock); 392 spin_lock(&clp->cl_lock);
390 rb_erase(&sp->so_client_node, &clp->cl_state_owners); 393 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
@@ -406,7 +409,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
406 new = nfs4_alloc_state_owner(); 409 new = nfs4_alloc_state_owner();
407 if (new == NULL) 410 if (new == NULL)
408 return NULL; 411 return NULL;
409 new->so_client = clp;
410 new->so_server = server; 412 new->so_server = server;
411 new->so_cred = cred; 413 new->so_cred = cred;
412 spin_lock(&clp->cl_lock); 414 spin_lock(&clp->cl_lock);
@@ -423,7 +425,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
423 425
424void nfs4_put_state_owner(struct nfs4_state_owner *sp) 426void nfs4_put_state_owner(struct nfs4_state_owner *sp)
425{ 427{
426 struct nfs_client *clp = sp->so_client; 428 struct nfs_client *clp = sp->so_server->nfs_client;
427 struct rpc_cred *cred = sp->so_cred; 429 struct rpc_cred *cred = sp->so_cred;
428 430
429 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 431 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -602,12 +604,21 @@ void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
602 * that is compatible with current->files 604 * that is compatible with current->files
603 */ 605 */
604static struct nfs4_lock_state * 606static struct nfs4_lock_state *
605__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) 607__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
606{ 608{
607 struct nfs4_lock_state *pos; 609 struct nfs4_lock_state *pos;
608 list_for_each_entry(pos, &state->lock_states, ls_locks) { 610 list_for_each_entry(pos, &state->lock_states, ls_locks) {
609 if (pos->ls_owner != fl_owner) 611 if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
610 continue; 612 continue;
613 switch (pos->ls_owner.lo_type) {
614 case NFS4_POSIX_LOCK_TYPE:
615 if (pos->ls_owner.lo_u.posix_owner != fl_owner)
616 continue;
617 break;
618 case NFS4_FLOCK_LOCK_TYPE:
619 if (pos->ls_owner.lo_u.flock_owner != fl_pid)
620 continue;
621 }
611 atomic_inc(&pos->ls_count); 622 atomic_inc(&pos->ls_count);
612 return pos; 623 return pos;
613 } 624 }
@@ -619,10 +630,10 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
619 * exists, return an uninitialized one. 630 * exists, return an uninitialized one.
620 * 631 *
621 */ 632 */
622static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) 633static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
623{ 634{
624 struct nfs4_lock_state *lsp; 635 struct nfs4_lock_state *lsp;
625 struct nfs_client *clp = state->owner->so_client; 636 struct nfs_client *clp = state->owner->so_server->nfs_client;
626 637
627 lsp = kzalloc(sizeof(*lsp), GFP_NOFS); 638 lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
628 if (lsp == NULL) 639 if (lsp == NULL)
@@ -633,7 +644,18 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
633 lsp->ls_seqid.sequence = &lsp->ls_sequence; 644 lsp->ls_seqid.sequence = &lsp->ls_sequence;
634 atomic_set(&lsp->ls_count, 1); 645 atomic_set(&lsp->ls_count, 1);
635 lsp->ls_state = state; 646 lsp->ls_state = state;
636 lsp->ls_owner = fl_owner; 647 lsp->ls_owner.lo_type = type;
648 switch (lsp->ls_owner.lo_type) {
649 case NFS4_FLOCK_LOCK_TYPE:
650 lsp->ls_owner.lo_u.flock_owner = fl_pid;
651 break;
652 case NFS4_POSIX_LOCK_TYPE:
653 lsp->ls_owner.lo_u.posix_owner = fl_owner;
654 break;
655 default:
656 kfree(lsp);
657 return NULL;
658 }
637 spin_lock(&clp->cl_lock); 659 spin_lock(&clp->cl_lock);
638 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); 660 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
639 spin_unlock(&clp->cl_lock); 661 spin_unlock(&clp->cl_lock);
@@ -643,7 +665,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
643 665
644static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) 666static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
645{ 667{
646 struct nfs_client *clp = lsp->ls_state->owner->so_client; 668 struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
647 669
648 spin_lock(&clp->cl_lock); 670 spin_lock(&clp->cl_lock);
649 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); 671 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
@@ -657,13 +679,13 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
657 * exists, return an uninitialized one. 679 * exists, return an uninitialized one.
658 * 680 *
659 */ 681 */
660static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) 682static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
661{ 683{
662 struct nfs4_lock_state *lsp, *new = NULL; 684 struct nfs4_lock_state *lsp, *new = NULL;
663 685
664 for(;;) { 686 for(;;) {
665 spin_lock(&state->state_lock); 687 spin_lock(&state->state_lock);
666 lsp = __nfs4_find_lock_state(state, owner); 688 lsp = __nfs4_find_lock_state(state, owner, pid, type);
667 if (lsp != NULL) 689 if (lsp != NULL)
668 break; 690 break;
669 if (new != NULL) { 691 if (new != NULL) {
@@ -674,7 +696,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
674 break; 696 break;
675 } 697 }
676 spin_unlock(&state->state_lock); 698 spin_unlock(&state->state_lock);
677 new = nfs4_alloc_lock_state(state, owner); 699 new = nfs4_alloc_lock_state(state, owner, pid, type);
678 if (new == NULL) 700 if (new == NULL)
679 return NULL; 701 return NULL;
680 } 702 }
@@ -701,6 +723,8 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
701 if (list_empty(&state->lock_states)) 723 if (list_empty(&state->lock_states))
702 clear_bit(LK_STATE_IN_USE, &state->flags); 724 clear_bit(LK_STATE_IN_USE, &state->flags);
703 spin_unlock(&state->state_lock); 725 spin_unlock(&state->state_lock);
726 if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
727 nfs4_release_lockowner(lsp);
704 nfs4_free_lock_state(lsp); 728 nfs4_free_lock_state(lsp);
705} 729}
706 730
@@ -728,7 +752,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
728 752
729 if (fl->fl_ops != NULL) 753 if (fl->fl_ops != NULL)
730 return 0; 754 return 0;
731 lsp = nfs4_get_lock_state(state, fl->fl_owner); 755 if (fl->fl_flags & FL_POSIX)
756 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
757 else if (fl->fl_flags & FL_FLOCK)
758 lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
759 else
760 return -EINVAL;
732 if (lsp == NULL) 761 if (lsp == NULL)
733 return -ENOMEM; 762 return -ENOMEM;
734 fl->fl_u.nfs4_fl.owner = lsp; 763 fl->fl_u.nfs4_fl.owner = lsp;
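
With flock() support the lock-state lookup key becomes a small tagged union: POSIX locks match on fl_owner, flock locks on fl_pid, and NFS4_ANY_LOCK_TYPE matches either. The standalone sketch below reproduces that matching logic; the enum, struct lock_owner and owner_matches() are illustrative names, not the kernel structures.

#include <stdio.h>

enum lock_type { ANY_LOCK_TYPE, POSIX_LOCK_TYPE, FLOCK_LOCK_TYPE };

struct lock_owner {
    enum lock_type type;
    union {
        void *posix_owner;    /* fl_owner for POSIX locks */
        int   flock_pid;      /* fl_pid for flock locks */
    } u;
};

/* Match rules as in __nfs4_find_lock_state(): an explicit type must
 * match, then the owner field appropriate to that type must match. */
static int owner_matches(const struct lock_owner *ls, enum lock_type type,
                         void *posix_owner, int flock_pid)
{
    if (type != ANY_LOCK_TYPE && ls->type != type)
        return 0;
    switch (ls->type) {
    case POSIX_LOCK_TYPE:
        return ls->u.posix_owner == posix_owner;
    case FLOCK_LOCK_TYPE:
        return ls->u.flock_pid == flock_pid;
    default:
        return 0;
    }
}

int main(void)
{
    int files_token;    /* stands in for current->files */
    struct lock_owner owners[] = {
        { .type = POSIX_LOCK_TYPE, .u.posix_owner = &files_token },
        { .type = FLOCK_LOCK_TYPE, .u.flock_pid = 1234 },
    };

    printf("posix match: %d\n",
           owner_matches(&owners[0], POSIX_LOCK_TYPE, &files_token, 0));
    printf("flock via ANY: %d\n",
           owner_matches(&owners[1], ANY_LOCK_TYPE, NULL, 1234));
    return 0;
}
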
@@ -740,7 +769,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
740 * Byte-range lock aware utility to initialize the stateid of read/write 769 * Byte-range lock aware utility to initialize the stateid of read/write
741 * requests. 770 * requests.
742 */ 771 */
743void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) 772void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
744{ 773{
745 struct nfs4_lock_state *lsp; 774 struct nfs4_lock_state *lsp;
746 int seq; 775 int seq;
@@ -753,7 +782,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
753 return; 782 return;
754 783
755 spin_lock(&state->state_lock); 784 spin_lock(&state->state_lock);
756 lsp = __nfs4_find_lock_state(state, fl_owner); 785 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
757 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 786 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
758 memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); 787 memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
759 spin_unlock(&state->state_lock); 788 spin_unlock(&state->state_lock);
@@ -943,13 +972,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
943 /* Guard against delegation returns and new lock/unlock calls */ 972 /* Guard against delegation returns and new lock/unlock calls */
944 down_write(&nfsi->rwsem); 973 down_write(&nfsi->rwsem);
945 /* Protect inode->i_flock using the BKL */ 974 /* Protect inode->i_flock using the BKL */
946 lock_kernel(); 975 lock_flocks();
947 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 976 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
948 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 977 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
949 continue; 978 continue;
950 if (nfs_file_open_context(fl->fl_file)->state != state) 979 if (nfs_file_open_context(fl->fl_file)->state != state)
951 continue; 980 continue;
952 unlock_kernel(); 981 unlock_flocks();
953 status = ops->recover_lock(state, fl); 982 status = ops->recover_lock(state, fl);
954 switch (status) { 983 switch (status) {
955 case 0: 984 case 0:
@@ -976,9 +1005,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
976 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 1005 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
977 status = 0; 1006 status = 0;
978 } 1007 }
979 lock_kernel(); 1008 lock_flocks();
980 } 1009 }
981 unlock_kernel(); 1010 unlock_flocks();
982out: 1011out:
983 up_write(&nfsi->rwsem); 1012 up_write(&nfsi->rwsem);
984 return status; 1013 return status;
@@ -1036,16 +1065,24 @@ restart:
1036 /* Mark the file as being 'closed' */ 1065 /* Mark the file as being 'closed' */
1037 state->state = 0; 1066 state->state = 0;
1038 break; 1067 break;
1068 case -EKEYEXPIRED:
1069 /*
1070 * User RPCSEC_GSS context has expired.
1071 * We cannot recover this stateid now, so
1072 * skip it and allow recovery thread to
1073 * proceed.
1074 */
1075 break;
1039 case -NFS4ERR_ADMIN_REVOKED: 1076 case -NFS4ERR_ADMIN_REVOKED:
1040 case -NFS4ERR_STALE_STATEID: 1077 case -NFS4ERR_STALE_STATEID:
1041 case -NFS4ERR_BAD_STATEID: 1078 case -NFS4ERR_BAD_STATEID:
1042 case -NFS4ERR_RECLAIM_BAD: 1079 case -NFS4ERR_RECLAIM_BAD:
1043 case -NFS4ERR_RECLAIM_CONFLICT: 1080 case -NFS4ERR_RECLAIM_CONFLICT:
1044 nfs4_state_mark_reclaim_nograce(sp->so_client, state); 1081 nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
1045 break; 1082 break;
1046 case -NFS4ERR_EXPIRED: 1083 case -NFS4ERR_EXPIRED:
1047 case -NFS4ERR_NO_GRACE: 1084 case -NFS4ERR_NO_GRACE:
1048 nfs4_state_mark_reclaim_nograce(sp->so_client, state); 1085 nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
1049 case -NFS4ERR_STALE_CLIENTID: 1086 case -NFS4ERR_STALE_CLIENTID:
1050 case -NFS4ERR_BADSESSION: 1087 case -NFS4ERR_BADSESSION:
1051 case -NFS4ERR_BADSLOT: 1088 case -NFS4ERR_BADSLOT:
@@ -1111,17 +1148,14 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
1111 (void)ops->reclaim_complete(clp); 1148 (void)ops->reclaim_complete(clp);
1112} 1149}
1113 1150
1114static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) 1151static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
1115{ 1152{
1116 struct nfs4_state_owner *sp; 1153 struct nfs4_state_owner *sp;
1117 struct rb_node *pos; 1154 struct rb_node *pos;
1118 struct nfs4_state *state; 1155 struct nfs4_state *state;
1119 1156
1120 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1157 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1121 return; 1158 return 0;
1122
1123 nfs4_reclaim_complete(clp,
1124 nfs4_reboot_recovery_ops[clp->cl_minorversion]);
1125 1159
1126 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1160 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
1127 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1161 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
@@ -1135,6 +1169,14 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1135 } 1169 }
1136 1170
1137 nfs_delegation_reap_unclaimed(clp); 1171 nfs_delegation_reap_unclaimed(clp);
1172 return 1;
1173}
1174
1175static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1176{
1177 if (!nfs4_state_clear_reclaim_reboot(clp))
1178 return;
1179 nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
1138} 1180}
1139 1181
1140static void nfs_delegation_clear_all(struct nfs_client *clp) 1182static void nfs_delegation_clear_all(struct nfs_client *clp)
@@ -1149,6 +1191,14 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1149 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); 1191 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1150} 1192}
1151 1193
1194static void nfs4_warn_keyexpired(const char *s)
1195{
1196 printk_ratelimited(KERN_WARNING "Error: state manager"
1197 " encountered RPCSEC_GSS session"
1198 " expired against NFSv4 server %s.\n",
1199 s);
1200}
1201
1152static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) 1202static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1153{ 1203{
1154 switch (error) { 1204 switch (error) {
@@ -1161,7 +1211,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1161 case -NFS4ERR_STALE_CLIENTID: 1211 case -NFS4ERR_STALE_CLIENTID:
1162 case -NFS4ERR_LEASE_MOVED: 1212 case -NFS4ERR_LEASE_MOVED:
1163 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1213 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1164 nfs4_state_end_reclaim_reboot(clp); 1214 nfs4_state_clear_reclaim_reboot(clp);
1165 nfs4_state_start_reclaim_reboot(clp); 1215 nfs4_state_start_reclaim_reboot(clp);
1166 break; 1216 break;
1167 case -NFS4ERR_EXPIRED: 1217 case -NFS4ERR_EXPIRED:
@@ -1178,6 +1228,10 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1178 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1228 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1179 /* Zero session reset errors */ 1229 /* Zero session reset errors */
1180 return 0; 1230 return 0;
1231 case -EKEYEXPIRED:
1232 /* Nothing we can do */
1233 nfs4_warn_keyexpired(clp->cl_hostname);
1234 return 0;
1181 } 1235 }
1182 return error; 1236 return error;
1183} 1237}
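
nfs4_warn_keyexpired() leans on printk_ratelimited() so a server that keeps returning EKEYEXPIRED does not flood the log. The few lines below model a simplified "at most once per interval" version of that behaviour in userspace with time(); warn_ratelimited() here is an illustrative helper, not the kernel macro.

#include <stdio.h>
#include <time.h>

/* Emit the warning at most once every 'interval' seconds. */
static void warn_ratelimited(const char *server, int interval)
{
    static time_t last;
    time_t now = time(NULL);

    if (last && now - last < interval)
        return;    /* suppressed, as printk_ratelimited() would do */
    last = now;
    fprintf(stderr,
            "Error: state manager encountered an expired RPCSEC_GSS "
            "session against NFSv4 server %s\n", server);
}

int main(void)
{
    for (int i = 0; i < 5; i++)
        warn_ratelimited("nfs.example.com", 5);    /* prints once */
    return 0;
}
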
@@ -1211,8 +1265,8 @@ restart:
1211static int nfs4_check_lease(struct nfs_client *clp) 1265static int nfs4_check_lease(struct nfs_client *clp)
1212{ 1266{
1213 struct rpc_cred *cred; 1267 struct rpc_cred *cred;
1214 struct nfs4_state_maintenance_ops *ops = 1268 const struct nfs4_state_maintenance_ops *ops =
1215 nfs4_state_renewal_ops[clp->cl_minorversion]; 1269 clp->cl_mvops->state_renewal_ops;
1216 int status = -NFS4ERR_EXPIRED; 1270 int status = -NFS4ERR_EXPIRED;
1217 1271
1218 /* Is the client already known to have an expired lease? */ 1272 /* Is the client already known to have an expired lease? */
@@ -1235,8 +1289,8 @@ out:
1235static int nfs4_reclaim_lease(struct nfs_client *clp) 1289static int nfs4_reclaim_lease(struct nfs_client *clp)
1236{ 1290{
1237 struct rpc_cred *cred; 1291 struct rpc_cred *cred;
1238 struct nfs4_state_recovery_ops *ops = 1292 const struct nfs4_state_recovery_ops *ops =
1239 nfs4_reboot_recovery_ops[clp->cl_minorversion]; 1293 clp->cl_mvops->reboot_recovery_ops;
1240 int status = -ENOENT; 1294 int status = -ENOENT;
1241 1295
1242 cred = ops->get_clid_cred(clp); 1296 cred = ops->get_clid_cred(clp);
@@ -1388,9 +1442,10 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1388 case -NFS4ERR_DELAY: 1442 case -NFS4ERR_DELAY:
1389 case -NFS4ERR_CLID_INUSE: 1443 case -NFS4ERR_CLID_INUSE:
1390 case -EAGAIN: 1444 case -EAGAIN:
1391 case -EKEYEXPIRED:
1392 break; 1445 break;
1393 1446
1447 case -EKEYEXPIRED:
1448 nfs4_warn_keyexpired(clp->cl_hostname);
1394 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery 1449 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1395 * in nfs4_exchange_id */ 1450 * in nfs4_exchange_id */
1396 default: 1451 default:
@@ -1421,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1421 } 1476 }
1422 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1477 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1423 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); 1478 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1479 pnfs_destroy_all_layouts(clp);
1424 } 1480 }
1425 1481
1426 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1482 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
@@ -1444,7 +1500,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1444 /* First recover reboot state... */ 1500 /* First recover reboot state... */
1445 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { 1501 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1446 status = nfs4_do_reclaim(clp, 1502 status = nfs4_do_reclaim(clp,
1447 nfs4_reboot_recovery_ops[clp->cl_minorversion]); 1503 clp->cl_mvops->reboot_recovery_ops);
1448 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || 1504 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
1449 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) 1505 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
1450 continue; 1506 continue;
@@ -1458,7 +1514,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1458 /* Now recover expired state... */ 1514 /* Now recover expired state... */
1459 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { 1515 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
1460 status = nfs4_do_reclaim(clp, 1516 status = nfs4_do_reclaim(clp,
1461 nfs4_nograce_recovery_ops[clp->cl_minorversion]); 1517 clp->cl_mvops->nograce_recovery_ops);
1462 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || 1518 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
1463 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || 1519 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
1464 test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1520 test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 65c8dae4b267..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
52#include <linux/nfs_idmap.h> 52#include <linux/nfs_idmap.h>
53#include "nfs4_fs.h" 53#include "nfs4_fs.h"
54#include "internal.h" 54#include "internal.h"
55#include "pnfs.h"
55 56
56#define NFSDBG_FACILITY NFSDBG_XDR 57#define NFSDBG_FACILITY NFSDBG_XDR
57 58
@@ -202,14 +203,17 @@ static int nfs4_stat_to_errno(int);
202#define encode_link_maxsz (op_encode_hdr_maxsz + \ 203#define encode_link_maxsz (op_encode_hdr_maxsz + \
203 nfs4_name_maxsz) 204 nfs4_name_maxsz)
204#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) 205#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
206#define encode_lockowner_maxsz (7)
205#define encode_lock_maxsz (op_encode_hdr_maxsz + \ 207#define encode_lock_maxsz (op_encode_hdr_maxsz + \
206 7 + \ 208 7 + \
207 1 + encode_stateid_maxsz + 8) 209 1 + encode_stateid_maxsz + 1 + \
210 encode_lockowner_maxsz)
208#define decode_lock_denied_maxsz \ 211#define decode_lock_denied_maxsz \
209 (8 + decode_lockowner_maxsz) 212 (8 + decode_lockowner_maxsz)
210#define decode_lock_maxsz (op_decode_hdr_maxsz + \ 213#define decode_lock_maxsz (op_decode_hdr_maxsz + \
211 decode_lock_denied_maxsz) 214 decode_lock_denied_maxsz)
212#define encode_lockt_maxsz (op_encode_hdr_maxsz + 12) 215#define encode_lockt_maxsz (op_encode_hdr_maxsz + 5 + \
216 encode_lockowner_maxsz)
213#define decode_lockt_maxsz (op_decode_hdr_maxsz + \ 217#define decode_lockt_maxsz (op_decode_hdr_maxsz + \
214 decode_lock_denied_maxsz) 218 decode_lock_denied_maxsz)
215#define encode_locku_maxsz (op_encode_hdr_maxsz + 3 + \ 219#define encode_locku_maxsz (op_encode_hdr_maxsz + 3 + \
@@ -217,6 +221,11 @@ static int nfs4_stat_to_errno(int);
217 4) 221 4)
218#define decode_locku_maxsz (op_decode_hdr_maxsz + \ 222#define decode_locku_maxsz (op_decode_hdr_maxsz + \
219 decode_stateid_maxsz) 223 decode_stateid_maxsz)
224#define encode_release_lockowner_maxsz \
225 (op_encode_hdr_maxsz + \
226 encode_lockowner_maxsz)
227#define decode_release_lockowner_maxsz \
228 (op_decode_hdr_maxsz)
220#define encode_access_maxsz (op_encode_hdr_maxsz + 1) 229#define encode_access_maxsz (op_encode_hdr_maxsz + 1)
221#define decode_access_maxsz (op_decode_hdr_maxsz + 2) 230#define decode_access_maxsz (op_decode_hdr_maxsz + 2)
222#define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 231#define encode_symlink_maxsz (op_encode_hdr_maxsz + \
@@ -302,6 +311,19 @@ static int nfs4_stat_to_errno(int);
302 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) 311 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
303#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) 312#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4)
304#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) 313#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4)
314#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
315 XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
316#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
317 1 /* layout type */ + \
318 1 /* opaque devaddr4 length */ + \
319 /* devaddr4 payload is read into page */ \
320 1 /* notification bitmap length */ + \
321 1 /* notification bitmap */)
322#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
323 encode_stateid_maxsz)
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
305#else /* CONFIG_NFS_V4_1 */ 327#else /* CONFIG_NFS_V4_1 */
306#define encode_sequence_maxsz 0 328#define encode_sequence_maxsz 0
307#define decode_sequence_maxsz 0 329#define decode_sequence_maxsz 0
@@ -471,6 +493,12 @@ static int nfs4_stat_to_errno(int);
471 decode_sequence_maxsz + \ 493 decode_sequence_maxsz + \
472 decode_putfh_maxsz + \ 494 decode_putfh_maxsz + \
473 decode_locku_maxsz) 495 decode_locku_maxsz)
496#define NFS4_enc_release_lockowner_sz \
497 (compound_encode_hdr_maxsz + \
498 encode_lockowner_maxsz)
499#define NFS4_dec_release_lockowner_sz \
500 (compound_decode_hdr_maxsz + \
501 decode_lockowner_maxsz)
474#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ 502#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
475 encode_sequence_maxsz + \ 503 encode_sequence_maxsz + \
476 encode_putfh_maxsz + \ 504 encode_putfh_maxsz + \
@@ -685,6 +713,20 @@ static int nfs4_stat_to_errno(int);
685#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ 713#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \
686 decode_sequence_maxsz + \ 714 decode_sequence_maxsz + \
687 decode_reclaim_complete_maxsz) 715 decode_reclaim_complete_maxsz)
716#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
717 encode_sequence_maxsz +\
718 encode_getdeviceinfo_maxsz)
719#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
720 decode_sequence_maxsz + \
721 decode_getdeviceinfo_maxsz)
722#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \
723 encode_sequence_maxsz + \
724 encode_putfh_maxsz + \
725 encode_layoutget_maxsz)
726#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \
727 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \
729 decode_layoutget_maxsz)
688 730
689const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
690 compound_encode_hdr_maxsz + 732 compound_encode_hdr_maxsz +
@@ -744,7 +786,7 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
744 struct compound_hdr *hdr) 786 struct compound_hdr *hdr)
745{ 787{
746 __be32 *p; 788 __be32 *p;
747 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 789 struct rpc_auth *auth = req->rq_cred->cr_auth;
748 790
749 /* initialize running count of expected bytes in reply. 791 /* initialize running count of expected bytes in reply.
750 * NOTE: the replied tag SHOULD be the same as the one sent, 792
@@ -802,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
802 if (iap->ia_valid & ATTR_MODE) 844 if (iap->ia_valid & ATTR_MODE)
803 len += 4; 845 len += 4;
804 if (iap->ia_valid & ATTR_UID) { 846 if (iap->ia_valid & ATTR_UID) {
805 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); 847 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ);
806 if (owner_namelen < 0) { 848 if (owner_namelen < 0) {
807 dprintk("nfs: couldn't resolve uid %d to string\n", 849 dprintk("nfs: couldn't resolve uid %d to string\n",
808 iap->ia_uid); 850 iap->ia_uid);
@@ -814,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
814 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
815 } 857 }
816 if (iap->ia_valid & ATTR_GID) { 858 if (iap->ia_valid & ATTR_GID) {
817 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); 859 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ);
818 if (owner_grouplen < 0) { 860 if (owner_grouplen < 0) {
819 dprintk("nfs: couldn't resolve gid %d to string\n", 861 dprintk("nfs: couldn't resolve gid %d to string\n",
820 iap->ia_gid); 862 iap->ia_gid);
@@ -1042,6 +1084,17 @@ static inline uint64_t nfs4_lock_length(struct file_lock *fl)
1042 return fl->fl_end - fl->fl_start + 1; 1084 return fl->fl_end - fl->fl_start + 1;
1043} 1085}
1044 1086
1087static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner)
1088{
1089 __be32 *p;
1090
1091 p = reserve_space(xdr, 28);
1092 p = xdr_encode_hyper(p, lowner->clientid);
1093 *p++ = cpu_to_be32(16);
1094 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1095 xdr_encode_hyper(p, lowner->id);
1096}
1097
1045/* 1098/*
1046 * opcode,type,reclaim,offset,length,new_lock_owner = 32 1099 * opcode,type,reclaim,offset,length,new_lock_owner = 32
1047 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 1100 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
@@ -1058,14 +1111,11 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
1058 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1111 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1059 *p = cpu_to_be32(args->new_lock_owner); 1112 *p = cpu_to_be32(args->new_lock_owner);
1060 if (args->new_lock_owner){ 1113 if (args->new_lock_owner){
1061 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32); 1114 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1062 *p++ = cpu_to_be32(args->open_seqid->sequence->counter); 1115 *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
1063 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); 1116 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
1064 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); 1117 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
1065 p = xdr_encode_hyper(p, args->lock_owner.clientid); 1118 encode_lockowner(xdr, &args->lock_owner);
1066 *p++ = cpu_to_be32(16);
1067 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1068 xdr_encode_hyper(p, args->lock_owner.id);
1069 } 1119 }
1070 else { 1120 else {
1071 p = reserve_space(xdr, NFS4_STATEID_SIZE+4); 1121 p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
@@ -1080,15 +1130,12 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
1080{ 1130{
1081 __be32 *p; 1131 __be32 *p;
1082 1132
1083 p = reserve_space(xdr, 52); 1133 p = reserve_space(xdr, 24);
1084 *p++ = cpu_to_be32(OP_LOCKT); 1134 *p++ = cpu_to_be32(OP_LOCKT);
1085 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); 1135 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1086 p = xdr_encode_hyper(p, args->fl->fl_start); 1136 p = xdr_encode_hyper(p, args->fl->fl_start);
1087 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); 1137 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1088 p = xdr_encode_hyper(p, args->lock_owner.clientid); 1138 encode_lockowner(xdr, &args->lock_owner);
1089 *p++ = cpu_to_be32(16);
1090 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1091 xdr_encode_hyper(p, args->lock_owner.id);
1092 hdr->nops++; 1139 hdr->nops++;
1093 hdr->replen += decode_lockt_maxsz; 1140 hdr->replen += decode_lockt_maxsz;
1094} 1141}
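
Both encode_lock() and encode_lockt() used to open-code the same lock_owner4 layout; the new encode_lockowner() helper carries it in one place: an 8-byte clientid, then a 16-byte opaque owner built from the literal "lock id:" followed by the 64-bit owner id, 28 bytes in total once the 4-byte opaque length is counted. A minimal userspace sketch of that wire layout, assuming hand-rolled big-endian helpers rather than the kernel's xdr_stream API (xdr_put_hyper, xdr_put_u32 and encode_lockowner_sketch below are illustrative names, not kernel functions):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Store a 64-bit value in XDR (big-endian) byte order. */
static void xdr_put_hyper(unsigned char *p, uint64_t v)
{
        int i;

        for (i = 7; i >= 0; i--) {
                p[i] = v & 0xff;
                v >>= 8;
        }
}

/* Store a 32-bit value in XDR (big-endian) byte order. */
static void xdr_put_u32(unsigned char *p, uint32_t v)
{
        p[0] = v >> 24;
        p[1] = v >> 16;
        p[2] = v >> 8;
        p[3] = v;
}

/*
 * Lay out a lock_owner4 the way encode_lockowner() does: clientid,
 * then a 16-byte opaque owner made of "lock id:" plus the 64-bit id.
 * Always writes 28 bytes.
 */
static size_t encode_lockowner_sketch(unsigned char *buf,
                                      uint64_t clientid, uint64_t id)
{
        xdr_put_hyper(buf, clientid);           /* 8 bytes */
        xdr_put_u32(buf + 8, 16);               /* opaque owner length */
        memcpy(buf + 12, "lock id:", 8);        /* fixed prefix */
        xdr_put_hyper(buf + 20, id);            /* owner id */
        return 28;
}

int main(void)
{
        unsigned char buf[28];
        size_t i, len = encode_lockowner_sketch(buf, 0x1122334455667788ULL, 42);

        for (i = 0; i < len; i++)
                printf("%02x%s", (unsigned)buf[i], (i + 1) % 4 ? " " : "\n");
        return 0;
}
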
@@ -1108,6 +1155,17 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
1108 hdr->replen += decode_locku_maxsz; 1155 hdr->replen += decode_locku_maxsz;
1109} 1156}
1110 1157
1158static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
1159{
1160 __be32 *p;
1161
1162 p = reserve_space(xdr, 4);
1163 *p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
1164 encode_lockowner(xdr, lowner);
1165 hdr->nops++;
1166 hdr->replen += decode_release_lockowner_maxsz;
1167}
1168
1111static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1169static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1112{ 1170{
1113 int len = name->len; 1171 int len = name->len;
@@ -1172,7 +1230,7 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
1172 break; 1230 break;
1173 default: 1231 default:
1174 clp = arg->server->nfs_client; 1232 clp = arg->server->nfs_client;
1175 if (clp->cl_minorversion > 0) { 1233 if (clp->cl_mvops->minor_version > 0) {
1176 if (nfs4_has_persistent_session(clp)) { 1234 if (nfs4_has_persistent_session(clp)) {
1177 *p = cpu_to_be32(NFS4_CREATE_GUARDED); 1235 *p = cpu_to_be32(NFS4_CREATE_GUARDED);
1178 encode_attrs(xdr, arg->u.attrs, arg->server); 1236 encode_attrs(xdr, arg->u.attrs, arg->server);
@@ -1324,14 +1382,14 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1324 hdr->replen += decode_putrootfh_maxsz; 1382 hdr->replen += decode_putrootfh_maxsz;
1325} 1383}
1326 1384
1327static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) 1385static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
1328{ 1386{
1329 nfs4_stateid stateid; 1387 nfs4_stateid stateid;
1330 __be32 *p; 1388 __be32 *p;
1331 1389
1332 p = reserve_space(xdr, NFS4_STATEID_SIZE); 1390 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1333 if (ctx->state != NULL) { 1391 if (ctx->state != NULL) {
1334 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); 1392 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
1335 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); 1393 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1336 } else 1394 } else
1337 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); 1395 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
@@ -1344,7 +1402,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1344 p = reserve_space(xdr, 4); 1402 p = reserve_space(xdr, 4);
1345 *p = cpu_to_be32(OP_READ); 1403 *p = cpu_to_be32(OP_READ);
1346 1404
1347 encode_stateid(xdr, args->context); 1405 encode_stateid(xdr, args->context, args->lock_context);
1348 1406
1349 p = reserve_space(xdr, 12); 1407 p = reserve_space(xdr, 12);
1350 p = xdr_encode_hyper(p, args->offset); 1408 p = xdr_encode_hyper(p, args->offset);
@@ -1355,24 +1413,35 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1355 1413
1356static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1414static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1357{ 1415{
1358 uint32_t attrs[2] = { 1416 uint32_t attrs[2] = {0, 0};
1359 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1417 uint32_t dircount = readdir->count >> 1;
1360 FATTR4_WORD1_MOUNTED_ON_FILEID,
1361 };
1362 __be32 *p; 1418 __be32 *p;
1363 1419
1420 if (readdir->plus) {
1421 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
1422 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE;
1423 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
1424 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
1425 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
1426 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1427 dircount >>= 1;
1428 }
1429 attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID;
1430 attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
1431 /* Switch to mounted_on_fileid if the server supports it */
1432 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1433 attrs[0] &= ~FATTR4_WORD0_FILEID;
1434 else
1435 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1436
1364 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1437 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1365 *p++ = cpu_to_be32(OP_READDIR); 1438 *p++ = cpu_to_be32(OP_READDIR);
1366 p = xdr_encode_hyper(p, readdir->cookie); 1439 p = xdr_encode_hyper(p, readdir->cookie);
1367 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); 1440 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
1368 *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ 1441 *p++ = cpu_to_be32(dircount);
1369 *p++ = cpu_to_be32(readdir->count); 1442 *p++ = cpu_to_be32(readdir->count);
1370 *p++ = cpu_to_be32(2); 1443 *p++ = cpu_to_be32(2);
1371 /* Switch to mounted_on_fileid if the server supports it */ 1444
1372 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1373 attrs[0] &= ~FATTR4_WORD0_FILEID;
1374 else
1375 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1376 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); 1445 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1377 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); 1446 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1378 hdr->nops++; 1447 hdr->nops++;
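
With readdirplus support the requested attribute words and the dircount hint are now computed per call instead of being hard-wired. A small standalone sketch of that computation, using made-up SK_* bit values in place of the real FATTR4_WORD0/FATTR4_WORD1 constants:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the FATTR4_WORD* bits; the real values
 * live in the NFSv4 headers. */
#define SK_W0_RDATTR_ERROR        0x00000001u
#define SK_W0_FILEID              0x00000002u
#define SK_W0_PLUS_EXTRAS         0x000000f0u   /* type, change, size, fsid, fh */
#define SK_W1_MOUNTED_ON_FILEID   0x00000001u
#define SK_W1_PLUS_EXTRAS         0x0000ff00u   /* mode, owner, times, ... */

/* Compute the dircount hint and the two attribute request words for
 * a READDIR, following the logic of encode_readdir() above. */
static void build_readdir_args(uint32_t count, int plus,
                               const uint32_t server_bitmask[2],
                               uint32_t attrs[2], uint32_t *dircount)
{
        attrs[0] = SK_W0_RDATTR_ERROR | SK_W0_FILEID;
        attrs[1] = SK_W1_MOUNTED_ON_FILEID;
        *dircount = count >> 1;

        if (plus) {
                /* readdirplus wants many more attributes per entry,
                 * so budget fewer bytes for bare directory entries. */
                attrs[0] |= SK_W0_PLUS_EXTRAS;
                attrs[1] |= SK_W1_PLUS_EXTRAS;
                *dircount >>= 1;
        }

        /* Prefer mounted_on_fileid when the server supports it. */
        if (server_bitmask[1] & SK_W1_MOUNTED_ON_FILEID)
                attrs[0] &= ~SK_W0_FILEID;
        else
                attrs[1] &= ~SK_W1_MOUNTED_ON_FILEID;

        /* Never ask for bits the server did not advertise. */
        attrs[0] &= server_bitmask[0];
        attrs[1] &= server_bitmask[1];
}

int main(void)
{
        uint32_t bitmask[2] = { ~0u, ~0u };     /* server supports everything */
        uint32_t attrs[2], dircount;

        build_readdir_args(4096, 1, bitmask, attrs, &dircount);
        printf("dircount=%u attrs=%08x %08x\n", dircount, attrs[0], attrs[1]);
        return 0;
}
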
@@ -1523,7 +1592,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1523 p = reserve_space(xdr, 4); 1592 p = reserve_space(xdr, 4);
1524 *p = cpu_to_be32(OP_WRITE); 1593 *p = cpu_to_be32(OP_WRITE);
1525 1594
1526 encode_stateid(xdr, args->context); 1595 encode_stateid(xdr, args->context, args->lock_context);
1527 1596
1528 p = reserve_space(xdr, 16); 1597 p = reserve_space(xdr, 16);
1529 p = xdr_encode_hyper(p, args->offset); 1598 p = xdr_encode_hyper(p, args->offset);
@@ -1696,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
1696#endif /* CONFIG_NFS_V4_1 */ 1765#endif /* CONFIG_NFS_V4_1 */
1697} 1766}
1698 1767
1768#ifdef CONFIG_NFS_V4_1
1769static void
1770encode_getdeviceinfo(struct xdr_stream *xdr,
1771 const struct nfs4_getdeviceinfo_args *args,
1772 struct compound_hdr *hdr)
1773{
1774 __be32 *p;
1775
1776 p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
1777 *p++ = cpu_to_be32(OP_GETDEVICEINFO);
1778 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
1779 NFS4_DEVICEID4_SIZE);
1780 *p++ = cpu_to_be32(args->pdev->layout_type);
1781 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
1782 *p++ = cpu_to_be32(0); /* bitmap length 0 */
1783 hdr->nops++;
1784 hdr->replen += decode_getdeviceinfo_maxsz;
1785}
1786
1787static void
1788encode_layoutget(struct xdr_stream *xdr,
1789 const struct nfs4_layoutget_args *args,
1790 struct compound_hdr *hdr)
1791{
1792 nfs4_stateid stateid;
1793 __be32 *p;
1794
1795 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
1796 *p++ = cpu_to_be32(OP_LAYOUTGET);
1797 *p++ = cpu_to_be32(0); /* Signal layout available */
1798 *p++ = cpu_to_be32(args->type);
1799 *p++ = cpu_to_be32(args->range.iomode);
1800 p = xdr_encode_hyper(p, args->range.offset);
1801 p = xdr_encode_hyper(p, args->range.length);
1802 p = xdr_encode_hyper(p, args->minlength);
1803 pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
1804 args->ctx->state);
1805 p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
1806 *p = cpu_to_be32(args->maxcount);
1807
1808 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
1809 __func__,
1810 args->type,
1811 args->range.iomode,
1812 (unsigned long)args->range.offset,
1813 (unsigned long)args->range.length,
1814 args->maxcount);
1815 hdr->nops++;
1816 hdr->replen += decode_layoutget_maxsz;
1817}
1818#endif /* CONFIG_NFS_V4_1 */
1819
1699/* 1820/*
1700 * END OF "GENERIC" ENCODE ROUTINES. 1821 * END OF "GENERIC" ENCODE ROUTINES.
1701 */ 1822 */
@@ -1704,7 +1825,7 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
1704{ 1825{
1705#if defined(CONFIG_NFS_V4_1) 1826#if defined(CONFIG_NFS_V4_1)
1706 if (args->sa_session) 1827 if (args->sa_session)
1707 return args->sa_session->clp->cl_minorversion; 1828 return args->sa_session->clp->cl_mvops->minor_version;
1708#endif /* CONFIG_NFS_V4_1 */ 1829#endif /* CONFIG_NFS_V4_1 */
1709 return 0; 1830 return 0;
1710} 1831}
@@ -1793,7 +1914,7 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
1793/* 1914/*
1794 * Encode RENAME request 1915 * Encode RENAME request
1795 */ 1916 */
1796static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args) 1917static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
1797{ 1918{
1798 struct xdr_stream xdr; 1919 struct xdr_stream xdr;
1799 struct compound_hdr hdr = { 1920 struct compound_hdr hdr = {
@@ -2048,6 +2169,20 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_
2048 return 0; 2169 return 0;
2049} 2170}
2050 2171
2172static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
2173{
2174 struct xdr_stream xdr;
2175 struct compound_hdr hdr = {
2176 .minorversion = 0,
2177 };
2178
2179 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2180 encode_compound_hdr(&xdr, req, &hdr);
2181 encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
2182 encode_nops(&hdr);
2183 return 0;
2184}
2185
2051/* 2186/*
2052 * Encode a READLINK request 2187 * Encode a READLINK request
2053 */ 2188 */
@@ -2395,7 +2530,7 @@ static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
2395{ 2530{
2396 struct xdr_stream xdr; 2531 struct xdr_stream xdr;
2397 struct compound_hdr hdr = { 2532 struct compound_hdr hdr = {
2398 .minorversion = args->client->cl_minorversion, 2533 .minorversion = args->client->cl_mvops->minor_version,
2399 }; 2534 };
2400 2535
2401 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2536 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
@@ -2413,7 +2548,7 @@ static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
2413{ 2548{
2414 struct xdr_stream xdr; 2549 struct xdr_stream xdr;
2415 struct compound_hdr hdr = { 2550 struct compound_hdr hdr = {
2416 .minorversion = args->client->cl_minorversion, 2551 .minorversion = args->client->cl_mvops->minor_version,
2417 }; 2552 };
2418 2553
2419 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2554 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
@@ -2431,7 +2566,7 @@ static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
2431{ 2566{
2432 struct xdr_stream xdr; 2567 struct xdr_stream xdr;
2433 struct compound_hdr hdr = { 2568 struct compound_hdr hdr = {
2434 .minorversion = session->clp->cl_minorversion, 2569 .minorversion = session->clp->cl_mvops->minor_version,
2435 }; 2570 };
2436 2571
2437 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2572 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
@@ -2499,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
2499 return 0; 2634 return 0;
2500} 2635}
2501 2636
2637/*
2638 * Encode GETDEVICEINFO request
2639 */
2640static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
2641 struct nfs4_getdeviceinfo_args *args)
2642{
2643 struct xdr_stream xdr;
2644 struct compound_hdr hdr = {
2645 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2646 };
2647
2648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2649 encode_compound_hdr(&xdr, req, &hdr);
2650 encode_sequence(&xdr, &args->seq_args, &hdr);
2651 encode_getdeviceinfo(&xdr, args, &hdr);
2652
2653 /* set up reply kvec. Subtract notification bitmap max size (2)
2654 * so that notification bitmap is put in xdr_buf tail */
2655 xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
2656 args->pdev->pages, args->pdev->pgbase,
2657 args->pdev->pglen);
2658
2659 encode_nops(&hdr);
2660 return 0;
2661}
2662
2663/*
2664 * Encode LAYOUTGET request
2665 */
2666static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
2667 struct nfs4_layoutget_args *args)
2668{
2669 struct xdr_stream xdr;
2670 struct compound_hdr hdr = {
2671 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2672 };
2673
2674 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2675 encode_compound_hdr(&xdr, req, &hdr);
2676 encode_sequence(&xdr, &args->seq_args, &hdr);
2677 encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
2678 encode_layoutget(&xdr, args, &hdr);
2679 encode_nops(&hdr);
2680 return 0;
2681}
2502#endif /* CONFIG_NFS_V4_1 */ 2682#endif /* CONFIG_NFS_V4_1 */
2503 2683
2504static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2684static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
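
The reply-buffer setup above works in XDR words: hdr.replen counts 4-byte words, so (hdr.replen - 2) << 2 is the byte offset at which the opaque device_addr4 payload begins, and the two words reserved for the notification bitmap fall past the page data into the xdr_buf tail. The arithmetic, with made-up numbers:

#include <stdio.h>

int main(void)
{
        /* Hypothetical sizes: replen is accumulated in 4-byte XDR words. */
        unsigned int replen_words = 36; /* compound hdr + sequence + op results */
        unsigned int bitmap_words = 2;  /* notification bitmap length + 1 word */

        /* Byte offset in the reply where the device_addr4 pages begin. */
        unsigned int page_offset = (replen_words - bitmap_words) << 2;

        printf("device_addr4 starts %u bytes into the reply;\n"
               "the trailing %u words land in the xdr_buf tail\n",
               page_offset, bitmap_words);
        return 0;
}
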
@@ -2632,7 +2812,10 @@ out_overflow:
2632static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) 2812static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
2633{ 2813{
2634 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { 2814 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) {
2635 decode_attr_bitmap(xdr, bitmask); 2815 int ret;
2816 ret = decode_attr_bitmap(xdr, bitmask);
2817 if (unlikely(ret < 0))
2818 return ret;
2636 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; 2819 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
2637 } else 2820 } else
2638 bitmask[0] = bitmask[1] = 0; 2821 bitmask[0] = bitmask[1] = 0;
@@ -2804,6 +2987,56 @@ out_overflow:
2804 return -EIO; 2987 return -EIO;
2805} 2988}
2806 2989
2990static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
2991{
2992 __be32 *p;
2993
2994 if (unlikely(bitmap[0] & (FATTR4_WORD0_RDATTR_ERROR - 1U)))
2995 return -EIO;
2996 if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) {
2997 p = xdr_inline_decode(xdr, 4);
2998 if (unlikely(!p))
2999 goto out_overflow;
3000 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3001 }
3002 return 0;
3003out_overflow:
3004 print_overflow_msg(__func__, xdr);
3005 return -EIO;
3006}
3007
3008static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
3009{
3010 __be32 *p;
3011 int len;
3012
3013 if (fh != NULL)
3014 memset(fh, 0, sizeof(*fh));
3015
3016 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEHANDLE - 1U)))
3017 return -EIO;
3018 if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) {
3019 p = xdr_inline_decode(xdr, 4);
3020 if (unlikely(!p))
3021 goto out_overflow;
3022 len = be32_to_cpup(p);
3023 if (len > NFS4_FHSIZE)
3024 return -EIO;
3025 p = xdr_inline_decode(xdr, len);
3026 if (unlikely(!p))
3027 goto out_overflow;
3028 if (fh != NULL) {
3029 memcpy(fh->data, p, len);
3030 fh->size = len;
3031 }
3032 bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE;
3033 }
3034 return 0;
3035out_overflow:
3036 print_overflow_msg(__func__, xdr);
3037 return -EIO;
3038}
3039
2807static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3040static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2808{ 3041{
2809 __be32 *p; 3042 __be32 *p;
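
The FILEHANDLE attribute decoded above is a standard XDR opaque<>: a 4-byte length, the data itself, and zero padding up to a 4-byte boundary, with the length bounded by NFS4_FHSIZE. A self-contained userspace sketch of that decoding (decode_opaque_fh, sk_fh and SK_FHSIZE are illustrative, not kernel names):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SK_FHSIZE 128   /* stand-in for NFS4_FHSIZE */

struct sk_fh {
        uint16_t size;
        unsigned char data[SK_FHSIZE];
};

static uint32_t get_be32(const unsigned char *p)
{
        return ((uint32_t)p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}

/*
 * Decode an XDR opaque<>: 4-byte length, data, zero padding to a
 * 4-byte boundary. Returns the bytes consumed, or -1 on a bad or
 * truncated buffer.
 */
static int decode_opaque_fh(const unsigned char *p, size_t avail,
                            struct sk_fh *fh)
{
        uint32_t len;
        size_t padded;

        if (avail < 4)
                return -1;
        len = get_be32(p);
        if (len > SK_FHSIZE)
                return -1;
        padded = (len + 3) & ~3u;       /* XDR pads to 4 bytes */
        if (avail < 4 + padded)
                return -1;
        memcpy(fh->data, p + 4, len);
        fh->size = len;
        return (int)(4 + padded);
}

int main(void)
{
        /* length 5, "hello", 3 bytes of padding */
        unsigned char wire[] = { 0, 0, 0, 5, 'h', 'e', 'l', 'l', 'o', 0, 0, 0 };
        struct sk_fh fh;
        int used = decode_opaque_fh(wire, sizeof(wire), &fh);

        printf("consumed %d bytes, fh size %u\n", used, fh.size);
        return 0;
}
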
@@ -3477,6 +3710,24 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
3477 return status; 3710 return status;
3478} 3711}
3479 3712
3713static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
3714 struct timespec *time)
3715{
3716 int status = 0;
3717
3718 time->tv_sec = 0;
3719 time->tv_nsec = 0;
3720 if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_DELTA - 1U)))
3721 return -EIO;
3722 if (likely(bitmap[1] & FATTR4_WORD1_TIME_DELTA)) {
3723 status = decode_attr_time(xdr, time);
3724 bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA;
3725 }
3726 dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec,
3727 (long)time->tv_nsec);
3728 return status;
3729}
3730
3480static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 3731static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
3481{ 3732{
3482 int status = 0; 3733 int status = 0;
@@ -3700,29 +3951,14 @@ xdr_error:
3700 return status; 3951 return status;
3701} 3952}
3702 3953
3703static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, 3954static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3955 struct nfs_fattr *fattr, struct nfs_fh *fh,
3704 const struct nfs_server *server, int may_sleep) 3956 const struct nfs_server *server, int may_sleep)
3705{ 3957{
3706 __be32 *savep;
3707 uint32_t attrlen,
3708 bitmap[2] = {0},
3709 type;
3710 int status; 3958 int status;
3711 umode_t fmode = 0; 3959 umode_t fmode = 0;
3712 uint64_t fileid; 3960 uint64_t fileid;
3713 3961 uint32_t type;
3714 status = decode_op_hdr(xdr, OP_GETATTR);
3715 if (status < 0)
3716 goto xdr_error;
3717
3718 status = decode_attr_bitmap(xdr, bitmap);
3719 if (status < 0)
3720 goto xdr_error;
3721
3722 status = decode_attr_length(xdr, &attrlen, &savep);
3723 if (status < 0)
3724 goto xdr_error;
3725
3726 3962
3727 status = decode_attr_type(xdr, bitmap, &type); 3963 status = decode_attr_type(xdr, bitmap, &type);
3728 if (status < 0) 3964 if (status < 0)
@@ -3748,6 +3984,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3748 goto xdr_error; 3984 goto xdr_error;
3749 fattr->valid |= status; 3985 fattr->valid |= status;
3750 3986
3987 status = decode_attr_error(xdr, bitmap);
3988 if (status < 0)
3989 goto xdr_error;
3990
3991 status = decode_attr_filehandle(xdr, bitmap, fh);
3992 if (status < 0)
3993 goto xdr_error;
3994
3751 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); 3995 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
3752 if (status < 0) 3996 if (status < 0)
3753 goto xdr_error; 3997 goto xdr_error;
@@ -3818,12 +4062,101 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3818 fattr->valid |= status; 4062 fattr->valid |= status;
3819 } 4063 }
3820 4064
4065xdr_error:
4066 dprintk("%s: xdr returned %d\n", __func__, -status);
4067 return status;
4068}
4069
4070static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4071 struct nfs_fh *fh, const struct nfs_server *server, int may_sleep)
4072{
4073 __be32 *savep;
4074 uint32_t attrlen,
4075 bitmap[2] = {0};
4076 int status;
4077
4078 status = decode_op_hdr(xdr, OP_GETATTR);
4079 if (status < 0)
4080 goto xdr_error;
4081
4082 status = decode_attr_bitmap(xdr, bitmap);
4083 if (status < 0)
4084 goto xdr_error;
4085
4086 status = decode_attr_length(xdr, &attrlen, &savep);
4087 if (status < 0)
4088 goto xdr_error;
4089
4090 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep);
4091 if (status < 0)
4092 goto xdr_error;
4093
3821 status = verify_attr_len(xdr, savep, attrlen); 4094 status = verify_attr_len(xdr, savep, attrlen);
3822xdr_error: 4095xdr_error:
3823 dprintk("%s: xdr returned %d\n", __func__, -status); 4096 dprintk("%s: xdr returned %d\n", __func__, -status);
3824 return status; 4097 return status;
3825} 4098}
3826 4099
4100static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4101 const struct nfs_server *server, int may_sleep)
4102{
4103 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
4104}
4105
4106/*
4107 * Decode potentially multiple layout types. Currently we only support
4108 * one layout driver per file system.
4109 */
4110static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
4111 uint32_t *layouttype)
4112{
4113 uint32_t *p;
4114 int num;
4115
4116 p = xdr_inline_decode(xdr, 4);
4117 if (unlikely(!p))
4118 goto out_overflow;
4119 num = be32_to_cpup(p);
4120
4121 /* pNFS is not supported by the underlying file system */
4122 if (num == 0) {
4123 *layouttype = 0;
4124 return 0;
4125 }
4126 if (num > 1)
4127 printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
4128 "per filesystem not supported\n", __func__);
4129
4130 /* Decode and set first layout type, move xdr->p past unused types */
4131 p = xdr_inline_decode(xdr, num * 4);
4132 if (unlikely(!p))
4133 goto out_overflow;
4134 *layouttype = be32_to_cpup(p);
4135 return 0;
4136out_overflow:
4137 print_overflow_msg(__func__, xdr);
4138 return -EIO;
4139}
4140
4141/*
4142 * The type of file system exported.
4143 * Note we must ensure that layouttype is set in any non-error case.
4144 */
4145static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
4146 uint32_t *layouttype)
4147{
4148 int status = 0;
4149
4150 dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
4151 if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
4152 return -EIO;
4153 if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
4154 status = decode_first_pnfs_layout_type(xdr, layouttype);
4155 bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
4156 } else
4157 *layouttype = 0;
4158 return status;
4159}
3827 4160
3828static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) 4161static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3829{ 4162{
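
decode_first_pnfs_layout_type() reads the fs_layout_types attribute as an XDR array of uint32 values, keeps the first entry and steps over the rest. The same logic in a standalone form (first_layout_type is an illustrative name and the wire data in main() is made up):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/*
 * @words holds the raw big-endian words of the attribute: an array
 * length followed by that many layout types. Returns the number of
 * words consumed, or -1 on truncation.
 */
static long first_layout_type(const uint32_t *words, size_t nwords,
                              uint32_t *layouttype)
{
        uint32_t num;

        if (nwords < 1)
                return -1;
        num = ntohl(words[0]);          /* array length */
        if (num == 0) {
                *layouttype = 0;        /* server has no pNFS support */
                return 1;
        }
        if (num > nwords - 1)
                return -1;
        if (num > 1)
                fprintf(stderr, "only one layout type per fs is used\n");
        *layouttype = ntohl(words[1]);
        return 1 + (long)num;
}

int main(void)
{
        uint32_t wire[] = { htonl(2), htonl(1), htonl(3) };
        uint32_t type = 0;
        long used = first_layout_type(wire, 3, &type);

        printf("layout type %u, %ld words consumed\n", type, used);
        return 0;
}
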
@@ -3850,6 +4183,12 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3850 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) 4183 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
3851 goto xdr_error; 4184 goto xdr_error;
3852 fsinfo->wtpref = fsinfo->wtmax; 4185 fsinfo->wtpref = fsinfo->wtmax;
4186 status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
4187 if (status != 0)
4188 goto xdr_error;
4189 status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
4190 if (status != 0)
4191 goto xdr_error;
3853 4192
3854 status = verify_attr_len(xdr, savep, attrlen); 4193 status = verify_attr_len(xdr, savep, attrlen);
3855xdr_error: 4194xdr_error:
@@ -3906,13 +4245,13 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3906 __be32 *p; 4245 __be32 *p;
3907 uint32_t namelen, type; 4246 uint32_t namelen, type;
3908 4247
3909 p = xdr_inline_decode(xdr, 32); 4248 p = xdr_inline_decode(xdr, 32); /* read 32 bytes */
3910 if (unlikely(!p)) 4249 if (unlikely(!p))
3911 goto out_overflow; 4250 goto out_overflow;
3912 p = xdr_decode_hyper(p, &offset); 4251 p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */
3913 p = xdr_decode_hyper(p, &length); 4252 p = xdr_decode_hyper(p, &length);
3914 type = be32_to_cpup(p++); 4253 type = be32_to_cpup(p++); /* 4 byte read */
3915 if (fl != NULL) { 4254 if (fl != NULL) { /* manipulate file lock */
3916 fl->fl_start = (loff_t)offset; 4255 fl->fl_start = (loff_t)offset;
3917 fl->fl_end = fl->fl_start + (loff_t)length - 1; 4256 fl->fl_end = fl->fl_start + (loff_t)length - 1;
3918 if (length == ~(uint64_t)0) 4257 if (length == ~(uint64_t)0)
@@ -3922,9 +4261,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3922 fl->fl_type = F_RDLCK; 4261 fl->fl_type = F_RDLCK;
3923 fl->fl_pid = 0; 4262 fl->fl_pid = 0;
3924 } 4263 }
3925 p = xdr_decode_hyper(p, &clientid); 4264 p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
3926 namelen = be32_to_cpup(p); 4265 namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */
3927 p = xdr_inline_decode(xdr, namelen); 4266 p = xdr_inline_decode(xdr, namelen); /* variable size field */
3928 if (likely(p)) 4267 if (likely(p))
3929 return -NFS4ERR_DENIED; 4268 return -NFS4ERR_DENIED;
3930out_overflow: 4269out_overflow:
@@ -3973,6 +4312,11 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
3973 return status; 4312 return status;
3974} 4313}
3975 4314
4315static int decode_release_lockowner(struct xdr_stream *xdr)
4316{
4317 return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);
4318}
4319
3976static int decode_lookup(struct xdr_stream *xdr) 4320static int decode_lookup(struct xdr_stream *xdr)
3977{ 4321{
3978 return decode_op_hdr(xdr, OP_LOOKUP); 4322 return decode_op_hdr(xdr, OP_LOOKUP);
@@ -4151,12 +4495,9 @@ out_overflow:
4151static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) 4495static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
4152{ 4496{
4153 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 4497 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
4154 struct page *page = *rcvbuf->pages;
4155 struct kvec *iov = rcvbuf->head; 4498 struct kvec *iov = rcvbuf->head;
4156 size_t hdrlen; 4499 size_t hdrlen;
4157 u32 recvd, pglen = rcvbuf->page_len; 4500 u32 recvd, pglen = rcvbuf->page_len;
4158 __be32 *end, *entry, *p, *kaddr;
4159 unsigned int nr = 0;
4160 int status; 4501 int status;
4161 4502
4162 status = decode_op_hdr(xdr, OP_READDIR); 4503 status = decode_op_hdr(xdr, OP_READDIR);
@@ -4176,71 +4517,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
4176 pglen = recvd; 4517 pglen = recvd;
4177 xdr_read_pages(xdr, pglen); 4518 xdr_read_pages(xdr, pglen);
4178 4519
4179 BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); 4520
4180 kaddr = p = kmap_atomic(page, KM_USER0);
4181 end = p + ((pglen + readdir->pgbase) >> 2);
4182 entry = p;
4183
4184 /* Make sure the packet actually has a value_follows and EOF entry */
4185 if ((entry + 1) > end)
4186 goto short_pkt;
4187
4188 for (; *p++; nr++) {
4189 u32 len, attrlen, xlen;
4190 if (end - p < 3)
4191 goto short_pkt;
4192 dprintk("cookie = %Lu, ", *((unsigned long long *)p));
4193 p += 2; /* cookie */
4194 len = ntohl(*p++); /* filename length */
4195 if (len > NFS4_MAXNAMLEN) {
4196 dprintk("NFS: giant filename in readdir (len 0x%x)\n",
4197 len);
4198 goto err_unmap;
4199 }
4200 xlen = XDR_QUADLEN(len);
4201 if (end - p < xlen + 1)
4202 goto short_pkt;
4203 dprintk("filename = %*s\n", len, (char *)p);
4204 p += xlen;
4205 len = ntohl(*p++); /* bitmap length */
4206 if (end - p < len + 1)
4207 goto short_pkt;
4208 p += len;
4209 attrlen = XDR_QUADLEN(ntohl(*p++));
4210 if (end - p < attrlen + 2)
4211 goto short_pkt;
4212 p += attrlen; /* attributes */
4213 entry = p;
4214 }
4215 /*
4216 * Apparently some server sends responses that are a valid size, but
4217 * contain no entries, and have value_follows==0 and EOF==0. For
4218 * those, just set the EOF marker.
4219 */
4220 if (!nr && entry[1] == 0) {
4221 dprintk("NFS: readdir reply truncated!\n");
4222 entry[1] = 1;
4223 }
4224out:
4225 kunmap_atomic(kaddr, KM_USER0);
4226 return 0; 4521 return 0;
4227short_pkt:
4228 /*
4229 * When we get a short packet there are 2 possibilities. We can
4230 * return an error, or fix up the response to look like a valid
4231 * response and return what we have so far. If there are no
4232 * entries and the packet was short, then return -EIO. If there
4233 * are valid entries in the response, return them and pretend that
4234 * the call was successful, but incomplete. The caller can retry the
4235 * readdir starting at the last cookie.
4236 */
4237 dprintk("%s: short packet at entry %d\n", __func__, nr);
4238 entry[0] = entry[1] = 0;
4239 if (nr)
4240 goto out;
4241err_unmap:
4242 kunmap_atomic(kaddr, KM_USER0);
4243 return -errno_NFSERR_IO;
4244} 4522}
4245 4523
4246static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) 4524static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
@@ -4250,7 +4528,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4250 size_t hdrlen; 4528 size_t hdrlen;
4251 u32 len, recvd; 4529 u32 len, recvd;
4252 __be32 *p; 4530 __be32 *p;
4253 char *kaddr;
4254 int status; 4531 int status;
4255 4532
4256 status = decode_op_hdr(xdr, OP_READLINK); 4533 status = decode_op_hdr(xdr, OP_READLINK);
@@ -4281,9 +4558,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4281 * and null-terminate the text (the VFS expects 4558
4282 * null-termination). 4559 * null-termination).
4283 */ 4560 */
4284 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); 4561 xdr_terminate_string(rcvbuf, len);
4285 kaddr[len+rcvbuf->page_base] = '\0';
4286 kunmap_atomic(kaddr, KM_USER0);
4287 return 0; 4562 return 0;
4288out_overflow: 4563out_overflow:
4289 print_overflow_msg(__func__, xdr); 4564 print_overflow_msg(__func__, xdr);
@@ -4619,7 +4894,6 @@ static int decode_sequence(struct xdr_stream *xdr,
4619 struct rpc_rqst *rqstp) 4894 struct rpc_rqst *rqstp)
4620{ 4895{
4621#if defined(CONFIG_NFS_V4_1) 4896#if defined(CONFIG_NFS_V4_1)
4622 struct nfs4_slot *slot;
4623 struct nfs4_sessionid id; 4897 struct nfs4_sessionid id;
4624 u32 dummy; 4898 u32 dummy;
4625 int status; 4899 int status;
@@ -4651,15 +4925,14 @@ static int decode_sequence(struct xdr_stream *xdr,
4651 goto out_overflow; 4925 goto out_overflow;
4652 4926
4653 /* seqid */ 4927 /* seqid */
4654 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4655 dummy = be32_to_cpup(p++); 4928 dummy = be32_to_cpup(p++);
4656 if (dummy != slot->seq_nr) { 4929 if (dummy != res->sr_slot->seq_nr) {
4657 dprintk("%s Invalid sequence number\n", __func__); 4930 dprintk("%s Invalid sequence number\n", __func__);
4658 goto out_err; 4931 goto out_err;
4659 } 4932 }
4660 /* slot id */ 4933 /* slot id */
4661 dummy = be32_to_cpup(p++); 4934 dummy = be32_to_cpup(p++);
4662 if (dummy != res->sr_slotid) { 4935 if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) {
4663 dprintk("%s Invalid slot id\n", __func__); 4936 dprintk("%s Invalid slot id\n", __func__);
4664 goto out_err; 4937 goto out_err;
4665 } 4938 }
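
The per-request sr_slotid field is gone; the slot number is now recovered by subtracting the slot-table base from the slot pointer. In plain C that is ordinary pointer arithmetic, for example (struct slot here is a stand-in, not the kernel's slot structure):

#include <stdio.h>

struct slot { unsigned int seq_nr; };

int main(void)
{
        struct slot table[16];
        struct slot *in_use = &table[5];

        /* Pointer subtraction recovers the index that used to be
         * carried separately as sr_slotid. */
        printf("slot id = %td\n", in_use - table);
        return 0;
}
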
@@ -4682,6 +4955,134 @@ out_overflow:
4682#endif /* CONFIG_NFS_V4_1 */ 4955#endif /* CONFIG_NFS_V4_1 */
4683} 4956}
4684 4957
4958#if defined(CONFIG_NFS_V4_1)
4959
4960static int decode_getdeviceinfo(struct xdr_stream *xdr,
4961 struct pnfs_device *pdev)
4962{
4963 __be32 *p;
4964 uint32_t len, type;
4965 int status;
4966
4967 status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
4968 if (status) {
4969 if (status == -ETOOSMALL) {
4970 p = xdr_inline_decode(xdr, 4);
4971 if (unlikely(!p))
4972 goto out_overflow;
4973 pdev->mincount = be32_to_cpup(p);
4974 dprintk("%s: Min count too small. mincnt = %u\n",
4975 __func__, pdev->mincount);
4976 }
4977 return status;
4978 }
4979
4980 p = xdr_inline_decode(xdr, 8);
4981 if (unlikely(!p))
4982 goto out_overflow;
4983 type = be32_to_cpup(p++);
4984 if (type != pdev->layout_type) {
4985 dprintk("%s: layout mismatch req: %u pdev: %u\n",
4986 __func__, pdev->layout_type, type);
4987 return -EINVAL;
4988 }
4989 /*
4990 * Get the length of the opaque device_addr4. xdr_read_pages places
4991 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
4992 * and places the remaining xdr data in xdr_buf->tail
4993 */
4994 pdev->mincount = be32_to_cpup(p);
4995 xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
4996
4997 /* Parse notification bitmap, verifying that it is zero. */
4998 p = xdr_inline_decode(xdr, 4);
4999 if (unlikely(!p))
5000 goto out_overflow;
5001 len = be32_to_cpup(p);
5002 if (len) {
5003 int i;
5004
5005 p = xdr_inline_decode(xdr, 4 * len);
5006 if (unlikely(!p))
5007 goto out_overflow;
5008 for (i = 0; i < len; i++, p++) {
5009 if (be32_to_cpup(p)) {
5010 dprintk("%s: notifications not supported\n",
5011 __func__);
5012 return -EIO;
5013 }
5014 }
5015 }
5016 return 0;
5017out_overflow:
5018 print_overflow_msg(__func__, xdr);
5019 return -EIO;
5020}
5021
5022static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5023 struct nfs4_layoutget_res *res)
5024{
5025 __be32 *p;
5026 int status;
5027 u32 layout_count;
5028
5029 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5030 if (status)
5031 return status;
5032 p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
5033 if (unlikely(!p))
5034 goto out_overflow;
5035 res->return_on_close = be32_to_cpup(p++);
5036 p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
5037 layout_count = be32_to_cpup(p);
5038 if (!layout_count) {
5039 dprintk("%s: server responded with empty layout array\n",
5040 __func__);
5041 return -EINVAL;
5042 }
5043
5044 p = xdr_inline_decode(xdr, 24);
5045 if (unlikely(!p))
5046 goto out_overflow;
5047 p = xdr_decode_hyper(p, &res->range.offset);
5048 p = xdr_decode_hyper(p, &res->range.length);
5049 res->range.iomode = be32_to_cpup(p++);
5050 res->type = be32_to_cpup(p++);
5051
5052 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
5053 if (unlikely(status))
5054 return status;
5055
5056 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
5057 __func__,
5058 (unsigned long)res->range.offset,
5059 (unsigned long)res->range.length,
5060 res->range.iomode,
5061 res->type,
5062 res->layout.len);
5063
5064 /* nfs4_proc_layoutget allocated a single page */
5065 if (res->layout.len > PAGE_SIZE)
5066 return -ENOMEM;
5067 memcpy(res->layout.buf, p, res->layout.len);
5068
5069 if (layout_count > 1) {
5070 /* We only handle a length one array at the moment. Any
5071 * further entries are just ignored. Note that this means
5072 * the client may see a response that is less than the
5073 * minimum it requested.
5074 */
5075 dprintk("%s: server responded with %d layouts, dropping tail\n",
5076 __func__, layout_count);
5077 }
5078
5079 return 0;
5080out_overflow:
5081 print_overflow_msg(__func__, xdr);
5082 return -EIO;
5083}
5084#endif /* CONFIG_NFS_V4_1 */
5085
4685/* 5086/*
4686 * END OF "GENERIC" DECODE ROUTINES. 5087 * END OF "GENERIC" DECODE ROUTINES.
4687 */ 5088 */
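
decode_getdeviceinfo() lets xdr_read_pages() drop the opaque device_addr4 into the preposted pages and then insists that every word of the trailing notification bitmap is zero, since the client registers for no device notifications. The bitmap check in isolation (check_notify_bitmap is an illustrative name):

#include <stdint.h>
#include <stdio.h>

/* Return 0 when the notification bitmap is acceptable (all zero),
 * -1 when the server set any notification bit. */
static int check_notify_bitmap(const uint32_t *words, uint32_t len)
{
        uint32_t i;

        for (i = 0; i < len; i++)
                if (words[i] != 0)
                        return -1;      /* notifications not supported */
        return 0;
}

int main(void)
{
        uint32_t ok[2] = { 0, 0 }, bad[1] = { 1 };

        printf("all-zero bitmap: %d, non-zero bitmap: %d\n",
               check_notify_bitmap(ok, 2), check_notify_bitmap(bad, 1));
        return 0;
}
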
@@ -4824,7 +5225,7 @@ out:
4824/* 5225/*
4825 * Decode RENAME response 5226 * Decode RENAME response
4826 */ 5227 */
4827static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res) 5228static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
4828{ 5229{
4829 struct xdr_stream xdr; 5230 struct xdr_stream xdr;
4830 struct compound_hdr hdr; 5231 struct compound_hdr hdr;
@@ -5259,6 +5660,19 @@ out:
5259 return status; 5660 return status;
5260} 5661}
5261 5662
5663static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
5664{
5665 struct xdr_stream xdr;
5666 struct compound_hdr hdr;
5667 int status;
5668
5669 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
5670 status = decode_compound_hdr(&xdr, &hdr);
5671 if (!status)
5672 status = decode_release_lockowner(&xdr);
5673 return status;
5674}
5675
5262/* 5676/*
5263 * Decode READLINK response 5677 * Decode READLINK response
5264 */ 5678 */
@@ -5696,25 +6110,84 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
5696 status = decode_reclaim_complete(&xdr, (void *)NULL); 6110 status = decode_reclaim_complete(&xdr, (void *)NULL);
5697 return status; 6111 return status;
5698} 6112}
6113
6114/*
6115 * Decode GETDEVINFO response
6116 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
6118 struct nfs4_getdeviceinfo_res *res)
6119{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr;
6122 int status;
6123
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0)
6127 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6129 if (status != 0)
6130 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev);
6132out:
6133 return status;
6134}
6135
6136/*
6137 * Decode LAYOUTGET response
6138 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
6140 struct nfs4_layoutget_res *res)
6141{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr;
6144 int status;
6145
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status)
6149 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6151 if (status)
6152 goto out;
6153 status = decode_putfh(&xdr);
6154 if (status)
6155 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res);
6157out:
6158 return status;
6159}
5699#endif /* CONFIG_NFS_V4_1 */ 6160#endif /* CONFIG_NFS_V4_1 */
5700 6161
5701__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6163 struct nfs_server *server, int plus)
5702{ 6164{
5703 uint32_t bitmap[2] = {0}; 6165 uint32_t bitmap[2] = {0};
5704 uint32_t len; 6166 uint32_t len;
5705 6167 __be32 *p = xdr_inline_decode(xdr, 4);
5706 if (!*p++) { 6168 if (unlikely(!p))
5707 if (!*p) 6169 goto out_overflow;
6170 if (!ntohl(*p++)) {
6171 p = xdr_inline_decode(xdr, 4);
6172 if (unlikely(!p))
6173 goto out_overflow;
6174 if (!ntohl(*p++))
5708 return ERR_PTR(-EAGAIN); 6175 return ERR_PTR(-EAGAIN);
5709 entry->eof = 1; 6176 entry->eof = 1;
5710 return ERR_PTR(-EBADCOOKIE); 6177 return ERR_PTR(-EBADCOOKIE);
5711 } 6178 }
5712 6179
6180 p = xdr_inline_decode(xdr, 12);
6181 if (unlikely(!p))
6182 goto out_overflow;
5713 entry->prev_cookie = entry->cookie; 6183 entry->prev_cookie = entry->cookie;
5714 p = xdr_decode_hyper(p, &entry->cookie); 6184 p = xdr_decode_hyper(p, &entry->cookie);
5715 entry->len = ntohl(*p++); 6185 entry->len = ntohl(*p++);
6186
6187 p = xdr_inline_decode(xdr, entry->len);
6188 if (unlikely(!p))
6189 goto out_overflow;
5716 entry->name = (const char *) p; 6190 entry->name = (const char *) p;
5717 p += XDR_QUADLEN(entry->len);
5718 6191
5719 /* 6192 /*
5720 * In case the server doesn't return an inode number, 6193 * In case the server doesn't return an inode number,
@@ -5722,32 +6195,33 @@ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
5722 * since glibc seems to choke on it...) 6195 * since glibc seems to choke on it...)
5723 */ 6196 */
5724 entry->ino = 1; 6197 entry->ino = 1;
6198 entry->fattr->valid = 0;
5725 6199
5726 len = ntohl(*p++); /* bitmap length */ 6200 if (decode_attr_bitmap(xdr, bitmap) < 0)
5727 if (len-- > 0) { 6201 goto out_overflow;
5728 bitmap[0] = ntohl(*p++); 6202
5729 if (len-- > 0) { 6203 if (decode_attr_length(xdr, &len, &p) < 0)
5730 bitmap[1] = ntohl(*p++); 6204 goto out_overflow;
5731 p += len; 6205
5732 } 6206 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
5733 } 6207 goto out_overflow;
5734 len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ 6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
5735 if (len > 0) { 6209 entry->ino = entry->fattr->fileid;
5736 if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { 6210
5737 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 6211 if (verify_attr_len(xdr, p, len) < 0)
5738 /* Ignore the return value of rdattr_error for now */ 6212 goto out_overflow;
5739 p++; 6213
5740 len--; 6214 p = xdr_inline_peek(xdr, 8);
5741 } 6215 if (p != NULL)
5742 if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) 6216 entry->eof = !p[0] && p[1];
5743 xdr_decode_hyper(p, &entry->ino); 6217 else
5744 else if (bitmap[0] == FATTR4_WORD0_FILEID) 6218 entry->eof = 0;
5745 xdr_decode_hyper(p, &entry->ino);
5746 p += len;
5747 }
5748 6219
5749 entry->eof = !p[0] && p[1];
5750 return p; 6220 return p;
6221
6222out_overflow:
6223 print_overflow_msg(__func__, xdr);
6224 return ERR_PTR(-EIO);
5751} 6225}
5752 6226
5753/* 6227/*
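
The rewritten nfs4_decode_dirent() walks the READDIR entry4 stream through the xdr_stream helpers: a value_follows word, the 64-bit cookie, the XDR-padded name, then the attribute bitmap and attribute blob that feed decode_getfattr_attrs(). A standalone sketch of the entry header layout it consumes (parse_entry and dirent_sk are illustrative names; attribute parsing is left out):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct dirent_sk {
        uint64_t cookie;
        uint32_t namelen;
        const unsigned char *name;
};

static uint32_t rd32(const unsigned char *p)
{
        return ((uint32_t)p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}

static uint64_t rd64(const unsigned char *p)
{
        return ((uint64_t)rd32(p) << 32) | rd32(p + 4);
}

/*
 * Parse one entry4 header: value_follows, cookie, XDR-padded name.
 * Returns bytes consumed, 0 when value_follows is false, or -1 on
 * truncation.
 */
static long parse_entry(const unsigned char *p, size_t avail,
                        struct dirent_sk *e)
{
        size_t need, namepad;

        if (avail < 4)
                return -1;
        if (rd32(p) == 0)               /* no more entries */
                return 0;
        if (avail < 16)
                return -1;
        e->cookie = rd64(p + 4);
        e->namelen = rd32(p + 12);
        namepad = (e->namelen + 3) & ~3u;
        need = 16 + namepad;
        if (avail < need)
                return -1;
        e->name = p + 16;
        return (long)need;
}

int main(void)
{
        /* value_follows=1, cookie=7, name "a" padded to 4 bytes */
        unsigned char wire[] = { 0,0,0,1, 0,0,0,0, 0,0,0,7, 0,0,0,1, 'a',0,0,0 };
        struct dirent_sk e;
        long used = parse_entry(wire, sizeof(wire), &e);

        if (used > 0)
                printf("cookie %llu name %.*s (%ld bytes)\n",
                       (unsigned long long)e.cookie, (int)e.namelen,
                       (const char *)e.name, used);
        return 0;
}
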
@@ -5866,6 +6340,7 @@ struct rpc_procinfo nfs4_procedures[] = {
5866 PROC(GETACL, enc_getacl, dec_getacl), 6340 PROC(GETACL, enc_getacl, dec_getacl),
5867 PROC(SETACL, enc_setacl, dec_setacl), 6341 PROC(SETACL, enc_setacl, dec_setacl),
5868 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 6342 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
6343 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
5869#if defined(CONFIG_NFS_V4_1) 6344#if defined(CONFIG_NFS_V4_1)
5870 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), 6345 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
5871 PROC(CREATE_SESSION, enc_create_session, dec_create_session), 6346 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
@@ -5873,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
5873 PROC(SEQUENCE, enc_sequence, dec_sequence), 6348 PROC(SEQUENCE, enc_sequence, dec_sequence),
5874 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6349 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
5875 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6350 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6351 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6352 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
5876#endif /* CONFIG_NFS_V4_1 */ 6353#endif /* CONFIG_NFS_V4_1 */
5877}; 6354};
5878 6355
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index df101d9f546a..903908a20023 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -3,9 +3,10 @@
3 * 3 *
4 * Allow an NFS filesystem to be mounted as root. The way this works is: 4 * Allow an NFS filesystem to be mounted as root. The way this works is:
5 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. 5 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
6 * (2) Handle RPC negotiation with the system which replied to RARP or 6 * (2) Construct the device string and the options string using DHCP
7 * was reported as a boot server by BOOTP or manually. 7 * option 17 and/or kernel command line options.
8 * (3) The actual mounting is done later, when init() is running. 8 * (3) When mount_root() sets up the root file system, pass these strings
9 * to the NFS client's regular mount interface via sys_mount().
9 * 10 *
10 * 11 *
11 * Changes: 12 * Changes:
@@ -65,470 +66,245 @@
65 * Hua Qin : Support for mounting root file system via 66 * Hua Qin : Support for mounting root file system via
66 * NFS over TCP. 67 * NFS over TCP.
67 * Fabian Frederick: Option parser rebuilt (using parser lib) 68 * Fabian Frederick: Option parser rebuilt (using parser lib)
68*/ 69 * Chuck Lever : Use super.c's text-based mount option parsing
70 * Chuck Lever : Add "nfsrootdebug".
71 */
69 72
70#include <linux/types.h> 73#include <linux/types.h>
71#include <linux/string.h> 74#include <linux/string.h>
72#include <linux/kernel.h>
73#include <linux/time.h>
74#include <linux/fs.h>
75#include <linux/init.h> 75#include <linux/init.h>
76#include <linux/sunrpc/clnt.h>
77#include <linux/sunrpc/xprtsock.h>
78#include <linux/nfs.h> 76#include <linux/nfs.h>
79#include <linux/nfs_fs.h> 77#include <linux/nfs_fs.h>
80#include <linux/nfs_mount.h>
81#include <linux/in.h>
82#include <linux/major.h>
83#include <linux/utsname.h> 78#include <linux/utsname.h>
84#include <linux/inet.h>
85#include <linux/root_dev.h> 79#include <linux/root_dev.h>
86#include <net/ipconfig.h> 80#include <net/ipconfig.h>
87#include <linux/parser.h>
88 81
89#include "internal.h" 82#include "internal.h"
90 83
91/* Define this to allow debugging output */
92#undef NFSROOT_DEBUG
93#define NFSDBG_FACILITY NFSDBG_ROOT 84#define NFSDBG_FACILITY NFSDBG_ROOT
94 85
95/* Default port to use if server is not running a portmapper */
96#define NFS_MNT_PORT 627
97
98/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
99#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
100 88
101/* Parameters passed from the kernel command line */ 89/* Parameters passed from the kernel command line */
102static char nfs_root_name[256] __initdata = ""; 90static char nfs_root_parms[256] __initdata = "";
91
92/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = "";
103 94
104/* Address of NFS server */ 95/* Address of NFS server */
105static __be32 servaddr __initdata = 0; 96static __be32 servaddr __initdata = htonl(INADDR_NONE);
106 97
107/* Name of directory to mount */ 98/* Name of directory to mount */
108static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, }; 99static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
109
110/* NFS-related data */
111static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
112static int nfs_port __initdata = 0; /* Port to connect to for NFS */
113static int mount_port __initdata = 0; /* Mount daemon port number */
114
115
116/***************************************************************************
117
118 Parsing of options
119
120 ***************************************************************************/
121
122enum {
123 /* Options that take integer arguments */
124 Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
125 Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
126 /* Options that take no arguments */
127 Opt_soft, Opt_hard, Opt_intr,
128 Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac,
129 Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
130 Opt_acl, Opt_noacl,
131 /* Error token */
132 Opt_err
133};
134
135static const match_table_t tokens __initconst = {
136 {Opt_port, "port=%u"},
137 {Opt_rsize, "rsize=%u"},
138 {Opt_wsize, "wsize=%u"},
139 {Opt_timeo, "timeo=%u"},
140 {Opt_retrans, "retrans=%u"},
141 {Opt_acregmin, "acregmin=%u"},
142 {Opt_acregmax, "acregmax=%u"},
143 {Opt_acdirmin, "acdirmin=%u"},
144 {Opt_acdirmax, "acdirmax=%u"},
145 {Opt_soft, "soft"},
146 {Opt_hard, "hard"},
147 {Opt_intr, "intr"},
148 {Opt_nointr, "nointr"},
149 {Opt_posix, "posix"},
150 {Opt_noposix, "noposix"},
151 {Opt_cto, "cto"},
152 {Opt_nocto, "nocto"},
153 {Opt_ac, "ac"},
154 {Opt_noac, "noac"},
155 {Opt_lock, "lock"},
156 {Opt_nolock, "nolock"},
157 {Opt_v2, "nfsvers=2"},
158 {Opt_v2, "v2"},
159 {Opt_v3, "nfsvers=3"},
160 {Opt_v3, "v3"},
161 {Opt_udp, "proto=udp"},
162 {Opt_udp, "udp"},
163 {Opt_tcp, "proto=tcp"},
164 {Opt_tcp, "tcp"},
165 {Opt_acl, "acl"},
166 {Opt_noacl, "noacl"},
167 {Opt_err, NULL}
168
169};
170 100
101/* server:export path string passed to super.c */
102static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
103
104#ifdef RPC_DEBUG
171/* 105/*
172 * Parse option string. 106 * When the "nfsrootdebug" kernel command line option is specified,
107 * enable debugging messages for NFSROOT.
173 */ 108 */
174 109static int __init nfs_root_debug(char *__unused)
175static int __init root_nfs_parse(char *name, char *buf)
176{ 110{
177 111 nfs_debug |= NFSDBG_ROOT | NFSDBG_MOUNT;
178 char *p;
179 substring_t args[MAX_OPT_ARGS];
180 int option;
181
182 if (!name)
183 return 1;
184
185 /* Set the NFS remote path */
186 p = strsep(&name, ",");
187 if (p[0] != '\0' && strcmp(p, "default") != 0)
188 strlcpy(buf, p, NFS_MAXPATHLEN);
189
190 while ((p = strsep (&name, ",")) != NULL) {
191 int token;
192 if (!*p)
193 continue;
194 token = match_token(p, tokens, args);
195
196 /* %u tokens only. Beware if you add new tokens! */
197 if (token < Opt_soft && match_int(&args[0], &option))
198 return 0;
199 switch (token) {
200 case Opt_port:
201 nfs_port = option;
202 break;
203 case Opt_rsize:
204 nfs_data.rsize = option;
205 break;
206 case Opt_wsize:
207 nfs_data.wsize = option;
208 break;
209 case Opt_timeo:
210 nfs_data.timeo = option;
211 break;
212 case Opt_retrans:
213 nfs_data.retrans = option;
214 break;
215 case Opt_acregmin:
216 nfs_data.acregmin = option;
217 break;
218 case Opt_acregmax:
219 nfs_data.acregmax = option;
220 break;
221 case Opt_acdirmin:
222 nfs_data.acdirmin = option;
223 break;
224 case Opt_acdirmax:
225 nfs_data.acdirmax = option;
226 break;
227 case Opt_soft:
228 nfs_data.flags |= NFS_MOUNT_SOFT;
229 break;
230 case Opt_hard:
231 nfs_data.flags &= ~NFS_MOUNT_SOFT;
232 break;
233 case Opt_intr:
234 case Opt_nointr:
235 break;
236 case Opt_posix:
237 nfs_data.flags |= NFS_MOUNT_POSIX;
238 break;
239 case Opt_noposix:
240 nfs_data.flags &= ~NFS_MOUNT_POSIX;
241 break;
242 case Opt_cto:
243 nfs_data.flags &= ~NFS_MOUNT_NOCTO;
244 break;
245 case Opt_nocto:
246 nfs_data.flags |= NFS_MOUNT_NOCTO;
247 break;
248 case Opt_ac:
249 nfs_data.flags &= ~NFS_MOUNT_NOAC;
250 break;
251 case Opt_noac:
252 nfs_data.flags |= NFS_MOUNT_NOAC;
253 break;
254 case Opt_lock:
255 nfs_data.flags &= ~NFS_MOUNT_NONLM;
256 break;
257 case Opt_nolock:
258 nfs_data.flags |= NFS_MOUNT_NONLM;
259 break;
260 case Opt_v2:
261 nfs_data.flags &= ~NFS_MOUNT_VER3;
262 break;
263 case Opt_v3:
264 nfs_data.flags |= NFS_MOUNT_VER3;
265 break;
266 case Opt_udp:
267 nfs_data.flags &= ~NFS_MOUNT_TCP;
268 break;
269 case Opt_tcp:
270 nfs_data.flags |= NFS_MOUNT_TCP;
271 break;
272 case Opt_acl:
273 nfs_data.flags &= ~NFS_MOUNT_NOACL;
274 break;
275 case Opt_noacl:
276 nfs_data.flags |= NFS_MOUNT_NOACL;
277 break;
278 default:
279 printk(KERN_WARNING "Root-NFS: unknown "
280 "option: %s\n", p);
281 return 0;
282 }
283 }
284
285 return 1; 112 return 1;
286} 113}
287 114
115__setup("nfsrootdebug", nfs_root_debug);
116#endif
117
288/* 118/*
289 * Prepare the NFS data structure and parse all options. 119 * Parse NFS server and directory information passed on the kernel
120 * command line.
121 *
122 * nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
123 *
124 * If there is a "%s" token in the <root-dir> string, it is replaced
125 * by the ASCII-representation of the client's IP address.
290 */ 126 */
291static int __init root_nfs_name(char *name) 127static int __init nfs_root_setup(char *line)
292{ 128{
293 static char buf[NFS_MAXPATHLEN] __initdata; 129 ROOT_DEV = Root_NFS;
294 char *cp; 130
295 131 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
296 /* Set some default values */ 132 strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms));
297 memset(&nfs_data, 0, sizeof(nfs_data)); 133 } else {
298 nfs_port = -1; 134 size_t n = strlen(line) + sizeof(NFS_ROOT) - 1;
299 nfs_data.version = NFS_MOUNT_VERSION; 135 if (n >= sizeof(nfs_root_parms))
300 nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ 136 line[sizeof(nfs_root_parms) - sizeof(NFS_ROOT) - 2] = '\0';
301 nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; 137 sprintf(nfs_root_parms, NFS_ROOT, line);
302 nfs_data.wsize = NFS_DEF_FILE_IO_SIZE;
303 nfs_data.acregmin = NFS_DEF_ACREGMIN;
304 nfs_data.acregmax = NFS_DEF_ACREGMAX;
305 nfs_data.acdirmin = NFS_DEF_ACDIRMIN;
306 nfs_data.acdirmax = NFS_DEF_ACDIRMAX;
307 strcpy(buf, NFS_ROOT);
308
309 /* Process options received from the remote server */
310 root_nfs_parse(root_server_path, buf);
311
312 /* Override them by options set on kernel command-line */
313 root_nfs_parse(name, buf);
314
315 cp = utsname()->nodename;
316 if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
317 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
318 return -1;
319 } 138 }
320 sprintf(nfs_export_path, buf, cp); 139
140 /*
141 * Extract the IP address of the NFS server containing our
142 * root file system, if one was specified.
143 *
144 * Note: root_nfs_parse_addr() removes the server-ip from
145 * nfs_root_parms, if it exists.
146 */
147 root_server_addr = root_nfs_parse_addr(nfs_root_parms);
321 148
322 return 1; 149 return 1;
323} 150}
324 151
152__setup("nfsroot=", nfs_root_setup);
325 153
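
With this parser in place, everything NFSROOT needs rides on the kernel command line in the form documented above. For instance (the server address and export path here are only illustrative):

        nfsroot=192.168.1.1:/export/client,tcp,v3 ip=dhcp

root_nfs_parse_addr() peels the leading "192.168.1.1:" off into root_server_addr, and the remaining "/export/client,tcp,v3" is later split by root_nfs_parse_options() into the export path and the text-based mount options handed to sys_mount().
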
326/* 154static int __init root_nfs_copy(char *dest, const char *src,
327 * Get NFS server address. 155 const size_t destlen)
328 */
329static int __init root_nfs_addr(void)
330{ 156{
331 if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) { 157 if (strlcpy(dest, src, destlen) > destlen)
332 printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
333 return -1; 158 return -1;
334 } 159 return 0;
160}
335 161
336 snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), 162static int __init root_nfs_cat(char *dest, const char *src,
337 "%pI4", &servaddr); 163 const size_t destlen)
164{
165 if (strlcat(dest, src, destlen) > destlen)
166 return -1;
338 return 0; 167 return 0;
339} 168}
340 169
341/* 170/*
342 * Tell the user what's going on. 171 * Parse out root export path and mount options from
172 * passed-in string @incoming.
173 *
174 * Copy the export path into @exppath.
343 */ 175 */
344#ifdef NFSROOT_DEBUG 176static int __init root_nfs_parse_options(char *incoming, char *exppath,
345static void __init root_nfs_print(void) 177 const size_t exppathlen)
346{ 178{
347 printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", 179 char *p;
348 nfs_export_path, nfs_data.hostname);
349 printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
350 nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
351 printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
352 nfs_data.acregmin, nfs_data.acregmax,
353 nfs_data.acdirmin, nfs_data.acdirmax);
354 printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n",
355 nfs_port, mount_port, nfs_data.flags);
356}
357#endif
358
359 180
360static int __init root_nfs_init(void) 181 /*
361{ 182 * Set the NFS remote path
362#ifdef NFSROOT_DEBUG 183 */
363 nfs_debug |= NFSDBG_ROOT; 184 p = strsep(&incoming, ",");
364#endif 185 if (*p != '\0' && strcmp(p, "default") != 0)
186 if (root_nfs_copy(exppath, p, exppathlen))
187 return -1;
365 188
366 /* 189 /*
367 * Decode the root directory path name and NFS options from 190 * @incoming now points to the rest of the string; if it
368 * the kernel command line. This has to go here in order to 191 * contains something, append it to our root options buffer
369 * be able to use the client IP address for the remote root
370 * directory (necessary for pure RARP booting).
371 */ 192 */
372 if (root_nfs_name(nfs_root_name) < 0 || 193 if (incoming != NULL && *incoming != '\0')
373 root_nfs_addr() < 0) 194 if (root_nfs_cat(nfs_root_options, incoming,
374 return -1; 195 sizeof(nfs_root_options)))
196 return -1;
375 197
376#ifdef NFSROOT_DEBUG 198 /*
377 root_nfs_print(); 199 * Possibly prepare for more options to be appended
378#endif 200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
379 206
380 return 0; 207 return 0;
381} 208}
382 209
383
384/* 210/*
385 * Parse NFS server and directory information passed on the kernel 211 * Decode the export directory path name and NFS options from
386 * command line. 212 * the kernel command line. This has to be done late in order to
213 * use a dynamically acquired client IP address for the remote
214 * root directory path.
215 *
216 * Returns zero if successful; otherwise -1 is returned.
387 */ 217 */
388static int __init nfs_root_setup(char *line) 218static int __init root_nfs_data(char *cmdline)
389{ 219{
390 ROOT_DEV = Root_NFS; 220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
391 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { 221 int len, retval = -1;
392 strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); 222 char *tmp = NULL;
393 } else { 223 const size_t tmplen = sizeof(nfs_export_path);
394 int n = strlen(line) + sizeof(NFS_ROOT) - 1; 224
395 if (n >= sizeof(nfs_root_name)) 225 tmp = kzalloc(tmplen, GFP_KERNEL);
396 line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; 226 if (tmp == NULL)
397 sprintf(nfs_root_name, NFS_ROOT, line); 227 goto out_nomem;
228 strcpy(tmp, NFS_ROOT);
229
230 if (root_server_path[0] != '\0') {
231 dprintk("Root-NFS: DHCPv4 option 17: %s\n",
232 root_server_path);
233 if (root_nfs_parse_options(root_server_path, tmp, tmplen))
234 goto out_optionstoolong;
398 } 235 }
399 root_server_addr = root_nfs_parse_addr(nfs_root_name);
400 return 1;
401}
402
403__setup("nfsroot=", nfs_root_setup);
404
405/***************************************************************************
406 236
407 Routines to actually mount the root directory 237 if (cmdline[0] != '\0') {
238 dprintk("Root-NFS: nfsroot=%s\n", cmdline);
239 if (root_nfs_parse_options(cmdline, tmp, tmplen))
240 goto out_optionstoolong;
241 }
408 242
409 ***************************************************************************/ 243 /*
244 * Append mandatory options for nfsroot so they override
245 * what has come before
246 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
248 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option,
250 sizeof(nfs_root_options)))
251 goto out_optionstoolong;
410 252
411/* 253 /*
412 * Construct sockaddr_in from address and port number. 254 * Set up nfs_root_device. For NFS mounts, this looks like
413 */ 255 *
414static inline void 256 * server:/path
415set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) 257 *
416{ 258 * At this point, utsname()->nodename contains our local
417 sin->sin_family = AF_INET; 259 * IP address or hostname, set by ipconfig. If "%s" exists
418 sin->sin_addr.s_addr = addr; 260 * in tmp, substitute the nodename, then shovel the whole
419 sin->sin_port = port; 261 * mess into nfs_root_device.
420} 262 */
263 len = snprintf(nfs_export_path, sizeof(nfs_export_path),
264 tmp, utsname()->nodename);
265 if (len > (int)sizeof(nfs_export_path))
266 goto out_devnametoolong;
267 len = snprintf(nfs_root_device, sizeof(nfs_root_device),
268 "%pI4:%s", &servaddr, nfs_export_path);
269 if (len > (int)sizeof(nfs_root_device))
270 goto out_devnametoolong;
421 271
422/* 272 retval = 0;
423 * Query server portmapper for the port of a daemon program.
424 */
425static int __init root_nfs_getport(int program, int version, int proto)
426{
427 struct sockaddr_in sin;
428 273
429 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n", 274out:
430 program, version, &servaddr); 275 kfree(tmp);
431 set_sockaddr(&sin, servaddr, 0); 276 return retval;
432 return rpcb_getport_sync(&sin, program, version, proto); 277out_nomem:
278 printk(KERN_ERR "Root-NFS: could not allocate memory\n");
279 goto out;
280out_optionstoolong:
281 printk(KERN_ERR "Root-NFS: mount options string too long\n");
282 goto out;
283out_devnametoolong:
284 printk(KERN_ERR "Root-NFS: root device name too long.\n");
285 goto out;
433} 286}
434 287
435 288/**
436/* 289 * nfs_root_data - Return prepared 'data' for NFSROOT mount
437 * Use portmapper to find mountd and nfsd port numbers if not overriden 290 * @root_device: OUT: address of string containing NFSROOT device
438 * by the user. Use defaults if portmapper is not available. 291 * @root_data: OUT: address of string containing NFSROOT mount options
439 * XXX: Is there any nfs server with no portmapper? 292 *
293 * Returns zero and sets @root_device and @root_data if successful,
294 * otherwise -1 is returned.
440 */ 295 */
441static int __init root_nfs_ports(void) 296int __init nfs_root_data(char **root_device, char **root_data)
442{ 297{
443 int port; 298 servaddr = root_server_addr;
444 int nfsd_ver, mountd_ver; 299 if (servaddr == htonl(INADDR_NONE)) {
445 int nfsd_port, mountd_port; 300 printk(KERN_ERR "Root-NFS: no NFS server address\n");
446 int proto; 301 return -1;
447
448 if (nfs_data.flags & NFS_MOUNT_VER3) {
449 nfsd_ver = NFS3_VERSION;
450 mountd_ver = NFS_MNT3_VERSION;
451 nfsd_port = NFS_PORT;
452 mountd_port = NFS_MNT_PORT;
453 } else {
454 nfsd_ver = NFS2_VERSION;
455 mountd_ver = NFS_MNT_VERSION;
456 nfsd_port = NFS_PORT;
457 mountd_port = NFS_MNT_PORT;
458 }
459
460 proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
461
462 if (nfs_port < 0) {
463 if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
464 printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
465 "number from server, using default\n");
466 port = nfsd_port;
467 }
468 nfs_port = port;
469 dprintk("Root-NFS: Portmapper on server returned %d "
470 "as nfsd port\n", port);
471 } 302 }
472 303
473 if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) { 304 if (root_nfs_data(nfs_root_parms) < 0)
474 printk(KERN_ERR "Root-NFS: Unable to get mountd port " 305 return -1;
475 "number from server, using default\n");
476 port = mountd_port;
477 }
478 mount_port = port;
479 dprintk("Root-NFS: mountd port is %d\n", port);
480 306
307 *root_device = nfs_root_device;
308 *root_data = nfs_root_options;
481 return 0; 309 return 0;
482} 310}
483
484
485/*
486 * Get a file handle from the server for the directory which is to be
487 * mounted.
488 */
489static int __init root_nfs_get_handle(void)
490{
491 struct sockaddr_in sin;
492 unsigned int auth_flav_len = 0;
493 struct nfs_mount_request request = {
494 .sap = (struct sockaddr *)&sin,
495 .salen = sizeof(sin),
496 .dirpath = nfs_export_path,
497 .version = (nfs_data.flags & NFS_MOUNT_VER3) ?
498 NFS_MNT3_VERSION : NFS_MNT_VERSION,
499 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
500 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
501 .auth_flav_len = &auth_flav_len,
502 };
503 int status = -ENOMEM;
504
505 request.fh = nfs_alloc_fhandle();
506 if (!request.fh)
507 goto out;
508 set_sockaddr(&sin, servaddr, htons(mount_port));
509 status = nfs_mount(&request);
510 if (status < 0)
511 printk(KERN_ERR "Root-NFS: Server returned error %d "
512 "while mounting %s\n", status, nfs_export_path);
513 else {
514 nfs_data.root.size = request.fh->size;
515 memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
516 }
517 nfs_free_fhandle(request.fh);
518out:
519 return status;
520}
521
522/*
523 * Get the NFS port numbers and file handle, and return the prepared 'data'
524 * argument for mount() if everything went OK. Return NULL otherwise.
525 */
526void * __init nfs_root_data(void)
527{
528 if (root_nfs_init() < 0
529 || root_nfs_ports() < 0
530 || root_nfs_get_handle() < 0)
531 return NULL;
532 set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port));
533 return (void*)&nfs_data;
534}
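
As a rough illustration of what the rewritten nfsroot parsing above does: the nfsroot= string is split at the first comma into an export path and a comma-separated option list, and the mandatory options are appended last so they override anything the user or DHCP supplied. A minimal userspace sketch of that flow follows; the sample value, buffer sizes and the addr= value are made up for the demo, and this is not the kernel code itself.

/* Illustrative userspace sketch (not kernel code): split an nfsroot=
 * value into export path and mount options, the way the new
 * root_nfs_parse_options() does, then append mandatory options last.
 */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[] = "/srv/nfs/%s,vers=3,tcp";	/* sample nfsroot= value */
	char path[256] = "";
	char opts[256] = "";
	char *rest = line;
	char *p = strsep(&rest, ",");	/* export path is everything up to
					   the first comma */

	if (*p != '\0' && strcmp(p, "default") != 0)
		snprintf(path, sizeof(path), "%s", p);

	if (rest != NULL && *rest != '\0')	/* the remainder is the
						   comma-separated option list */
		snprintf(opts, sizeof(opts), "%s,", rest);

	/* mandatory options are appended last so they override what
	 * has come before */
	strncat(opts, "nolock,addr=192.168.1.1",
		sizeof(opts) - strlen(opts) - 1);

	printf("export path: %s\n", path);	/* "%s" is later replaced by
						   the client's nodename */
	printf("mount opts : %s\n", opts);
	return 0;
}
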
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a3654e57b589..137b549e63db 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -65,6 +65,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
65 if (req == NULL) 65 if (req == NULL)
66 return ERR_PTR(-ENOMEM); 66 return ERR_PTR(-ENOMEM);
67 67
68 /* get lock context early so we can deal with alloc failures */
69 req->wb_lock_context = nfs_get_lock_context(ctx);
70 if (req->wb_lock_context == NULL) {
71 nfs_page_free(req);
72 return ERR_PTR(-ENOMEM);
73 }
74
68 /* Initialize the request struct. Initially, we assume a 75 /* Initialize the request struct. Initially, we assume a
69 * long write-back delay. This will be adjusted in 76 * long write-back delay. This will be adjusted in
70 * update_nfs_request below if the region is not locked. */ 77 * update_nfs_request below if the region is not locked. */
@@ -141,11 +148,16 @@ void nfs_clear_request(struct nfs_page *req)
141{ 148{
142 struct page *page = req->wb_page; 149 struct page *page = req->wb_page;
143 struct nfs_open_context *ctx = req->wb_context; 150 struct nfs_open_context *ctx = req->wb_context;
151 struct nfs_lock_context *l_ctx = req->wb_lock_context;
144 152
145 if (page != NULL) { 153 if (page != NULL) {
146 page_cache_release(page); 154 page_cache_release(page);
147 req->wb_page = NULL; 155 req->wb_page = NULL;
148 } 156 }
157 if (l_ctx != NULL) {
158 nfs_put_lock_context(l_ctx);
159 req->wb_lock_context = NULL;
160 }
149 if (ctx != NULL) { 161 if (ctx != NULL) {
150 put_nfs_open_context(ctx); 162 put_nfs_open_context(ctx);
151 req->wb_context = NULL; 163 req->wb_context = NULL;
@@ -235,7 +247,7 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
235{ 247{
236 if (req->wb_context->cred != prev->wb_context->cred) 248 if (req->wb_context->cred != prev->wb_context->cred)
237 return 0; 249 return 0;
238 if (req->wb_context->lockowner != prev->wb_context->lockowner) 250 if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
239 return 0; 251 return 0;
240 if (req->wb_context->state != prev->wb_context->state) 252 if (req->wb_context->state != prev->wb_context->state)
241 return 0; 253 return 0;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
new file mode 100644
index 000000000000..db773428f95f
--- /dev/null
+++ b/fs/nfs/pnfs.c
@@ -0,0 +1,783 @@
1/*
2 * pNFS functions to call and manage layout drivers.
3 *
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#include <linux/nfs_fs.h>
31#include "internal.h"
32#include "pnfs.h"
33
34#define NFSDBG_FACILITY NFSDBG_PNFS
35
36/* Locking:
37 *
38 * pnfs_spinlock:
39 * protects pnfs_modules_tbl.
40 */
41static DEFINE_SPINLOCK(pnfs_spinlock);
42
43/*
44 * pnfs_modules_tbl holds all pnfs modules
45 */
46static LIST_HEAD(pnfs_modules_tbl);
47
48/* Return the registered pnfs layout driver module matching given id */
49static struct pnfs_layoutdriver_type *
50find_pnfs_driver_locked(u32 id)
51{
52 struct pnfs_layoutdriver_type *local;
53
54 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
55 if (local->id == id)
56 goto out;
57 local = NULL;
58out:
59 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
60 return local;
61}
62
63static struct pnfs_layoutdriver_type *
64find_pnfs_driver(u32 id)
65{
66 struct pnfs_layoutdriver_type *local;
67
68 spin_lock(&pnfs_spinlock);
69 local = find_pnfs_driver_locked(id);
70 spin_unlock(&pnfs_spinlock);
71 return local;
72}
73
74void
75unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{
77 if (nfss->pnfs_curr_ld) {
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL;
82}
83
84/*
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
87 *
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
89 */
90void
91set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
92{
93 struct pnfs_layoutdriver_type *ld_type = NULL;
94
95 if (id == 0)
96 goto out_no_driver;
97 if (!(server->nfs_client->cl_exchange_flags &
98 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
99 printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
100 id, server->nfs_client->cl_exchange_flags);
101 goto out_no_driver;
102 }
103 ld_type = find_pnfs_driver(id);
104 if (!ld_type) {
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
106 ld_type = find_pnfs_driver(id);
107 if (!ld_type) {
108 dprintk("%s: No pNFS module found for %u.\n",
109 __func__, id);
110 goto out_no_driver;
111 }
112 }
113 if (!try_module_get(ld_type->owner)) {
114 dprintk("%s: Could not grab reference on module\n", __func__);
115 goto out_no_driver;
116 }
117 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) {
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return;
127
128out_no_driver:
129 dprintk("%s: Using NFSv4 I/O\n", __func__);
130 server->pnfs_curr_ld = NULL;
131}
132
133int
134pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
135{
136 int status = -EINVAL;
137 struct pnfs_layoutdriver_type *tmp;
138
139 if (ld_type->id == 0) {
140 printk(KERN_ERR "%s id 0 is reserved\n", __func__);
141 return status;
142 }
143 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
144 printk(KERN_ERR "%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__);
146 return status;
147 }
148
149 spin_lock(&pnfs_spinlock);
150 tmp = find_pnfs_driver_locked(ld_type->id);
151 if (!tmp) {
152 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
153 status = 0;
154 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
155 ld_type->name);
156 } else {
157 printk(KERN_ERR "%s Module with id %d already loaded!\n",
158 __func__, ld_type->id);
159 }
160 spin_unlock(&pnfs_spinlock);
161
162 return status;
163}
164EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
165
166void
167pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
168{
169 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
170 spin_lock(&pnfs_spinlock);
171 list_del(&ld_type->pnfs_tblid);
172 spin_unlock(&pnfs_spinlock);
173}
174EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
175
176/*
177 * pNFS client layout cache
178 */
179
180static void
181get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
182{
183 assert_spin_locked(&lo->inode->i_lock);
184 lo->refcount++;
185}
186
187static void
188put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
189{
190 assert_spin_locked(&lo->inode->i_lock);
191 BUG_ON(lo->refcount == 0);
192
193 lo->refcount--;
194 if (!lo->refcount) {
195 dprintk("%s: freeing layout cache %p\n", __func__, lo);
196 BUG_ON(!list_empty(&lo->layouts));
197 NFS_I(lo->inode)->layout = NULL;
198 kfree(lo);
199 }
200}
201
202void
203put_layout_hdr(struct inode *inode)
204{
205 spin_lock(&inode->i_lock);
206 put_layout_hdr_locked(NFS_I(inode)->layout);
207 spin_unlock(&inode->i_lock);
208}
209
210static void
211init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
212{
213 INIT_LIST_HEAD(&lseg->fi_list);
214 kref_init(&lseg->kref);
215 lseg->layout = lo;
216}
217
218/* Called without i_lock held, as the free_lseg call may sleep */
219static void
220destroy_lseg(struct kref *kref)
221{
222 struct pnfs_layout_segment *lseg =
223 container_of(kref, struct pnfs_layout_segment, kref);
224 struct inode *ino = lseg->layout->inode;
225
226 dprintk("--> %s\n", __func__);
227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
229 put_layout_hdr(ino);
230}
231
232static void
233put_lseg(struct pnfs_layout_segment *lseg)
234{
235 if (!lseg)
236 return;
237
238 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
239 atomic_read(&lseg->kref.refcount));
240 kref_put(&lseg->kref, destroy_lseg);
241}
242
243static void
244pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
245{
246 struct pnfs_layout_segment *lseg, *next;
247 struct nfs_client *clp;
248
249 dprintk("%s:Begin lo %p\n", __func__, lo);
250
251 assert_spin_locked(&lo->inode->i_lock);
252 list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
253 dprintk("%s: freeing lseg %p\n", __func__, lseg);
254 list_move(&lseg->fi_list, tmp_list);
255 }
256 clp = NFS_SERVER(lo->inode)->nfs_client;
257 spin_lock(&clp->cl_lock);
258 /* List does not take a reference, so no need for put here */
259 list_del_init(&lo->layouts);
260 spin_unlock(&clp->cl_lock);
261 write_seqlock(&lo->seqlock);
262 clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
263 write_sequnlock(&lo->seqlock);
264
265 dprintk("%s:Return\n", __func__);
266}
267
268static void
269pnfs_free_lseg_list(struct list_head *tmp_list)
270{
271 struct pnfs_layout_segment *lseg;
272
273 while (!list_empty(tmp_list)) {
274 lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
275 fi_list);
276 dprintk("%s calling put_lseg on %p\n", __func__, lseg);
277 list_del(&lseg->fi_list);
278 put_lseg(lseg);
279 }
280}
281
282void
283pnfs_destroy_layout(struct nfs_inode *nfsi)
284{
285 struct pnfs_layout_hdr *lo;
286 LIST_HEAD(tmp_list);
287
288 spin_lock(&nfsi->vfs_inode.i_lock);
289 lo = nfsi->layout;
290 if (lo) {
291 pnfs_clear_lseg_list(lo, &tmp_list);
292 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
293 put_layout_hdr_locked(lo);
294 }
295 spin_unlock(&nfsi->vfs_inode.i_lock);
296 pnfs_free_lseg_list(&tmp_list);
297}
298
299/*
300 * Called by the state manager to remove all layouts established under an
301 * expired lease.
302 */
303void
304pnfs_destroy_all_layouts(struct nfs_client *clp)
305{
306 struct pnfs_layout_hdr *lo;
307 LIST_HEAD(tmp_list);
308
309 spin_lock(&clp->cl_lock);
310 list_splice_init(&clp->cl_layouts, &tmp_list);
311 spin_unlock(&clp->cl_lock);
312
313 while (!list_empty(&tmp_list)) {
314 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
315 layouts);
316 dprintk("%s freeing layout for inode %lu\n", __func__,
317 lo->inode->i_ino);
318 pnfs_destroy_layout(NFS_I(lo->inode));
319 }
320}
321
322/* update lo->stateid with new if it is more recent
323 *
324 * lo->stateid could be the open stateid, in which case we just use what is given.
325 */
326static void
327pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
328 const nfs4_stateid *new)
329{
330 nfs4_stateid *old = &lo->stateid;
331 bool overwrite = false;
332
333 write_seqlock(&lo->seqlock);
334 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
335 memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
336 overwrite = true;
337 else {
338 u32 oldseq, newseq;
339
340 oldseq = be32_to_cpu(old->stateid.seqid);
341 newseq = be32_to_cpu(new->stateid.seqid);
342 if ((int)(newseq - oldseq) > 0)
343 overwrite = true;
344 }
345 if (overwrite)
346 memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
347 write_sequnlock(&lo->seqlock);
348}
349
350static void
351pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
352 struct nfs4_state *state)
353{
354 int seq;
355
356 dprintk("--> %s\n", __func__);
357 write_seqlock(&lo->seqlock);
358 do {
359 seq = read_seqbegin(&state->seqlock);
360 memcpy(lo->stateid.data, state->stateid.data,
361 sizeof(state->stateid.data));
362 } while (read_seqretry(&state->seqlock, seq));
363 set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
364 write_sequnlock(&lo->seqlock);
365 dprintk("<-- %s\n", __func__);
366}
367
368void
369pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
370 struct nfs4_state *open_state)
371{
372 int seq;
373
374 dprintk("--> %s\n", __func__);
375 do {
376 seq = read_seqbegin(&lo->seqlock);
377 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
378 /* This will trigger retry of the read */
379 pnfs_layout_from_open_stateid(lo, open_state);
380 } else
381 memcpy(dst->data, lo->stateid.data,
382 sizeof(lo->stateid.data));
383 } while (read_seqretry(&lo->seqlock, seq));
384 dprintk("<-- %s\n", __func__);
385}
386
387/*
388 * Get layout from server.
389 * For now, assume that whole-file layouts are requested:
390 * arg->offset: 0
391 * arg->length: all ones
392 */
393static struct pnfs_layout_segment *
394send_layoutget(struct pnfs_layout_hdr *lo,
395 struct nfs_open_context *ctx,
396 u32 iomode)
397{
398 struct inode *ino = lo->inode;
399 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL;
402
403 dprintk("--> %s\n", __func__);
404
405 BUG_ON(ctx == NULL);
406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) {
408 put_layout_hdr(lo->inode);
409 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode;
414 lgp->args.range.offset = 0;
415 lgp->args.range.length = NFS4_MAX_UINT64;
416 lgp->args.type = server->pnfs_curr_ld->id;
417 lgp->args.inode = ino;
418 lgp->args.ctx = get_nfs_open_context(ctx);
419 lgp->lsegpp = &lseg;
420
421 /* Synchronously retrieve layout information from server and
422 * store in lseg.
423 */
424 nfs4_proc_layoutget(lgp);
425 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */
427 set_bit(lo_fail_bit(iomode), &lo->state);
428 }
429 return lseg;
430}
431
432/*
433 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those
435 * are seen first.
436 */
437static s64
438cmp_layout(u32 iomode1, u32 iomode2)
439{
440 /* read > read/write */
441 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
442}
443
444static void
445pnfs_insert_layout(struct pnfs_layout_hdr *lo,
446 struct pnfs_layout_segment *lseg)
447{
448 struct pnfs_layout_segment *lp;
449 int found = 0;
450
451 dprintk("%s:Begin\n", __func__);
452
453 assert_spin_locked(&lo->inode->i_lock);
454 if (list_empty(&lo->segs)) {
455 struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
456
457 spin_lock(&clp->cl_lock);
458 BUG_ON(!list_empty(&lo->layouts));
459 list_add_tail(&lo->layouts, &clp->cl_layouts);
460 spin_unlock(&clp->cl_lock);
461 }
462 list_for_each_entry(lp, &lo->segs, fi_list) {
463 if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
464 continue;
465 list_add_tail(&lseg->fi_list, &lp->fi_list);
466 dprintk("%s: inserted lseg %p "
467 "iomode %d offset %llu length %llu before "
468 "lp %p iomode %d offset %llu length %llu\n",
469 __func__, lseg, lseg->range.iomode,
470 lseg->range.offset, lseg->range.length,
471 lp, lp->range.iomode, lp->range.offset,
472 lp->range.length);
473 found = 1;
474 break;
475 }
476 if (!found) {
477 list_add_tail(&lseg->fi_list, &lo->segs);
478 dprintk("%s: inserted lseg %p "
479 "iomode %d offset %llu length %llu at tail\n",
480 __func__, lseg, lseg->range.iomode,
481 lseg->range.offset, lseg->range.length);
482 }
483 get_layout_hdr_locked(lo);
484
485 dprintk("%s:Return\n", __func__);
486}
487
488static struct pnfs_layout_hdr *
489alloc_init_layout_hdr(struct inode *ino)
490{
491 struct pnfs_layout_hdr *lo;
492
493 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
494 if (!lo)
495 return NULL;
496 lo->refcount = 1;
497 INIT_LIST_HEAD(&lo->layouts);
498 INIT_LIST_HEAD(&lo->segs);
499 seqlock_init(&lo->seqlock);
500 lo->inode = ino;
501 return lo;
502}
503
504static struct pnfs_layout_hdr *
505pnfs_find_alloc_layout(struct inode *ino)
506{
507 struct nfs_inode *nfsi = NFS_I(ino);
508 struct pnfs_layout_hdr *new = NULL;
509
510 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
511
512 assert_spin_locked(&ino->i_lock);
513 if (nfsi->layout)
514 return nfsi->layout;
515
516 spin_unlock(&ino->i_lock);
517 new = alloc_init_layout_hdr(ino);
518 spin_lock(&ino->i_lock);
519
520 if (likely(nfsi->layout == NULL)) /* Won the race? */
521 nfsi->layout = new;
522 else
523 kfree(new);
524 return nfsi->layout;
525}
526
527/*
528 * iomode matching rules:
529 * iomode lseg match
530 * ----- ----- -----
531 * ANY READ true
532 * ANY RW true
533 * RW READ false
534 * RW RW true
535 * READ READ true
536 * READ RW true
537 */
538static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
542}
543
544/*
545 * lookup range in layout
546 */
547static struct pnfs_layout_segment *
548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
549{
550 struct pnfs_layout_segment *lseg, *ret = NULL;
551
552 dprintk("%s:Begin\n", __func__);
553
554 assert_spin_locked(&lo->inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) {
556 if (is_matching_lseg(lseg, iomode)) {
557 ret = lseg;
558 break;
559 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0)
561 break;
562 }
563
564 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
566 return ret;
567}
568
569/*
570 * Layout segment is retrieved from the server if not cached.
571 * The appropriate layout segment is referenced and returned to the caller.
572 */
573struct pnfs_layout_segment *
574pnfs_update_layout(struct inode *ino,
575 struct nfs_open_context *ctx,
576 enum pnfs_iomode iomode)
577{
578 struct nfs_inode *nfsi = NFS_I(ino);
579 struct pnfs_layout_hdr *lo;
580 struct pnfs_layout_segment *lseg = NULL;
581
582 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
583 return NULL;
584 spin_lock(&ino->i_lock);
585 lo = pnfs_find_alloc_layout(ino);
586 if (lo == NULL) {
587 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
588 goto out_unlock;
589 }
590
591 /* Check to see if the layout for the given range already exists */
592 lseg = pnfs_has_layout(lo, iomode);
593 if (lseg) {
594 dprintk("%s: Using cached lseg %p for iomode %d)\n",
595 __func__, lseg, iomode);
596 goto out_unlock;
597 }
598
599 /* if LAYOUTGET already failed once we don't try again */
600 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
601 goto out_unlock;
602
603 get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
604 spin_unlock(&ino->i_lock);
605
606 lseg = send_layoutget(lo, ctx, iomode);
607out:
608 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
609 nfsi->layout->state, lseg);
610 return lseg;
611out_unlock:
612 spin_unlock(&ino->i_lock);
613 goto out;
614}
615
616int
617pnfs_layout_process(struct nfs4_layoutget *lgp)
618{
619 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
620 struct nfs4_layoutget_res *res = &lgp->res;
621 struct pnfs_layout_segment *lseg;
622 struct inode *ino = lo->inode;
623 int status = 0;
624
625 /* Inject layout blob into I/O device driver */
626 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
627 if (!lseg || IS_ERR(lseg)) {
628 if (!lseg)
629 status = -ENOMEM;
630 else
631 status = PTR_ERR(lseg);
632 dprintk("%s: Could not allocate layout: error %d\n",
633 __func__, status);
634 goto out;
635 }
636
637 spin_lock(&ino->i_lock);
638 init_lseg(lo, lseg);
639 lseg->range = res->range;
640 *lgp->lsegpp = lseg;
641 pnfs_insert_layout(lo, lseg);
642
643 /* Done processing layoutget. Set the layout stateid */
644 pnfs_set_layout_stateid(lo, &res->stateid);
645 spin_unlock(&ino->i_lock);
646out:
647 return status;
648}
649
650/*
651 * Device ID cache. Currently supports one layout type per struct nfs_client.
652 * Add layout type to the lookup key to expand to support multiple types.
653 */
654int
655pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
656 void (*free_callback)(struct pnfs_deviceid_node *))
657{
658 struct pnfs_deviceid_cache *c;
659
660 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
661 if (!c)
662 return -ENOMEM;
663 spin_lock(&clp->cl_lock);
664 if (clp->cl_devid_cache != NULL) {
665 atomic_inc(&clp->cl_devid_cache->dc_ref);
666 dprintk("%s [kref [%d]]\n", __func__,
667 atomic_read(&clp->cl_devid_cache->dc_ref));
668 kfree(c);
669 } else {
670 /* kzalloc initializes hlists */
671 spin_lock_init(&c->dc_lock);
672 atomic_set(&c->dc_ref, 1);
673 c->dc_free_callback = free_callback;
674 clp->cl_devid_cache = c;
675 dprintk("%s [new]\n", __func__);
676 }
677 spin_unlock(&clp->cl_lock);
678 return 0;
679}
680EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
681
682/*
683 * Called from pnfs_layoutdriver_type->free_lseg
684 * last layout segment reference frees deviceid
685 */
686void
687pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
688 struct pnfs_deviceid_node *devid)
689{
690 struct nfs4_deviceid *id = &devid->de_id;
691 struct pnfs_deviceid_node *d;
692 struct hlist_node *n;
693 long h = nfs4_deviceid_hash(id);
694
695 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
696 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
697 return;
698
699 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
700 if (!memcmp(&d->de_id, id, sizeof(*id))) {
701 hlist_del_rcu(&d->de_node);
702 spin_unlock(&c->dc_lock);
703 synchronize_rcu();
704 c->dc_free_callback(devid);
705 return;
706 }
707 spin_unlock(&c->dc_lock);
708 /* Why wasn't it found in the list? */
709 BUG();
710}
711EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
712
713/* Find and reference a deviceid */
714struct pnfs_deviceid_node *
715pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
716{
717 struct pnfs_deviceid_node *d;
718 struct hlist_node *n;
719 long hash = nfs4_deviceid_hash(id);
720
721 dprintk("--> %s hash %ld\n", __func__, hash);
722 rcu_read_lock();
723 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
724 if (!memcmp(&d->de_id, id, sizeof(*id))) {
725 if (!atomic_inc_not_zero(&d->de_ref)) {
726 goto fail;
727 } else {
728 rcu_read_unlock();
729 return d;
730 }
731 }
732 }
733fail:
734 rcu_read_unlock();
735 return NULL;
736}
737EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
738
739/*
740 * Add a deviceid to the cache.
741 * GETDEVICEINFOs for the same deviceid can race. If the deviceid is found, discard the new one.
742 */
743struct pnfs_deviceid_node *
744pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
745{
746 struct pnfs_deviceid_node *d;
747 long hash = nfs4_deviceid_hash(&new->de_id);
748
749 dprintk("--> %s hash %ld\n", __func__, hash);
750 spin_lock(&c->dc_lock);
751 d = pnfs_find_get_deviceid(c, &new->de_id);
752 if (d) {
753 spin_unlock(&c->dc_lock);
754 dprintk("%s [discard]\n", __func__);
755 c->dc_free_callback(new);
756 return d;
757 }
758 INIT_HLIST_NODE(&new->de_node);
759 atomic_set(&new->de_ref, 1);
760 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
761 spin_unlock(&c->dc_lock);
762 dprintk("%s [new]\n", __func__);
763 return new;
764}
765EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
766
767void
768pnfs_put_deviceid_cache(struct nfs_client *clp)
769{
770 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
771
772 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
773 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
774 int i;
775 /* Verify cache is empty */
776 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
777 BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
778 clp->cl_devid_cache = NULL;
779 spin_unlock(&clp->cl_lock);
780 kfree(local);
781 }
782}
783EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
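
One detail in pnfs_set_layout_stateid() above is worth spelling out: the stateid seqids are compared with (int)(newseq - oldseq) > 0, which still says "newer" when the 32-bit counter wraps around. A tiny standalone demonstration, with values chosen only to show the wrap case:

#include <stdio.h>
#include <stdint.h>

/* same comparison as the layout-stateid update: cast the unsigned
 * difference to a signed value, so wraparound is handled correctly */
static int seq_newer(uint32_t newseq, uint32_t oldseq)
{
	return (int32_t)(newseq - oldseq) > 0;
}

int main(void)
{
	printf("%d\n", seq_newer(5, 4));		/* 1: normal case  */
	printf("%d\n", seq_newer(4, 5));		/* 0: stale update */
	printf("%d\n", seq_newer(2, UINT32_MAX - 1));	/* 1: wrapped      */
	return 0;
}
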
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
new file mode 100644
index 000000000000..e12367d50489
--- /dev/null
+++ b/fs/nfs/pnfs.h
@@ -0,0 +1,189 @@
1/*
2 * pNFS client data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H
32
33struct pnfs_layout_segment {
34 struct list_head fi_list;
35 struct pnfs_layout_range range;
36 struct kref kref;
37 struct pnfs_layout_hdr *layout;
38};
39
40#ifdef CONFIG_NFS_V4_1
41
42#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
43
44enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed, stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed, stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
48};
49
50/* Per-layout driver specific registration structure */
51struct pnfs_layoutdriver_type {
52 struct list_head pnfs_tblid;
53 const u32 id;
54 const char *name;
55 struct module *owner;
56 int (*set_layoutdriver) (struct nfs_server *);
57 int (*clear_layoutdriver) (struct nfs_server *);
58 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
59 void (*free_lseg) (struct pnfs_layout_segment *lseg);
60};
61
62struct pnfs_layout_hdr {
63 unsigned long refcount;
64 struct list_head layouts; /* other client layouts */
65 struct list_head segs; /* layout segments list */
66 seqlock_t seqlock; /* Protects the stateid */
67 nfs4_stateid stateid;
68 unsigned long state;
69 struct inode *inode;
70};
71
72struct pnfs_device {
73 struct nfs4_deviceid dev_id;
74 unsigned int layout_type;
75 unsigned int mincount;
76 struct page **pages;
77 void *area;
78 unsigned int pgbase;
79 unsigned int pglen;
80};
81
82/*
83 * Device ID RCU cache. A device ID is unique per client ID and layout type.
84 */
85#define NFS4_DEVICE_ID_HASH_BITS 5
86#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
87#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
88
89static inline u32
90nfs4_deviceid_hash(struct nfs4_deviceid *id)
91{
92 unsigned char *cptr = (unsigned char *)id->data;
93 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
94 u32 x = 0;
95
96 while (nbytes--) {
97 x *= 37;
98 x += *cptr++;
99 }
100 return x & NFS4_DEVICE_ID_HASH_MASK;
101}
102
103struct pnfs_deviceid_node {
104 struct hlist_node de_node;
105 struct nfs4_deviceid de_id;
106 atomic_t de_ref;
107};
108
109struct pnfs_deviceid_cache {
110 spinlock_t dc_lock;
111 atomic_t dc_ref;
112 void (*dc_free_callback)(struct pnfs_deviceid_node *);
113 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
114};
115
116extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
117 void (*free_callback)(struct pnfs_deviceid_node *));
118extern void pnfs_put_deviceid_cache(struct nfs_client *);
119extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
120 struct pnfs_deviceid_cache *,
121 struct nfs4_deviceid *);
122extern struct pnfs_deviceid_node *pnfs_add_deviceid(
123 struct pnfs_deviceid_cache *,
124 struct pnfs_deviceid_node *);
125extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
126 struct pnfs_deviceid_node *devid);
127
128extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
129extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
130
131/* nfs4proc.c */
132extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
133 struct pnfs_device *dev);
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135
136/* pnfs.c */
137struct pnfs_layout_segment *
138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
139 enum pnfs_iomode access_type);
140void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
141void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp);
143void pnfs_destroy_layout(struct nfs_inode *);
144void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state);
148
149
150static inline int lo_fail_bit(u32 iomode)
151{
152 return iomode == IOMODE_RW ?
153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
154}
155
156/* Return true if a layout driver is being used for this mountpoint */
157static inline int pnfs_enabled_sb(struct nfs_server *nfss)
158{
159 return nfss->pnfs_curr_ld != NULL;
160}
161
162#else /* CONFIG_NFS_V4_1 */
163
164static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
165{
166}
167
168static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
169{
170}
171
172static inline struct pnfs_layout_segment *
173pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
174 enum pnfs_iomode access_type)
175{
176 return NULL;
177}
178
179static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
180{
181}
182
183static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
184{
185}
186
187#endif /* CONFIG_NFS_V4_1 */
188
189#endif /* FS_NFS_PNFS_H */
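
The nfs4_deviceid_hash() helper above is a simple multiply-by-37 rolling hash over the opaque device ID bytes, masked down to one of the 32 (1 << 5) cache buckets. The same calculation in standalone form; the 16-byte sample ID is arbitrary.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define DEVICE_ID_HASH_BITS 5
#define DEVICE_ID_HASH_MASK ((1u << DEVICE_ID_HASH_BITS) - 1)

/* multiply-by-37 rolling hash over the ID bytes, as in the header above */
static uint32_t deviceid_hash(const unsigned char *id, size_t len)
{
	uint32_t x = 0;

	while (len--)
		x = x * 37 + *id++;
	return x & DEVICE_ID_HASH_MASK;
}

int main(void)
{
	const unsigned char id[16] = { 0xde, 0xad, 0xbe, 0xef, 1, 2, 3, 4,
				       5, 6, 7, 8, 9, 10, 11, 12 };

	printf("bucket %u of %u\n", (unsigned)deviceid_hash(id, sizeof(id)),
	       DEVICE_ID_HASH_MASK + 1);
	return 0;
}
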
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 611bec22f552..58e7f84fc1fd 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -258,7 +258,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)
258 258
259static int 259static int
260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
261 int flags, struct nameidata *nd) 261 int flags, struct nfs_open_context *ctx)
262{ 262{
263 struct nfs_createdata *data; 263 struct nfs_createdata *data;
264 struct rpc_message msg = { 264 struct rpc_message msg = {
@@ -365,17 +365,32 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
365 return 1; 365 return 1;
366} 366}
367 367
368static void
369nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
370{
371 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
372}
373
374static int
375nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
376 struct inode *new_dir)
377{
378 if (nfs_async_handle_expired_key(task))
379 return 0;
380 nfs_mark_for_revalidate(old_dir);
381 nfs_mark_for_revalidate(new_dir);
382 return 1;
383}
384
368static int 385static int
369nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, 386nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
370 struct inode *new_dir, struct qstr *new_name) 387 struct inode *new_dir, struct qstr *new_name)
371{ 388{
372 struct nfs_renameargs arg = { 389 struct nfs_renameargs arg = {
373 .fromfh = NFS_FH(old_dir), 390 .old_dir = NFS_FH(old_dir),
374 .fromname = old_name->name, 391 .old_name = old_name,
375 .fromlen = old_name->len, 392 .new_dir = NFS_FH(new_dir),
376 .tofh = NFS_FH(new_dir), 393 .new_name = new_name,
377 .toname = new_name->name,
378 .tolen = new_name->len
379 }; 394 };
380 struct rpc_message msg = { 395 struct rpc_message msg = {
381 .rpc_proc = &nfs_procedures[NFSPROC_RENAME], 396 .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
@@ -519,14 +534,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
519 */ 534 */
520static int 535static int
521nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 536nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
522 u64 cookie, struct page *page, unsigned int count, int plus) 537 u64 cookie, struct page **pages, unsigned int count, int plus)
523{ 538{
524 struct inode *dir = dentry->d_inode; 539 struct inode *dir = dentry->d_inode;
525 struct nfs_readdirargs arg = { 540 struct nfs_readdirargs arg = {
526 .fh = NFS_FH(dir), 541 .fh = NFS_FH(dir),
527 .cookie = cookie, 542 .cookie = cookie,
528 .count = count, 543 .count = count,
529 .pages = &page, 544 .pages = pages,
530 }; 545 };
531 struct rpc_message msg = { 546 struct rpc_message msg = {
532 .rpc_proc = &nfs_procedures[NFSPROC_READDIR], 547 .rpc_proc = &nfs_procedures[NFSPROC_READDIR],
@@ -705,6 +720,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
705 .unlink_setup = nfs_proc_unlink_setup, 720 .unlink_setup = nfs_proc_unlink_setup,
706 .unlink_done = nfs_proc_unlink_done, 721 .unlink_done = nfs_proc_unlink_done,
707 .rename = nfs_proc_rename, 722 .rename = nfs_proc_rename,
723 .rename_setup = nfs_proc_rename_setup,
724 .rename_done = nfs_proc_rename_done,
708 .link = nfs_proc_link, 725 .link = nfs_proc_link,
709 .symlink = nfs_proc_symlink, 726 .symlink = nfs_proc_symlink,
710 .mkdir = nfs_proc_mkdir, 727 .mkdir = nfs_proc_mkdir,
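
The new rename_setup/rename_done entries follow the same pattern as the existing unlink hooks: generic sillyrename code drives an asynchronous RENAME and calls per-version hooks to pick the RPC procedure and post-process the reply. A toy userspace mock of that ops-table pattern; none of the names below are the kernel's.

#include <stdio.h>

struct msg { const char *procedure; };

struct version_ops {
	void (*rename_setup)(struct msg *m);
	int  (*rename_done)(int status);
};

static void v2_rename_setup(struct msg *m)
{
	m->procedure = "NFSPROC_RENAME";	/* pick the v2 procedure */
}

static int v2_rename_done(int status)
{
	/* the real hook marks both directories for revalidation on
	 * success; returning 1 means done, 0 means retry the call */
	printf("rename finished, status %d\n", status);
	return 1;
}

static const struct version_ops v2_ops = {
	.rename_setup = v2_rename_setup,
	.rename_done  = v2_rename_done,
};

/* generic async-rename driver, shared by all protocol versions */
static void async_rename(const struct version_ops *ops)
{
	struct msg m;

	ops->rename_setup(&m);
	printf("sending %s\n", m.procedure);
	ops->rename_done(0);
}

int main(void)
{
	async_rename(&v2_ops);
	return 0;
}
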
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6e2b06e6ca79..e4b62c6f5a6e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -25,6 +25,7 @@
25#include "internal.h" 25#include "internal.h"
26#include "iostat.h" 26#include "iostat.h"
27#include "fscache.h" 27#include "fscache.h"
28#include "pnfs.h"
28 29
29#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
30 31
@@ -46,7 +47,6 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
46 memset(p, 0, sizeof(*p)); 47 memset(p, 0, sizeof(*p));
47 INIT_LIST_HEAD(&p->pages); 48 INIT_LIST_HEAD(&p->pages);
48 p->npages = pagecount; 49 p->npages = pagecount;
49 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
50 if (pagecount <= ARRAY_SIZE(p->page_array)) 50 if (pagecount <= ARRAY_SIZE(p->page_array))
51 p->pagevec = p->page_array; 51 p->pagevec = p->page_array;
52 else { 52 else {
@@ -121,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
121 len = nfs_page_length(page); 121 len = nfs_page_length(page);
122 if (len == 0) 122 if (len == 0)
123 return nfs_return_empty_page(page); 123 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
124 new = nfs_create_request(ctx, inode, page, 0, len); 125 new = nfs_create_request(ctx, inode, page, 0, len);
125 if (IS_ERR(new)) { 126 if (IS_ERR(new)) {
126 unlock_page(page); 127 unlock_page(page);
@@ -190,6 +191,7 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
190 data->args.pages = data->pagevec; 191 data->args.pages = data->pagevec;
191 data->args.count = count; 192 data->args.count = count;
192 data->args.context = get_nfs_open_context(req->wb_context); 193 data->args.context = get_nfs_open_context(req->wb_context);
194 data->args.lock_context = req->wb_lock_context;
193 195
194 data->res.fattr = &data->fattr; 196 data->res.fattr = &data->fattr;
195 data->res.count = count; 197 data->res.count = count;
@@ -410,7 +412,7 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata)
410{ 412{
411 struct nfs_read_data *data = calldata; 413 struct nfs_read_data *data = calldata;
412 414
413 if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, 415 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
414 &data->args.seq_args, &data->res.seq_res, 416 &data->args.seq_args, &data->res.seq_res,
415 0, task)) 417 0, task))
416 return; 418 return;
@@ -624,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
624 if (ret == 0) 626 if (ret == 0)
625 goto read_complete; /* all pages were read */ 627 goto read_complete; /* all pages were read */
626 628
629 pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
627 if (rsize < PAGE_CACHE_SIZE) 630 if (rsize < PAGE_CACHE_SIZE)
628 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 631 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
629 else 632 else
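
The read path now asks the pNFS layer for a READ layout before building page requests; with CONFIG_NFS_V4_1 disabled, or no layout driver configured, pnfs_update_layout() simply returns NULL and the ordinary path through the metadata server is used. In this patch the returned segment is only cached for later use, so the sketch below only illustrates that intended fallback shape, with made-up names.

#include <stdio.h>
#include <stddef.h>

struct layout_segment { const char *data_server; };

/* stand-in for pnfs_update_layout(): may legitimately return NULL */
static struct layout_segment *update_layout(int pnfs_enabled)
{
	static struct layout_segment lseg = { .data_server = "ds1" };

	return pnfs_enabled ? &lseg : NULL;
}

static void read_page(int pnfs_enabled)
{
	struct layout_segment *lseg = update_layout(pnfs_enabled);

	if (lseg != NULL)
		printf("read via data server %s\n", lseg->data_server);
	else
		printf("read via the metadata server\n");
}

int main(void)
{
	read_page(0);
	read_page(1);
	return 0;
}
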
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f9df16de4a56..3600ec700d58 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -100,6 +100,7 @@ enum {
100 Opt_addr, Opt_mountaddr, Opt_clientaddr, 100 Opt_addr, Opt_mountaddr, Opt_clientaddr,
101 Opt_lookupcache, 101 Opt_lookupcache,
102 Opt_fscache_uniq, 102 Opt_fscache_uniq,
103 Opt_local_lock,
103 104
104 /* Special mount options */ 105 /* Special mount options */
105 Opt_userspace, Opt_deprecated, Opt_sloppy, 106 Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -171,6 +172,7 @@ static const match_table_t nfs_mount_option_tokens = {
171 172
172 { Opt_lookupcache, "lookupcache=%s" }, 173 { Opt_lookupcache, "lookupcache=%s" },
173 { Opt_fscache_uniq, "fsc=%s" }, 174 { Opt_fscache_uniq, "fsc=%s" },
175 { Opt_local_lock, "local_lock=%s" },
174 176
175 { Opt_err, NULL } 177 { Opt_err, NULL }
176}; 178};
@@ -236,6 +238,22 @@ static match_table_t nfs_lookupcache_tokens = {
236 { Opt_lookupcache_err, NULL } 238 { Opt_lookupcache_err, NULL }
237}; 239};
238 240
241enum {
242 Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
243 Opt_local_lock_none,
244
245 Opt_local_lock_err
246};
247
248static match_table_t nfs_local_lock_tokens = {
249 { Opt_local_lock_all, "all" },
250 { Opt_local_lock_flock, "flock" },
251 { Opt_local_lock_posix, "posix" },
252 { Opt_local_lock_none, "none" },
253
254 { Opt_local_lock_err, NULL }
255};
256
239 257
240static void nfs_umount_begin(struct super_block *); 258static void nfs_umount_begin(struct super_block *);
241static int nfs_statfs(struct dentry *, struct kstatfs *); 259static int nfs_statfs(struct dentry *, struct kstatfs *);
@@ -270,7 +288,7 @@ static const struct super_operations nfs_sops = {
270 .write_inode = nfs_write_inode, 288 .write_inode = nfs_write_inode,
271 .put_super = nfs_put_super, 289 .put_super = nfs_put_super,
272 .statfs = nfs_statfs, 290 .statfs = nfs_statfs,
273 .clear_inode = nfs_clear_inode, 291 .evict_inode = nfs_evict_inode,
274 .umount_begin = nfs_umount_begin, 292 .umount_begin = nfs_umount_begin,
275 .show_options = nfs_show_options, 293 .show_options = nfs_show_options,
276 .show_stats = nfs_show_stats, 294 .show_stats = nfs_show_stats,
@@ -340,7 +358,7 @@ static const struct super_operations nfs4_sops = {
340 .write_inode = nfs_write_inode, 358 .write_inode = nfs_write_inode,
341 .put_super = nfs_put_super, 359 .put_super = nfs_put_super,
342 .statfs = nfs_statfs, 360 .statfs = nfs_statfs,
343 .clear_inode = nfs4_clear_inode, 361 .evict_inode = nfs4_evict_inode,
344 .umount_begin = nfs_umount_begin, 362 .umount_begin = nfs_umount_begin,
345 .show_options = nfs_show_options, 363 .show_options = nfs_show_options,
346 .show_stats = nfs_show_stats, 364 .show_stats = nfs_show_stats,
@@ -431,7 +449,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
431 goto out_err; 449 goto out_err;
432 450
433 error = server->nfs_client->rpc_ops->statfs(server, fh, &res); 451 error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
452 if (unlikely(error == -ESTALE)) {
453 struct dentry *pd_dentry;
434 454
455 pd_dentry = dget_parent(dentry);
456 if (pd_dentry != NULL) {
457 nfs_zap_caches(pd_dentry->d_inode);
458 dput(pd_dentry);
459 }
460 }
435 nfs_free_fattr(res.fattr); 461 nfs_free_fattr(res.fattr);
436 if (error < 0) 462 if (error < 0)
437 goto out_err; 463 goto out_err;
@@ -546,6 +572,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
546{ 572{
547 struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address; 573 struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address;
548 574
575 if (nfss->flags & NFS_MOUNT_LEGACY_INTERFACE)
576 return;
577
549 switch (sap->sa_family) { 578 switch (sap->sa_family) {
550 case AF_INET: { 579 case AF_INET: {
551 struct sockaddr_in *sin = (struct sockaddr_in *)sap; 580 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
@@ -611,6 +640,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
611 const struct proc_nfs_info *nfs_infop; 640 const struct proc_nfs_info *nfs_infop;
612 struct nfs_client *clp = nfss->nfs_client; 641 struct nfs_client *clp = nfss->nfs_client;
613 u32 version = clp->rpc_ops->version; 642 u32 version = clp->rpc_ops->version;
643 int local_flock, local_fcntl;
614 644
615 seq_printf(m, ",vers=%u", version); 645 seq_printf(m, ",vers=%u", version);
616 seq_printf(m, ",rsize=%u", nfss->rsize); 646 seq_printf(m, ",rsize=%u", nfss->rsize);
@@ -652,6 +682,25 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
652 682
653 if (nfss->options & NFS_OPTION_FSCACHE) 683 if (nfss->options & NFS_OPTION_FSCACHE)
654 seq_printf(m, ",fsc"); 684 seq_printf(m, ",fsc");
685
686 if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) {
687 if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
688 seq_printf(m, ",lookupcache=none");
689 else
690 seq_printf(m, ",lookupcache=pos");
691 }
692
693 local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK;
694 local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL;
695
696 if (!local_flock && !local_fcntl)
697 seq_printf(m, ",local_lock=none");
698 else if (local_flock && local_fcntl)
699 seq_printf(m, ",local_lock=all");
700 else if (local_flock)
701 seq_printf(m, ",local_lock=flock");
702 else
703 seq_printf(m, ",local_lock=posix");
655} 704}
656 705
657/* 706/*
@@ -999,9 +1048,13 @@ static int nfs_parse_mount_options(char *raw,
999 break; 1048 break;
1000 case Opt_lock: 1049 case Opt_lock:
1001 mnt->flags &= ~NFS_MOUNT_NONLM; 1050 mnt->flags &= ~NFS_MOUNT_NONLM;
1051 mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
1052 NFS_MOUNT_LOCAL_FCNTL);
1002 break; 1053 break;
1003 case Opt_nolock: 1054 case Opt_nolock:
1004 mnt->flags |= NFS_MOUNT_NONLM; 1055 mnt->flags |= NFS_MOUNT_NONLM;
1056 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1057 NFS_MOUNT_LOCAL_FCNTL);
1005 break; 1058 break;
1006 case Opt_v2: 1059 case Opt_v2:
1007 mnt->flags &= ~NFS_MOUNT_VER3; 1060 mnt->flags &= ~NFS_MOUNT_VER3;
@@ -1402,6 +1455,34 @@ static int nfs_parse_mount_options(char *raw,
1402 mnt->fscache_uniq = string; 1455 mnt->fscache_uniq = string;
1403 mnt->options |= NFS_OPTION_FSCACHE; 1456 mnt->options |= NFS_OPTION_FSCACHE;
1404 break; 1457 break;
1458 case Opt_local_lock:
1459 string = match_strdup(args);
1460 if (string == NULL)
1461 goto out_nomem;
1462 token = match_token(string, nfs_local_lock_tokens,
1463 args);
1464 kfree(string);
1465 switch (token) {
1466 case Opt_local_lock_all:
1467 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1468 NFS_MOUNT_LOCAL_FCNTL);
1469 break;
1470 case Opt_local_lock_flock:
1471 mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
1472 break;
1473 case Opt_local_lock_posix:
1474 mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
1475 break;
1476 case Opt_local_lock_none:
1477 mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
1478 NFS_MOUNT_LOCAL_FCNTL);
1479 break;
1480 default:
1481 dfprintk(MOUNT, "NFS: invalid "
1482 "local_lock argument\n");
1483 return 0;
1484 };
1485 break;
1405 1486
1406 /* 1487 /*
1407 * Special options 1488 * Special options
@@ -1780,6 +1861,7 @@ static int nfs_validate_mount_data(void *options,
1780 * can deal with. 1861 * can deal with.
1781 */ 1862 */
1782 args->flags = data->flags & NFS_MOUNT_FLAGMASK; 1863 args->flags = data->flags & NFS_MOUNT_FLAGMASK;
1864 args->flags |= NFS_MOUNT_LEGACY_INTERFACE;
1783 args->rsize = data->rsize; 1865 args->rsize = data->rsize;
1784 args->wsize = data->wsize; 1866 args->wsize = data->wsize;
1785 args->timeo = data->timeo; 1867 args->timeo = data->timeo;
@@ -1806,6 +1888,12 @@ static int nfs_validate_mount_data(void *options,
1806 if (!args->nfs_server.hostname) 1888 if (!args->nfs_server.hostname)
1807 goto out_nomem; 1889 goto out_nomem;
1808 1890
1891 if (!(data->flags & NFS_MOUNT_NONLM))
1892 args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
1893 NFS_MOUNT_LOCAL_FCNTL);
1894 else
1895 args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
1896 NFS_MOUNT_LOCAL_FCNTL);
1809 /* 1897 /*
1810 * The legacy version 6 binary mount data from userspace has a 1898 * The legacy version 6 binary mount data from userspace has a
1811 * field used only to transport selinux information into the 1899 * field used only to transport selinux information into the
@@ -2422,7 +2510,8 @@ static void nfs4_fill_super(struct super_block *sb)
2422 2510
2423static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) 2511static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2424{ 2512{
2425 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); 2513 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
2514 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
2426} 2515}
2427 2516
2428static int nfs4_validate_text_mount_data(void *options, 2517static int nfs4_validate_text_mount_data(void *options,
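
The new local_lock= keyword maps onto the two flag bits added here: "flock" and "posix" make flock(2) and POSIX/fcntl locks local to the client respectively, "all" sets both bits and "none" clears both. A userspace sketch of that mapping, using plain strcmp in place of the kernel's match_token tables; the flag values are illustrative.

#include <stdio.h>
#include <string.h>

#define LOCAL_FLOCK 0x1
#define LOCAL_FCNTL 0x2

static int parse_local_lock(const char *arg, unsigned int *flags)
{
	if (strcmp(arg, "all") == 0)
		*flags |= LOCAL_FLOCK | LOCAL_FCNTL;
	else if (strcmp(arg, "flock") == 0)
		*flags |= LOCAL_FLOCK;
	else if (strcmp(arg, "posix") == 0)
		*flags |= LOCAL_FCNTL;
	else if (strcmp(arg, "none") == 0)
		*flags &= ~(LOCAL_FLOCK | LOCAL_FCNTL);
	else
		return -1;	/* invalid local_lock argument */
	return 0;
}

int main(void)
{
	const char *args[] = { "all", "flock", "posix", "none", "bogus" };
	unsigned int i;

	for (i = 0; i < sizeof(args) / sizeof(args[0]); i++) {
		unsigned int flags = 0;

		if (parse_local_lock(args[i], &flags) == 0)
			printf("local_lock=%-5s -> flags 0x%x\n",
			       args[i], flags);
		else
			printf("local_lock=%-5s -> rejected\n", args[i]);
	}
	return 0;
}
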
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index ad4d2e787b20..978aaeb8a093 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = {
32 .extra1 = (int *)&nfs_set_port_min, 32 .extra1 = (int *)&nfs_set_port_min,
33 .extra2 = (int *)&nfs_set_port_max, 33 .extra2 = (int *)&nfs_set_port_max,
34 }, 34 },
35#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
35 { 36 {
36 .procname = "idmap_cache_timeout", 37 .procname = "idmap_cache_timeout",
37 .data = &nfs_idmap_cache_timeout, 38 .data = &nfs_idmap_cache_timeout,
@@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = {
39 .mode = 0644, 40 .mode = 0644,
40 .proc_handler = proc_dointvec_jiffies, 41 .proc_handler = proc_dointvec_jiffies,
41 }, 42 },
43#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
42#endif 44#endif
43 { 45 {
44 .procname = "nfs_mountpoint_timeout", 46 .procname = "nfs_mountpoint_timeout",
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a2242af6a17d..9a16bad5d2ea 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -13,9 +13,12 @@
13#include <linux/nfs_fs.h> 13#include <linux/nfs_fs.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/wait.h> 15#include <linux/wait.h>
16#include <linux/namei.h>
16 17
17#include "internal.h" 18#include "internal.h"
18#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "iostat.h"
21#include "delegation.h"
19 22
20struct nfs_unlinkdata { 23struct nfs_unlinkdata {
21 struct hlist_node list; 24 struct hlist_node list;
@@ -110,7 +113,7 @@ void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
110 struct nfs_unlinkdata *data = calldata; 113 struct nfs_unlinkdata *data = calldata;
111 struct nfs_server *server = NFS_SERVER(data->dir); 114 struct nfs_server *server = NFS_SERVER(data->dir);
112 115
113 if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, 116 if (nfs4_setup_sequence(server, &data->args.seq_args,
114 &data->res.seq_res, 1, task)) 117 &data->res.seq_res, 1, task))
115 return; 118 return;
116 rpc_call_start(task); 119 rpc_call_start(task);
@@ -244,7 +247,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry)
244 * @dir: parent directory of dentry 247 * @dir: parent directory of dentry
245 * @dentry: dentry to unlink 248 * @dentry: dentry to unlink
246 */ 249 */
247int 250static int
248nfs_async_unlink(struct inode *dir, struct dentry *dentry) 251nfs_async_unlink(struct inode *dir, struct dentry *dentry)
249{ 252{
250 struct nfs_unlinkdata *data; 253 struct nfs_unlinkdata *data;
@@ -259,7 +262,6 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
259 status = PTR_ERR(data->cred); 262 status = PTR_ERR(data->cred);
260 goto out_free; 263 goto out_free;
261 } 264 }
262 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
263 data->res.dir_attr = &data->dir_attr; 265 data->res.dir_attr = &data->dir_attr;
264 266
265 status = -EBUSY; 267 status = -EBUSY;
@@ -303,3 +305,256 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
303 if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) 305 if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
304 nfs_free_unlinkdata(data); 306 nfs_free_unlinkdata(data);
305} 307}
308
309/* Cancel a queued async unlink. Called when a sillyrename run fails. */
310static void
311nfs_cancel_async_unlink(struct dentry *dentry)
312{
313 spin_lock(&dentry->d_lock);
314 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
315 struct nfs_unlinkdata *data = dentry->d_fsdata;
316
317 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
318 spin_unlock(&dentry->d_lock);
319 nfs_free_unlinkdata(data);
320 return;
321 }
322 spin_unlock(&dentry->d_lock);
323}
324
325struct nfs_renamedata {
326 struct nfs_renameargs args;
327 struct nfs_renameres res;
328 struct rpc_cred *cred;
329 struct inode *old_dir;
330 struct dentry *old_dentry;
331 struct nfs_fattr old_fattr;
332 struct inode *new_dir;
333 struct dentry *new_dentry;
334 struct nfs_fattr new_fattr;
335};
336
337/**
338 * nfs_async_rename_done - Sillyrename post-processing
339 * @task: rpc_task of the sillyrename
340 * @calldata: nfs_renamedata for the sillyrename
341 *
342 * Do the directory attribute updates and the d_move
343 */
344static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
345{
346 struct nfs_renamedata *data = calldata;
347 struct inode *old_dir = data->old_dir;
348 struct inode *new_dir = data->new_dir;
349
350 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
351 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
352 return;
353 }
354
355 if (task->tk_status != 0) {
356 nfs_cancel_async_unlink(data->old_dentry);
357 return;
358 }
359
360 nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir));
361 d_move(data->old_dentry, data->new_dentry);
362}
363
364/**
365 * nfs_async_rename_release - Release the sillyrename data.
366 * @calldata: the struct nfs_renamedata to be released
367 */
368static void nfs_async_rename_release(void *calldata)
369{
370 struct nfs_renamedata *data = calldata;
371 struct super_block *sb = data->old_dir->i_sb;
372
373 if (data->old_dentry->d_inode)
374 nfs_mark_for_revalidate(data->old_dentry->d_inode);
375
376 dput(data->old_dentry);
377 dput(data->new_dentry);
378 iput(data->old_dir);
379 iput(data->new_dir);
380 nfs_sb_deactive(sb);
381 put_rpccred(data->cred);
382 kfree(data);
383}
384
385#if defined(CONFIG_NFS_V4_1)
386static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
387{
388 struct nfs_renamedata *data = calldata;
389 struct nfs_server *server = NFS_SERVER(data->old_dir);
390
391 if (nfs4_setup_sequence(server, &data->args.seq_args,
392 &data->res.seq_res, 1, task))
393 return;
394 rpc_call_start(task);
395}
396#endif /* CONFIG_NFS_V4_1 */
397
398static const struct rpc_call_ops nfs_rename_ops = {
399 .rpc_call_done = nfs_async_rename_done,
400 .rpc_release = nfs_async_rename_release,
401#if defined(CONFIG_NFS_V4_1)
402 .rpc_call_prepare = nfs_rename_prepare,
403#endif /* CONFIG_NFS_V4_1 */
404};
405
406/**
407 * nfs_async_rename - perform an asynchronous rename operation
408 * @old_dir: directory that currently holds the dentry to be renamed
409 * @new_dir: target directory for the rename
410 * @old_dentry: original dentry to be renamed
411 * @new_dentry: dentry to which the old_dentry should be renamed
412 *
413 * It's expected that valid references to the dentries and inodes are held
414 */
415static struct rpc_task *
416nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
417 struct dentry *old_dentry, struct dentry *new_dentry)
418{
419 struct nfs_renamedata *data;
420 struct rpc_message msg = { };
421 struct rpc_task_setup task_setup_data = {
422 .rpc_message = &msg,
423 .callback_ops = &nfs_rename_ops,
424 .workqueue = nfsiod_workqueue,
425 .rpc_client = NFS_CLIENT(old_dir),
426 .flags = RPC_TASK_ASYNC,
427 };
428
429 data = kzalloc(sizeof(*data), GFP_KERNEL);
430 if (data == NULL)
431 return ERR_PTR(-ENOMEM);
432 task_setup_data.callback_data = data;
433
434 data->cred = rpc_lookup_cred();
435 if (IS_ERR(data->cred)) {
436 struct rpc_task *task = ERR_CAST(data->cred);
437 kfree(data);
438 return task;
439 }
440
441 msg.rpc_argp = &data->args;
442 msg.rpc_resp = &data->res;
443 msg.rpc_cred = data->cred;
444
445 /* set up nfs_renamedata */
446 data->old_dir = old_dir;
447 atomic_inc(&old_dir->i_count);
448 data->new_dir = new_dir;
449 atomic_inc(&new_dir->i_count);
450 data->old_dentry = dget(old_dentry);
451 data->new_dentry = dget(new_dentry);
452 nfs_fattr_init(&data->old_fattr);
453 nfs_fattr_init(&data->new_fattr);
454
455 /* set up nfs_renameargs */
456 data->args.old_dir = NFS_FH(old_dir);
457 data->args.old_name = &old_dentry->d_name;
458 data->args.new_dir = NFS_FH(new_dir);
459 data->args.new_name = &new_dentry->d_name;
460
461 /* set up nfs_renameres */
462 data->res.old_fattr = &data->old_fattr;
463 data->res.new_fattr = &data->new_fattr;
464
465 nfs_sb_active(old_dir->i_sb);
466
467 NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir);
468
469 return rpc_run_task(&task_setup_data);
470}
471
472/**
473 * nfs_sillyrename - Perform a silly-rename of a dentry
474 * @dir: inode of directory that contains dentry
475 * @dentry: dentry to be sillyrenamed
476 *
477 * NFSv2/3 is stateless and the server doesn't know when the client is
478 * holding a file open. To prevent application problems when a file is
479 * unlinked while it's still open, the client performs a "silly-rename".
480 * That is, it renames the file to a hidden file in the same directory,
481 * and only performs the unlink once the last reference to it is put.
482 *
483 * The final cleanup is done during dentry_iput.
484 */
485int
486nfs_sillyrename(struct inode *dir, struct dentry *dentry)
487{
488 static unsigned int sillycounter;
489 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
490 const int countersize = sizeof(sillycounter)*2;
491 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
492 char silly[slen+1];
493 struct dentry *sdentry;
494 struct rpc_task *task;
495 int error = -EIO;
496
497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
498 dentry->d_parent->d_name.name, dentry->d_name.name,
499 atomic_read(&dentry->d_count));
500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
501
502 /*
503 * We don't allow a dentry to be silly-renamed twice.
504 */
505 error = -EBUSY;
506 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
507 goto out;
508
509 sprintf(silly, ".nfs%*.*Lx",
510 fileidsize, fileidsize,
511 (unsigned long long)NFS_FILEID(dentry->d_inode));
512
513 /* Return delegation in anticipation of the rename */
514 nfs_inode_return_delegation(dentry->d_inode);
515
516 sdentry = NULL;
517 do {
518 char *suffix = silly + slen - countersize;
519
520 dput(sdentry);
521 sillycounter++;
522 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
523
524 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
525 dentry->d_name.name, silly);
526
527 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
528 /*
529 * N.B. Better to return EBUSY here ... it could be
530 * dangerous to delete the file while it's in use.
531 */
532 if (IS_ERR(sdentry))
533 goto out;
534 } while (sdentry->d_inode != NULL); /* need negative lookup */
535
536 /* queue unlink first. Can't do this from rpc_release as it
537 * has to allocate memory
538 */
539 error = nfs_async_unlink(dir, dentry);
540 if (error)
541 goto out_dput;
542
543 /* run the rename task, undo unlink if it fails */
544 task = nfs_async_rename(dir, dir, dentry, sdentry);
545 if (IS_ERR(task)) {
546 error = -EBUSY;
547 nfs_cancel_async_unlink(dentry);
548 goto out_dput;
549 }
550
551 /* wait for the RPC task to complete, unless a SIGKILL intervenes */
552 error = rpc_wait_for_completion_task(task);
553 if (error == 0)
554 error = task->tk_status;
555 rpc_put_task(task);
556out_dput:
557 dput(sdentry);
558out:
559 return error;
560}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9f81bdd91c55..4c14c17a5276 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -55,7 +55,6 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
55 if (p) { 55 if (p) {
56 memset(p, 0, sizeof(*p)); 56 memset(p, 0, sizeof(*p));
57 INIT_LIST_HEAD(&p->pages); 57 INIT_LIST_HEAD(&p->pages);
58 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
59 } 58 }
60 return p; 59 return p;
61} 60}
@@ -75,7 +74,6 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
75 memset(p, 0, sizeof(*p)); 74 memset(p, 0, sizeof(*p));
76 INIT_LIST_HEAD(&p->pages); 75 INIT_LIST_HEAD(&p->pages);
77 p->npages = pagecount; 76 p->npages = pagecount;
78 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
79 if (pagecount <= ARRAY_SIZE(p->page_array)) 77 if (pagecount <= ARRAY_SIZE(p->page_array))
80 p->pagevec = p->page_array; 78 p->pagevec = p->page_array;
81 else { 79 else {
@@ -292,9 +290,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
292 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 290 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
293 291
294 nfs_pageio_cond_complete(pgio, page->index); 292 nfs_pageio_cond_complete(pgio, page->index);
295 ret = nfs_page_async_flush(pgio, page, 293 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
296 wbc->sync_mode == WB_SYNC_NONE ||
297 wbc->nonblocking != 0);
298 if (ret == -EAGAIN) { 294 if (ret == -EAGAIN) {
299 redirty_page_for_writepage(wbc, page); 295 redirty_page_for_writepage(wbc, page);
300 ret = 0; 296 ret = 0;
@@ -700,7 +696,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
700 req = nfs_page_find_request(page); 696 req = nfs_page_find_request(page);
701 if (req == NULL) 697 if (req == NULL)
702 return 0; 698 return 0;
703 do_flush = req->wb_page != page || req->wb_context != ctx; 699 do_flush = req->wb_page != page || req->wb_context != ctx ||
700 req->wb_lock_context->lockowner != current->files ||
701 req->wb_lock_context->pid != current->tgid;
704 nfs_release_request(req); 702 nfs_release_request(req);
705 if (!do_flush) 703 if (!do_flush)
706 return 0; 704 return 0;
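The nfs_flush_incompatible() hunk above extends the "must we flush the cached request first?" test: a pending write is now only reusable if it came from the same page, the same open context and the same lock context (lock owner and pid). A simplified stand-in for that predicate, with mock types rather than the kernel's structures:

/* Sketch of the extended compatibility test in nfs_flush_incompatible().
 * All types here are simplified stand-ins for the kernel's structures. */
#include <stdbool.h>
#include <stdio.h>

struct lock_ctx { void *lockowner; int pid; };
struct cached_req { void *page; void *open_ctx; struct lock_ctx lock; };

static bool need_flush(const struct cached_req *req, void *page,
		       void *open_ctx, const struct lock_ctx *cur)
{
	return req->page != page ||
	       req->open_ctx != open_ctx ||
	       req->lock.lockowner != cur->lockowner ||
	       req->lock.pid != cur->pid;
}

int main(void)
{
	int files_a, files_b;
	struct lock_ctx cur = { .lockowner = &files_a, .pid = 100 };
	struct cached_req req = { .page = (void *)1, .open_ctx = (void *)2,
				  .lock = { .lockowner = &files_a, .pid = 100 } };

	printf("%d\n", need_flush(&req, (void *)1, (void *)2, &cur)); /* 0 */
	cur.lockowner = &files_b;		/* different lock owner */
	printf("%d\n", need_flush(&req, (void *)1, (void *)2, &cur)); /* 1 */
	return 0;
}
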
@@ -824,6 +822,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
824 data->args.pages = data->pagevec; 822 data->args.pages = data->pagevec;
825 data->args.count = count; 823 data->args.count = count;
826 data->args.context = get_nfs_open_context(req->wb_context); 824 data->args.context = get_nfs_open_context(req->wb_context);
825 data->args.lock_context = req->wb_lock_context;
827 data->args.stable = NFS_UNSTABLE; 826 data->args.stable = NFS_UNSTABLE;
828 if (how & FLUSH_STABLE) { 827 if (how & FLUSH_STABLE) {
829 data->args.stable = NFS_DATA_SYNC; 828 data->args.stable = NFS_DATA_SYNC;
@@ -1047,9 +1046,9 @@ out:
1047void nfs_write_prepare(struct rpc_task *task, void *calldata) 1046void nfs_write_prepare(struct rpc_task *task, void *calldata)
1048{ 1047{
1049 struct nfs_write_data *data = calldata; 1048 struct nfs_write_data *data = calldata;
1050 struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
1051 1049
1052 if (nfs4_setup_sequence(clp, &data->args.seq_args, 1050 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
1051 &data->args.seq_args,
1053 &data->res.seq_res, 1, task)) 1052 &data->res.seq_res, 1, task))
1054 return; 1053 return;
1055 rpc_call_start(task); 1054 rpc_call_start(task);
@@ -1430,15 +1429,17 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1430 int flags = FLUSH_SYNC; 1429 int flags = FLUSH_SYNC;
1431 int ret = 0; 1430 int ret = 0;
1432 1431
1433 /* Don't commit yet if this is a non-blocking flush and there are 1432 if (wbc->sync_mode == WB_SYNC_NONE) {
1434 * lots of outstanding writes for this mapping. 1433 /* Don't commit yet if this is a non-blocking flush and there
1435 */ 1434 * are a lot of outstanding writes for this mapping.
1436 if (wbc->sync_mode == WB_SYNC_NONE && 1435 */
1437 nfsi->ncommit <= (nfsi->npages >> 1)) 1436 if (nfsi->ncommit <= (nfsi->npages >> 1))
1438 goto out_mark_dirty; 1437 goto out_mark_dirty;
1439 1438
1440 if (wbc->nonblocking || wbc->for_background) 1439 /* don't wait for the COMMIT response */
1441 flags = 0; 1440 flags = 0;
1441 }
1442
1442 ret = nfs_commit_inode(inode, flags); 1443 ret = nfs_commit_inode(inode, flags);
1443 if (ret >= 0) { 1444 if (ret >= 0) {
1444 if (wbc->sync_mode == WB_SYNC_NONE) { 1445 if (wbc->sync_mode == WB_SYNC_NONE) {
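The reworked nfs_commit_unstable_pages() logic above amounts to: for a WB_SYNC_NONE flush, defer the COMMIT while no more than half of the inode's pages are awaiting commit, and when a COMMIT is sent, don't wait for the reply (flags = 0 rather than FLUSH_SYNC). An illustrative reduction of that decision, with stand-in names for wbc->sync_mode and the flush flags:

/* Illustrative reduction of the commit decision in nfs_commit_unstable_pages().
 * SYNC_NONE/FLUSH_SYNC stand in for wbc->sync_mode and the kernel's flag values. */
#include <stdbool.h>
#include <stdio.h>

enum sync_mode { SYNC_NONE, SYNC_ALL };
#define FLUSH_SYNC 1	/* wait for the COMMIT reply */

/* Returns true if a COMMIT should be sent now; *flags says whether to wait. */
static bool should_commit(enum sync_mode mode, unsigned long ncommit,
			  unsigned long npages, int *flags)
{
	*flags = FLUSH_SYNC;
	if (mode == SYNC_NONE) {
		/* non-blocking flush: defer while few pages need committing */
		if (ncommit <= (npages >> 1))
			return false;
		*flags = 0;	/* send the COMMIT but don't wait for it */
	}
	return true;
}

int main(void)
{
	int flags;

	printf("%d\n", should_commit(SYNC_NONE, 10, 100, &flags)); /* 0: defer */
	printf("%d\n", should_commit(SYNC_NONE, 80, 100, &flags)); /* 1, flags=0 */
	printf("%d\n", should_commit(SYNC_ALL, 10, 100, &flags));  /* 1, flags=FLUSH_SYNC */
	return 0;
}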