aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Kconfig19
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/callback_proc.c8
-rw-r--r--fs/nfs/client.c28
-rw-r--r--fs/nfs/delegation.c10
-rw-r--r--fs/nfs/dir.c1015
-rw-r--r--fs/nfs/dns_resolve.c6
-rw-r--r--fs/nfs/file.c86
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfs/idmap.c211
-rw-r--r--fs/nfs/inode.c39
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/mount_clnt.c4
-rw-r--r--fs/nfs/nfs2xdr.c107
-rw-r--r--fs/nfs/nfs3proc.c62
-rw-r--r--fs/nfs/nfs3xdr.c196
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4filelayout.c280
-rw-r--r--fs/nfs/nfs4filelayout.h94
-rw-r--r--fs/nfs/nfs4filelayoutdev.c448
-rw-r--r--fs/nfs/nfs4proc.c497
-rw-r--r--fs/nfs/nfs4state.c52
-rw-r--r--fs/nfs/nfs4xdr.c700
-rw-r--r--fs/nfs/nfsroot.c568
-rw-r--r--fs/nfs/pnfs.c783
-rw-r--r--fs/nfs/pnfs.h189
-rw-r--r--fs/nfs/proc.c35
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/super.c72
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/unlink.c259
-rw-r--r--fs/nfs/write.c22
33 files changed, 4383 insertions, 1440 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f7e13db613cb..ba306658a6db 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -76,13 +76,17 @@ config NFS_V4
76 76
77config NFS_V4_1 77config NFS_V4_1
78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
79 depends on NFS_V4 && EXPERIMENTAL 79 depends on NFS_FS && NFS_V4 && EXPERIMENTAL
80 select PNFS_FILE_LAYOUT
80 help 81 help
81 This option enables support for minor version 1 of the NFSv4 protocol 82 This option enables support for minor version 1 of the NFSv4 protocol
82 (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. 83 (RFC 5661) in the kernel's NFS client.
83 84
84 If unsure, say N. 85 If unsure, say N.
85 86
87config PNFS_FILE_LAYOUT
88 tristate
89
86config ROOT_NFS 90config ROOT_NFS
87 bool "Root file system on NFS" 91 bool "Root file system on NFS"
88 depends on NFS_FS=y && IP_PNP 92 depends on NFS_FS=y && IP_PNP
@@ -117,3 +121,14 @@ config NFS_USE_KERNEL_DNS
117 select DNS_RESOLVER 121 select DNS_RESOLVER
118 select KEYS 122 select KEYS
119 default y 123 default y
124
125config NFS_USE_NEW_IDMAPPER
126 bool "Use the new idmapper upcall routine"
127 depends on NFS_V4 && KEYS
128 help
129 Say Y here if you want NFS to use the new idmapper upcall functions.
130 You will need /sbin/request-key (usually provided by the keyutils
131 package). For details, read
132 <file:Documentation/filesystems/nfs/idmapper.txt>.
133
134 If you are unsure, say N.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index da7fda639eac..4776ff9e3814 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
15 delegation.o idmap.o \ 15 delegation.o idmap.o \
16 callback.o callback_xdr.o callback_proc.o \ 16 callback.o callback_xdr.o callback_proc.o \
17 nfs4namespace.o 17 nfs4namespace.o
18nfs-$(CONFIG_NFS_V4_1) += pnfs.o
18nfs-$(CONFIG_SYSCTL) += sysctl.o 19nfs-$(CONFIG_SYSCTL) += sysctl.o
19nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o 20nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
21
22obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
23nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e17b49e2eabd..aeec017fe814 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv)
109{ 109{
110 int ret; 110 int ret;
111 111
112 ret = svc_create_xprt(serv, "tcp", PF_INET, 112 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
113 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 113 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
114 if (ret <= 0) 114 if (ret <= 0)
115 goto out_err; 115 goto out_err;
@@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv)
117 dprintk("NFS: Callback listener port = %u (af %u)\n", 117 dprintk("NFS: Callback listener port = %u (af %u)\n",
118 nfs_callback_tcpport, PF_INET); 118 nfs_callback_tcpport, PF_INET);
119 119
120 ret = svc_create_xprt(serv, "tcp", PF_INET6, 120 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
121 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 121 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
122 if (ret > 0) { 122 if (ret > 0) {
123 nfs_callback_tcpport6 = ret; 123 nfs_callback_tcpport6 = ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 930d10fecdaf..2950fca0c61b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
118 if (delegation == NULL) 118 if (delegation == NULL)
119 return 0; 119 return 0;
120 120
121 /* seqid is 4-bytes long */ 121 if (stateid->stateid.seqid != 0)
122 if (((u32 *) &stateid->data)[0] != 0)
123 return 0; 122 return 0;
124 if (memcmp(&delegation->stateid.data[4], &stateid->data[4], 123 if (memcmp(&delegation->stateid.stateid.other,
125 sizeof(stateid->data)-4)) 124 &stateid->stateid.other,
125 NFS4_STATEID_OTHER_SIZE))
126 return 0; 126 return 0;
127 127
128 return 1; 128 return 1;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e7340729af89..0870d0d4efc0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
48#include "iostat.h" 48#include "iostat.h"
49#include "internal.h" 49#include "internal.h"
50#include "fscache.h" 50#include "fscache.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_CLIENT 53#define NFSDBG_FACILITY NFSDBG_CLIENT
53 54
@@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
155 cred = rpc_lookup_machine_cred(); 156 cred = rpc_lookup_machine_cred();
156 if (!IS_ERR(cred)) 157 if (!IS_ERR(cred))
157 clp->cl_machine_cred = cred; 158 clp->cl_machine_cred = cred;
158 159#if defined(CONFIG_NFS_V4_1)
160 INIT_LIST_HEAD(&clp->cl_layouts);
161#endif
159 nfs_fscache_get_client_cookie(clp); 162 nfs_fscache_get_client_cookie(clp);
160 163
161 return clp; 164 return clp;
@@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp)
252 nfs_free_client(clp); 255 nfs_free_client(clp);
253 } 256 }
254} 257}
258EXPORT_SYMBOL_GPL(nfs_put_client);
255 259
256#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 260#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
257/* 261/*
@@ -601,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
601{ 605{
602 struct rpc_clnt *clnt = NULL; 606 struct rpc_clnt *clnt = NULL;
603 struct rpc_create_args args = { 607 struct rpc_create_args args = {
608 .net = &init_net,
604 .protocol = clp->cl_proto, 609 .protocol = clp->cl_proto,
605 .address = (struct sockaddr *)&clp->cl_addr, 610 .address = (struct sockaddr *)&clp->cl_addr,
606 .addrsize = clp->cl_addrlen, 611 .addrsize = clp->cl_addrlen,
@@ -635,7 +640,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
635 */ 640 */
636static void nfs_destroy_server(struct nfs_server *server) 641static void nfs_destroy_server(struct nfs_server *server)
637{ 642{
638 if (!(server->flags & NFS_MOUNT_NONLM)) 643 if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) ||
644 !(server->flags & NFS_MOUNT_LOCAL_FCNTL))
639 nlmclnt_done(server->nlm_host); 645 nlmclnt_done(server->nlm_host);
640} 646}
641 647
@@ -657,7 +663,8 @@ static int nfs_start_lockd(struct nfs_server *server)
657 663
658 if (nlm_init.nfs_version > 3) 664 if (nlm_init.nfs_version > 3)
659 return 0; 665 return 0;
660 if (server->flags & NFS_MOUNT_NONLM) 666 if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) &&
667 (server->flags & NFS_MOUNT_LOCAL_FCNTL))
661 return 0; 668 return 0;
662 669
663 switch (clp->cl_proto) { 670 switch (clp->cl_proto) {
@@ -898,11 +905,13 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
898 if (server->wsize > NFS_MAX_FILE_IO_SIZE) 905 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
899 server->wsize = NFS_MAX_FILE_IO_SIZE; 906 server->wsize = NFS_MAX_FILE_IO_SIZE;
900 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 907 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
908 set_pnfs_layoutdriver(server, fsinfo->layouttype);
909
901 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); 910 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
902 911
903 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); 912 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
904 if (server->dtsize > PAGE_CACHE_SIZE) 913 if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES)
905 server->dtsize = PAGE_CACHE_SIZE; 914 server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES;
906 if (server->dtsize > server->rsize) 915 if (server->dtsize > server->rsize)
907 server->dtsize = server->rsize; 916 server->dtsize = server->rsize;
908 917
@@ -913,6 +922,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
913 922
914 server->maxfilesize = fsinfo->maxfilesize; 923 server->maxfilesize = fsinfo->maxfilesize;
915 924
925 server->time_delta = fsinfo->time_delta;
926
916 /* We're airborne Set socket buffersize */ 927 /* We're airborne Set socket buffersize */
917 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); 928 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
918} 929}
@@ -935,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
935 } 946 }
936 947
937 fsinfo.fattr = fattr; 948 fsinfo.fattr = fattr;
949 fsinfo.layouttype = 0;
938 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); 950 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
939 if (error < 0) 951 if (error < 0)
940 goto out_error; 952 goto out_error;
@@ -1017,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server)
1017{ 1029{
1018 dprintk("--> nfs_free_server()\n"); 1030 dprintk("--> nfs_free_server()\n");
1019 1031
1032 unset_pnfs_layoutdriver(server);
1020 spin_lock(&nfs_client_lock); 1033 spin_lock(&nfs_client_lock);
1021 list_del(&server->client_link); 1034 list_del(&server->client_link);
1022 list_del(&server->master_link); 1035 list_del(&server->master_link);
@@ -1356,8 +1369,9 @@ static int nfs4_init_server(struct nfs_server *server,
1356 1369
1357 /* Initialise the client representation from the mount data */ 1370 /* Initialise the client representation from the mount data */
1358 server->flags = data->flags; 1371 server->flags = data->flags;
1359 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| 1372 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
1360 NFS_CAP_POSIX_LOCK; 1373 if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
1374 server->caps |= NFS_CAP_READDIRPLUS;
1361 server->options = data->options; 1375 server->options = data->options;
1362 1376
1363 /* Get a client record */ 1377 /* Get a client record */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index b9c3c43cea1d..232a7eead33a 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -71,20 +71,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
71 if (inode->i_flock == NULL) 71 if (inode->i_flock == NULL)
72 goto out; 72 goto out;
73 73
74 /* Protect inode->i_flock using the BKL */ 74 /* Protect inode->i_flock using the file locks lock */
75 lock_kernel(); 75 lock_flocks();
76 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 76 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
77 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 77 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
78 continue; 78 continue;
79 if (nfs_file_open_context(fl->fl_file) != ctx) 79 if (nfs_file_open_context(fl->fl_file) != ctx)
80 continue; 80 continue;
81 unlock_kernel(); 81 unlock_flocks();
82 status = nfs4_lock_delegation_recall(state, fl); 82 status = nfs4_lock_delegation_recall(state, fl);
83 if (status < 0) 83 if (status < 0)
84 goto out; 84 goto out;
85 lock_kernel(); 85 lock_flocks();
86 } 86 }
87 unlock_kernel(); 87 unlock_flocks();
88out: 88out:
89 return status; 89 return status;
90} 90}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e257172d438c..07ac3847e562 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,11 +33,12 @@
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/mount.h> 34#include <linux/mount.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/vmalloc.h>
36 37
37#include "nfs4_fs.h"
38#include "delegation.h" 38#include "delegation.h"
39#include "iostat.h" 39#include "iostat.h"
40#include "internal.h" 40#include "internal.h"
41#include "fscache.h"
41 42
42/* #define NFS_DEBUG_VERBOSE 1 */ 43/* #define NFS_DEBUG_VERBOSE 1 */
43 44
@@ -55,6 +56,7 @@ static int nfs_rename(struct inode *, struct dentry *,
55 struct inode *, struct dentry *); 56 struct inode *, struct dentry *);
56static int nfs_fsync_dir(struct file *, int); 57static int nfs_fsync_dir(struct file *, int);
57static loff_t nfs_llseek_dir(struct file *, loff_t, int); 58static loff_t nfs_llseek_dir(struct file *, loff_t, int);
59static int nfs_readdir_clear_array(struct page*, gfp_t);
58 60
59const struct file_operations nfs_dir_operations = { 61const struct file_operations nfs_dir_operations = {
60 .llseek = nfs_llseek_dir, 62 .llseek = nfs_llseek_dir,
@@ -80,6 +82,10 @@ const struct inode_operations nfs_dir_inode_operations = {
80 .setattr = nfs_setattr, 82 .setattr = nfs_setattr,
81}; 83};
82 84
85const struct address_space_operations nfs_dir_addr_space_ops = {
86 .releasepage = nfs_readdir_clear_array,
87};
88
83#ifdef CONFIG_NFS_V3 89#ifdef CONFIG_NFS_V3
84const struct inode_operations nfs3_dir_inode_operations = { 90const struct inode_operations nfs3_dir_inode_operations = {
85 .create = nfs_create, 91 .create = nfs_create,
@@ -104,8 +110,9 @@ const struct inode_operations nfs3_dir_inode_operations = {
104#ifdef CONFIG_NFS_V4 110#ifdef CONFIG_NFS_V4
105 111
106static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); 112static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
113static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
107const struct inode_operations nfs4_dir_inode_operations = { 114const struct inode_operations nfs4_dir_inode_operations = {
108 .create = nfs_create, 115 .create = nfs_open_create,
109 .lookup = nfs_atomic_lookup, 116 .lookup = nfs_atomic_lookup,
110 .link = nfs_link, 117 .link = nfs_link,
111 .unlink = nfs_unlink, 118 .unlink = nfs_unlink,
@@ -150,51 +157,197 @@ nfs_opendir(struct inode *inode, struct file *filp)
150 return res; 157 return res;
151} 158}
152 159
153typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); 160struct nfs_cache_array_entry {
161 u64 cookie;
162 u64 ino;
163 struct qstr string;
164};
165
166struct nfs_cache_array {
167 unsigned int size;
168 int eof_index;
169 u64 last_cookie;
170 struct nfs_cache_array_entry array[0];
171};
172
173#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
174
175typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
154typedef struct { 176typedef struct {
155 struct file *file; 177 struct file *file;
156 struct page *page; 178 struct page *page;
157 unsigned long page_index; 179 unsigned long page_index;
158 __be32 *ptr;
159 u64 *dir_cookie; 180 u64 *dir_cookie;
160 loff_t current_index; 181 loff_t current_index;
161 struct nfs_entry *entry;
162 decode_dirent_t decode; 182 decode_dirent_t decode;
163 int plus; 183
164 unsigned long timestamp; 184 unsigned long timestamp;
165 unsigned long gencount; 185 unsigned long gencount;
166 int timestamp_valid; 186 unsigned int cache_entry_index;
187 unsigned int plus:1;
188 unsigned int eof:1;
167} nfs_readdir_descriptor_t; 189} nfs_readdir_descriptor_t;
168 190
169/* Now we cache directories properly, by stuffing the dirent 191/*
170 * data directly in the page cache. 192 * The caller is responsible for calling nfs_readdir_release_array(page)
171 *
172 * Inode invalidation due to refresh etc. takes care of
173 * _everything_, no sloppy entry flushing logic, no extraneous
174 * copying, network direct to page cache, the way it was meant
175 * to be.
176 *
177 * NOTE: Dirent information verification is done always by the
178 * page-in of the RPC reply, nowhere else, this simplies
179 * things substantially.
180 */ 193 */
181static 194static
182int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) 195struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
196{
197 if (page == NULL)
198 return ERR_PTR(-EIO);
199 return (struct nfs_cache_array *)kmap(page);
200}
201
202static
203void nfs_readdir_release_array(struct page *page)
204{
205 kunmap(page);
206}
207
208/*
209 * we are freeing strings created by nfs_add_to_readdir_array()
210 */
211static
212int nfs_readdir_clear_array(struct page *page, gfp_t mask)
213{
214 struct nfs_cache_array *array = nfs_readdir_get_array(page);
215 int i;
216 for (i = 0; i < array->size; i++)
217 kfree(array->array[i].string.name);
218 nfs_readdir_release_array(page);
219 return 0;
220}
221
222/*
223 * the caller is responsible for freeing qstr.name
224 * when called by nfs_readdir_add_to_array, the strings will be freed in
225 * nfs_clear_readdir_array()
226 */
227static
228int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
229{
230 string->len = len;
231 string->name = kmemdup(name, len, GFP_KERNEL);
232 if (string->name == NULL)
233 return -ENOMEM;
234 string->hash = full_name_hash(name, len);
235 return 0;
236}
237
238static
239int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
240{
241 struct nfs_cache_array *array = nfs_readdir_get_array(page);
242 struct nfs_cache_array_entry *cache_entry;
243 int ret;
244
245 if (IS_ERR(array))
246 return PTR_ERR(array);
247 ret = -EIO;
248 if (array->size >= MAX_READDIR_ARRAY)
249 goto out;
250
251 cache_entry = &array->array[array->size];
252 cache_entry->cookie = entry->prev_cookie;
253 cache_entry->ino = entry->ino;
254 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
255 if (ret)
256 goto out;
257 array->last_cookie = entry->cookie;
258 if (entry->eof == 1)
259 array->eof_index = array->size;
260 array->size++;
261out:
262 nfs_readdir_release_array(page);
263 return ret;
264}
265
266static
267int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
268{
269 loff_t diff = desc->file->f_pos - desc->current_index;
270 unsigned int index;
271
272 if (diff < 0)
273 goto out_eof;
274 if (diff >= array->size) {
275 if (array->eof_index > 0)
276 goto out_eof;
277 desc->current_index += array->size;
278 return -EAGAIN;
279 }
280
281 index = (unsigned int)diff;
282 *desc->dir_cookie = array->array[index].cookie;
283 desc->cache_entry_index = index;
284 if (index == array->eof_index)
285 desc->eof = 1;
286 return 0;
287out_eof:
288 desc->eof = 1;
289 return -EBADCOOKIE;
290}
291
292static
293int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
294{
295 int i;
296 int status = -EAGAIN;
297
298 for (i = 0; i < array->size; i++) {
299 if (i == array->eof_index) {
300 desc->eof = 1;
301 status = -EBADCOOKIE;
302 }
303 if (array->array[i].cookie == *desc->dir_cookie) {
304 desc->cache_entry_index = i;
305 status = 0;
306 break;
307 }
308 }
309
310 return status;
311}
312
313static
314int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
315{
316 struct nfs_cache_array *array;
317 int status = -EBADCOOKIE;
318
319 if (desc->dir_cookie == NULL)
320 goto out;
321
322 array = nfs_readdir_get_array(desc->page);
323 if (IS_ERR(array)) {
324 status = PTR_ERR(array);
325 goto out;
326 }
327
328 if (*desc->dir_cookie == 0)
329 status = nfs_readdir_search_for_pos(array, desc);
330 else
331 status = nfs_readdir_search_for_cookie(array, desc);
332
333 nfs_readdir_release_array(desc->page);
334out:
335 return status;
336}
337
338/* Fill a page with xdr information before transferring to the cache page */
339static
340int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
341 struct nfs_entry *entry, struct file *file, struct inode *inode)
183{ 342{
184 struct file *file = desc->file;
185 struct inode *inode = file->f_path.dentry->d_inode;
186 struct rpc_cred *cred = nfs_file_cred(file); 343 struct rpc_cred *cred = nfs_file_cred(file);
187 unsigned long timestamp, gencount; 344 unsigned long timestamp, gencount;
188 int error; 345 int error;
189 346
190 dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
191 __func__, (long long)desc->entry->cookie,
192 page->index);
193
194 again: 347 again:
195 timestamp = jiffies; 348 timestamp = jiffies;
196 gencount = nfs_inc_attr_generation_counter(); 349 gencount = nfs_inc_attr_generation_counter();
197 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, 350 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
198 NFS_SERVER(inode)->dtsize, desc->plus); 351 NFS_SERVER(inode)->dtsize, desc->plus);
199 if (error < 0) { 352 if (error < 0) {
200 /* We requested READDIRPLUS, but the server doesn't grok it */ 353 /* We requested READDIRPLUS, but the server doesn't grok it */
@@ -208,190 +361,292 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
208 } 361 }
209 desc->timestamp = timestamp; 362 desc->timestamp = timestamp;
210 desc->gencount = gencount; 363 desc->gencount = gencount;
211 desc->timestamp_valid = 1; 364error:
212 SetPageUptodate(page); 365 return error;
213 /* Ensure consistent page alignment of the data.
214 * Note: assumes we have exclusive access to this mapping either
215 * through inode->i_mutex or some other mechanism.
216 */
217 if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
218 /* Should never happen */
219 nfs_zap_mapping(inode, inode->i_mapping);
220 }
221 unlock_page(page);
222 return 0;
223 error:
224 unlock_page(page);
225 return -EIO;
226} 366}
227 367
228static inline 368/* Fill in an entry based on the xdr code stored in desc->page */
229int dir_decode(nfs_readdir_descriptor_t *desc) 369static
370int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
230{ 371{
231 __be32 *p = desc->ptr; 372 __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
232 p = desc->decode(p, desc->entry, desc->plus);
233 if (IS_ERR(p)) 373 if (IS_ERR(p))
234 return PTR_ERR(p); 374 return PTR_ERR(p);
235 desc->ptr = p; 375
236 if (desc->timestamp_valid) { 376 entry->fattr->time_start = desc->timestamp;
237 desc->entry->fattr->time_start = desc->timestamp; 377 entry->fattr->gencount = desc->gencount;
238 desc->entry->fattr->gencount = desc->gencount;
239 } else
240 desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
241 return 0; 378 return 0;
242} 379}
243 380
244static inline 381static
245void dir_page_release(nfs_readdir_descriptor_t *desc) 382int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
246{ 383{
247 kunmap(desc->page); 384 struct nfs_inode *node;
248 page_cache_release(desc->page); 385 if (dentry->d_inode == NULL)
249 desc->page = NULL; 386 goto different;
250 desc->ptr = NULL; 387 node = NFS_I(dentry->d_inode);
388 if (node->fh.size != entry->fh->size)
389 goto different;
390 if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
391 goto different;
392 return 1;
393different:
394 return 0;
251} 395}
252 396
253/* 397static
254 * Given a pointer to a buffer that has already been filled by a call 398void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
255 * to readdir, find the next entry with cookie '*desc->dir_cookie'.
256 *
257 * If the end of the buffer has been reached, return -EAGAIN, if not,
258 * return the offset within the buffer of the next entry to be
259 * read.
260 */
261static inline
262int find_dirent(nfs_readdir_descriptor_t *desc)
263{ 399{
264 struct nfs_entry *entry = desc->entry; 400 struct qstr filename = {
265 int loop_count = 0, 401 .len = entry->len,
266 status; 402 .name = entry->name,
403 };
404 struct dentry *dentry;
405 struct dentry *alias;
406 struct inode *dir = parent->d_inode;
407 struct inode *inode;
267 408
268 while((status = dir_decode(desc)) == 0) { 409 if (filename.name[0] == '.') {
269 dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", 410 if (filename.len == 1)
270 __func__, (unsigned long long)entry->cookie); 411 return;
271 if (entry->prev_cookie == *desc->dir_cookie) 412 if (filename.len == 2 && filename.name[1] == '.')
272 break; 413 return;
273 if (loop_count++ > 200) { 414 }
274 loop_count = 0; 415 filename.hash = full_name_hash(filename.name, filename.len);
275 schedule(); 416
417 dentry = d_lookup(parent, &filename);
418 if (dentry != NULL) {
419 if (nfs_same_file(dentry, entry)) {
420 nfs_refresh_inode(dentry->d_inode, entry->fattr);
421 goto out;
422 } else {
423 d_drop(dentry);
424 dput(dentry);
276 } 425 }
277 } 426 }
278 return status; 427
428 dentry = d_alloc(parent, &filename);
429 if (dentry == NULL)
430 return;
431
432 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
433 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
434 if (IS_ERR(inode))
435 goto out;
436
437 alias = d_materialise_unique(dentry, inode);
438 if (IS_ERR(alias))
439 goto out;
440 else if (alias) {
441 nfs_set_verifier(alias, nfs_save_change_attribute(dir));
442 dput(alias);
443 } else
444 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
445
446out:
447 dput(dentry);
448}
449
450/* Perform conversion from xdr to cache array */
451static
452void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
453 void *xdr_page, struct page *page, unsigned int buflen)
454{
455 struct xdr_stream stream;
456 struct xdr_buf buf;
457 __be32 *ptr = xdr_page;
458 int status;
459 struct nfs_cache_array *array;
460
461 buf.head->iov_base = xdr_page;
462 buf.head->iov_len = buflen;
463 buf.tail->iov_len = 0;
464 buf.page_base = 0;
465 buf.page_len = 0;
466 buf.buflen = buf.head->iov_len;
467 buf.len = buf.head->iov_len;
468
469 xdr_init_decode(&stream, &buf, ptr);
470
471
472 do {
473 status = xdr_decode(desc, entry, &stream);
474 if (status != 0)
475 break;
476
477 if (nfs_readdir_add_to_array(entry, page) == -1)
478 break;
479 if (desc->plus == 1)
480 nfs_prime_dcache(desc->file->f_path.dentry, entry);
481 } while (!entry->eof);
482
483 if (status == -EBADCOOKIE && entry->eof) {
484 array = nfs_readdir_get_array(page);
485 array->eof_index = array->size - 1;
486 status = 0;
487 nfs_readdir_release_array(page);
488 }
489}
490
491static
492void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages)
493{
494 unsigned int i;
495 for (i = 0; i < npages; i++)
496 put_page(pages[i]);
497}
498
499static
500void nfs_readdir_free_large_page(void *ptr, struct page **pages,
501 unsigned int npages)
502{
503 vm_unmap_ram(ptr, npages);
504 nfs_readdir_free_pagearray(pages, npages);
279} 505}
280 506
281/* 507/*
282 * Given a pointer to a buffer that has already been filled by a call 508 * nfs_readdir_large_page will allocate pages that must be freed with a call
283 * to readdir, find the entry at offset 'desc->file->f_pos'. 509 * to nfs_readdir_free_large_page
284 *
285 * If the end of the buffer has been reached, return -EAGAIN, if not,
286 * return the offset within the buffer of the next entry to be
287 * read.
288 */ 510 */
289static inline 511static
290int find_dirent_index(nfs_readdir_descriptor_t *desc) 512void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
291{ 513{
292 struct nfs_entry *entry = desc->entry; 514 void *ptr;
293 int loop_count = 0, 515 unsigned int i;
294 status; 516
517 for (i = 0; i < npages; i++) {
518 struct page *page = alloc_page(GFP_KERNEL);
519 if (page == NULL)
520 goto out_freepages;
521 pages[i] = page;
522 }
295 523
296 for(;;) { 524 ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
297 status = dir_decode(desc); 525 if (!IS_ERR_OR_NULL(ptr))
298 if (status) 526 return ptr;
299 break; 527out_freepages:
528 nfs_readdir_free_pagearray(pages, i);
529 return NULL;
530}
531
532static
533int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
534{
535 struct page *pages[NFS_MAX_READDIR_PAGES];
536 void *pages_ptr = NULL;
537 struct nfs_entry entry;
538 struct file *file = desc->file;
539 struct nfs_cache_array *array;
540 int status = 0;
541 unsigned int array_size = ARRAY_SIZE(pages);
542
543 entry.prev_cookie = 0;
544 entry.cookie = *desc->dir_cookie;
545 entry.eof = 0;
546 entry.fh = nfs_alloc_fhandle();
547 entry.fattr = nfs_alloc_fattr();
548 if (entry.fh == NULL || entry.fattr == NULL)
549 goto out;
300 550
301 dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", 551 array = nfs_readdir_get_array(page);
302 (unsigned long long)entry->cookie, desc->current_index); 552 memset(array, 0, sizeof(struct nfs_cache_array));
553 array->eof_index = -1;
303 554
304 if (desc->file->f_pos == desc->current_index) { 555 pages_ptr = nfs_readdir_large_page(pages, array_size);
305 *desc->dir_cookie = entry->cookie; 556 if (!pages_ptr)
557 goto out_release_array;
558 do {
559 status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
560
561 if (status < 0)
306 break; 562 break;
307 } 563 nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
308 desc->current_index++; 564 } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
309 if (loop_count++ > 200) { 565
310 loop_count = 0; 566 nfs_readdir_free_large_page(pages_ptr, pages, array_size);
311 schedule(); 567out_release_array:
312 } 568 nfs_readdir_release_array(page);
313 } 569out:
570 nfs_free_fattr(entry.fattr);
571 nfs_free_fhandle(entry.fh);
314 return status; 572 return status;
315} 573}
316 574
317/* 575/*
318 * Find the given page, and call find_dirent() or find_dirent_index in 576 * Now we cache directories properly, by converting xdr information
319 * order to try to return the next entry. 577 * to an array that can be used for lookups later. This results in
578 * fewer cache pages, since we can store more information on each page.
579 * We only need to convert from xdr once so future lookups are much simpler
320 */ 580 */
321static inline 581static
322int find_dirent_page(nfs_readdir_descriptor_t *desc) 582int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
323{ 583{
324 struct inode *inode = desc->file->f_path.dentry->d_inode; 584 struct inode *inode = desc->file->f_path.dentry->d_inode;
325 struct page *page;
326 int status;
327 585
328 dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", 586 if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
329 __func__, desc->page_index, 587 goto error;
330 (long long) *desc->dir_cookie); 588 SetPageUptodate(page);
331 589
332 /* If we find the page in the page_cache, we cannot be sure 590 if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
333 * how fresh the data is, so we will ignore readdir_plus attributes. 591 /* Should never happen */
334 */ 592 nfs_zap_mapping(inode, inode->i_mapping);
335 desc->timestamp_valid = 0;
336 page = read_cache_page(inode->i_mapping, desc->page_index,
337 (filler_t *)nfs_readdir_filler, desc);
338 if (IS_ERR(page)) {
339 status = PTR_ERR(page);
340 goto out;
341 } 593 }
594 unlock_page(page);
595 return 0;
596 error:
597 unlock_page(page);
598 return -EIO;
599}
342 600
343 /* NOTE: Someone else may have changed the READDIRPLUS flag */ 601static
344 desc->page = page; 602void cache_page_release(nfs_readdir_descriptor_t *desc)
345 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 603{
346 if (*desc->dir_cookie != 0) 604 page_cache_release(desc->page);
347 status = find_dirent(desc); 605 desc->page = NULL;
348 else 606}
349 status = find_dirent_index(desc); 607
350 if (status < 0) 608static
351 dir_page_release(desc); 609struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
352 out: 610{
353 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); 611 struct page *page;
354 return status; 612 page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
613 desc->page_index, (filler_t *)nfs_readdir_filler, desc);
614 if (IS_ERR(page))
615 desc->eof = 1;
616 return page;
355} 617}
356 618
357/* 619/*
358 * Recurse through the page cache pages, and return a 620 * Returns 0 if desc->dir_cookie was found on page desc->page_index
359 * filled nfs_entry structure of the next directory entry if possible.
360 *
361 * The target for the search is '*desc->dir_cookie' if non-0,
362 * 'desc->file->f_pos' otherwise
363 */ 621 */
622static
623int find_cache_page(nfs_readdir_descriptor_t *desc)
624{
625 int res;
626
627 desc->page = get_cache_page(desc);
628 if (IS_ERR(desc->page))
629 return PTR_ERR(desc->page);
630
631 res = nfs_readdir_search_array(desc);
632 if (res == 0)
633 return 0;
634 cache_page_release(desc);
635 return res;
636}
637
638/* Search for desc->dir_cookie from the beginning of the page cache */
364static inline 639static inline
365int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) 640int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
366{ 641{
367 int loop_count = 0; 642 int res = -EAGAIN;
368 int res;
369
370 /* Always search-by-index from the beginning of the cache */
371 if (*desc->dir_cookie == 0) {
372 dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
373 (long long)desc->file->f_pos);
374 desc->page_index = 0;
375 desc->entry->cookie = desc->entry->prev_cookie = 0;
376 desc->entry->eof = 0;
377 desc->current_index = 0;
378 } else
379 dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
380 (unsigned long long)*desc->dir_cookie);
381 643
382 for (;;) { 644 while (1) {
383 res = find_dirent_page(desc); 645 res = find_cache_page(desc);
384 if (res != -EAGAIN) 646 if (res != -EAGAIN)
385 break; 647 break;
386 /* Align to beginning of next page */ 648 desc->page_index++;
387 desc->page_index ++;
388 if (loop_count++ > 200) {
389 loop_count = 0;
390 schedule();
391 }
392 } 649 }
393
394 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res);
395 return res; 650 return res;
396} 651}
397 652
@@ -400,8 +655,6 @@ static inline unsigned int dt_type(struct inode *inode)
400 return (inode->i_mode >> 12) & 15; 655 return (inode->i_mode >> 12) & 15;
401} 656}
402 657
403static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
404
405/* 658/*
406 * Once we've found the start of the dirent within a page: fill 'er up... 659 * Once we've found the start of the dirent within a page: fill 'er up...
407 */ 660 */
@@ -410,49 +663,36 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
410 filldir_t filldir) 663 filldir_t filldir)
411{ 664{
412 struct file *file = desc->file; 665 struct file *file = desc->file;
413 struct nfs_entry *entry = desc->entry; 666 int i = 0;
414 struct dentry *dentry = NULL; 667 int res = 0;
415 u64 fileid; 668 struct nfs_cache_array *array = NULL;
416 int loop_count = 0, 669 unsigned int d_type = DT_UNKNOWN;
417 res; 670 struct dentry *dentry = NULL;
418
419 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
420 (unsigned long long)entry->cookie);
421
422 for(;;) {
423 unsigned d_type = DT_UNKNOWN;
424 /* Note: entry->prev_cookie contains the cookie for
425 * retrieving the current dirent on the server */
426 fileid = entry->ino;
427
428 /* Get a dentry if we have one */
429 if (dentry != NULL)
430 dput(dentry);
431 dentry = nfs_readdir_lookup(desc);
432 671
433 /* Use readdirplus info */ 672 array = nfs_readdir_get_array(desc->page);
434 if (dentry != NULL && dentry->d_inode != NULL) {
435 d_type = dt_type(dentry->d_inode);
436 fileid = NFS_FILEID(dentry->d_inode);
437 }
438 673
439 res = filldir(dirent, entry->name, entry->len, 674 for (i = desc->cache_entry_index; i < array->size; i++) {
440 file->f_pos, nfs_compat_user_ino64(fileid), 675 d_type = DT_UNKNOWN;
441 d_type); 676
677 res = filldir(dirent, array->array[i].string.name,
678 array->array[i].string.len, file->f_pos,
679 nfs_compat_user_ino64(array->array[i].ino), d_type);
442 if (res < 0) 680 if (res < 0)
443 break; 681 break;
444 file->f_pos++; 682 file->f_pos++;
445 *desc->dir_cookie = entry->cookie; 683 desc->cache_entry_index = i;
446 if (dir_decode(desc) != 0) { 684 if (i < (array->size-1))
447 desc->page_index ++; 685 *desc->dir_cookie = array->array[i+1].cookie;
686 else
687 *desc->dir_cookie = array->last_cookie;
688 if (i == array->eof_index) {
689 desc->eof = 1;
448 break; 690 break;
449 } 691 }
450 if (loop_count++ > 200) {
451 loop_count = 0;
452 schedule();
453 }
454 } 692 }
455 dir_page_release(desc); 693
694 nfs_readdir_release_array(desc->page);
695 cache_page_release(desc);
456 if (dentry != NULL) 696 if (dentry != NULL)
457 dput(dentry); 697 dput(dentry);
458 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", 698 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
@@ -476,12 +716,9 @@ static inline
476int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, 716int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
477 filldir_t filldir) 717 filldir_t filldir)
478{ 718{
479 struct file *file = desc->file;
480 struct inode *inode = file->f_path.dentry->d_inode;
481 struct rpc_cred *cred = nfs_file_cred(file);
482 struct page *page = NULL; 719 struct page *page = NULL;
483 int status; 720 int status;
484 unsigned long timestamp, gencount; 721 struct inode *inode = desc->file->f_path.dentry->d_inode;
485 722
486 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 723 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
487 (unsigned long long)*desc->dir_cookie); 724 (unsigned long long)*desc->dir_cookie);
@@ -491,38 +728,22 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
491 status = -ENOMEM; 728 status = -ENOMEM;
492 goto out; 729 goto out;
493 } 730 }
494 timestamp = jiffies; 731
495 gencount = nfs_inc_attr_generation_counter(); 732 if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
496 status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
497 *desc->dir_cookie, page,
498 NFS_SERVER(inode)->dtsize,
499 desc->plus);
500 desc->page = page;
501 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
502 if (status >= 0) {
503 desc->timestamp = timestamp;
504 desc->gencount = gencount;
505 desc->timestamp_valid = 1;
506 if ((status = dir_decode(desc)) == 0)
507 desc->entry->prev_cookie = *desc->dir_cookie;
508 } else
509 status = -EIO; 733 status = -EIO;
510 if (status < 0)
511 goto out_release; 734 goto out_release;
735 }
512 736
737 desc->page_index = 0;
738 desc->page = page;
513 status = nfs_do_filldir(desc, dirent, filldir); 739 status = nfs_do_filldir(desc, dirent, filldir);
514 740
515 /* Reset read descriptor so it searches the page cache from
516 * the start upon the next call to readdir_search_pagecache() */
517 desc->page_index = 0;
518 desc->entry->cookie = desc->entry->prev_cookie = 0;
519 desc->entry->eof = 0;
520 out: 741 out:
521 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", 742 dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
522 __func__, status); 743 __func__, status);
523 return status; 744 return status;
524 out_release: 745 out_release:
525 dir_page_release(desc); 746 cache_page_release(desc);
526 goto out; 747 goto out;
527} 748}
528 749
@@ -536,7 +757,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
536 struct inode *inode = dentry->d_inode; 757 struct inode *inode = dentry->d_inode;
537 nfs_readdir_descriptor_t my_desc, 758 nfs_readdir_descriptor_t my_desc,
538 *desc = &my_desc; 759 *desc = &my_desc;
539 struct nfs_entry my_entry;
540 int res = -ENOMEM; 760 int res = -ENOMEM;
541 761
542 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 762 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -557,26 +777,17 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
557 desc->decode = NFS_PROTO(inode)->decode_dirent; 777 desc->decode = NFS_PROTO(inode)->decode_dirent;
558 desc->plus = NFS_USE_READDIRPLUS(inode); 778 desc->plus = NFS_USE_READDIRPLUS(inode);
559 779
560 my_entry.cookie = my_entry.prev_cookie = 0;
561 my_entry.eof = 0;
562 my_entry.fh = nfs_alloc_fhandle();
563 my_entry.fattr = nfs_alloc_fattr();
564 if (my_entry.fh == NULL || my_entry.fattr == NULL)
565 goto out_alloc_failed;
566
567 desc->entry = &my_entry;
568
569 nfs_block_sillyrename(dentry); 780 nfs_block_sillyrename(dentry);
570 res = nfs_revalidate_mapping(inode, filp->f_mapping); 781 res = nfs_revalidate_mapping(inode, filp->f_mapping);
571 if (res < 0) 782 if (res < 0)
572 goto out; 783 goto out;
573 784
574 while(!desc->entry->eof) { 785 while (desc->eof != 1) {
575 res = readdir_search_pagecache(desc); 786 res = readdir_search_pagecache(desc);
576 787
577 if (res == -EBADCOOKIE) { 788 if (res == -EBADCOOKIE) {
578 /* This means either end of directory */ 789 /* This means either end of directory */
579 if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { 790 if (*desc->dir_cookie && desc->eof == 0) {
580 /* Or that the server has 'lost' a cookie */ 791 /* Or that the server has 'lost' a cookie */
581 res = uncached_readdir(desc, dirent, filldir); 792 res = uncached_readdir(desc, dirent, filldir);
582 if (res >= 0) 793 if (res >= 0)
@@ -588,8 +799,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
588 if (res == -ETOOSMALL && desc->plus) { 799 if (res == -ETOOSMALL && desc->plus) {
589 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 800 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
590 nfs_zap_caches(inode); 801 nfs_zap_caches(inode);
802 desc->page_index = 0;
591 desc->plus = 0; 803 desc->plus = 0;
592 desc->entry->eof = 0; 804 desc->eof = 0;
593 continue; 805 continue;
594 } 806 }
595 if (res < 0) 807 if (res < 0)
@@ -605,9 +817,6 @@ out:
605 nfs_unblock_sillyrename(dentry); 817 nfs_unblock_sillyrename(dentry);
606 if (res > 0) 818 if (res > 0)
607 res = 0; 819 res = 0;
608out_alloc_failed:
609 nfs_free_fattr(my_entry.fattr);
610 nfs_free_fhandle(my_entry.fh);
611 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", 820 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
612 dentry->d_parent->d_name.name, dentry->d_name.name, 821 dentry->d_parent->d_name.name, dentry->d_name.name,
613 res); 822 res);
@@ -1029,10 +1238,63 @@ static int is_atomic_open(struct nameidata *nd)
1029 return 1; 1238 return 1;
1030} 1239}
1031 1240
1241static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd)
1242{
1243 struct path path = {
1244 .mnt = nd->path.mnt,
1245 .dentry = dentry,
1246 };
1247 struct nfs_open_context *ctx;
1248 struct rpc_cred *cred;
1249 fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
1250
1251 cred = rpc_lookup_cred();
1252 if (IS_ERR(cred))
1253 return ERR_CAST(cred);
1254 ctx = alloc_nfs_open_context(&path, cred, fmode);
1255 put_rpccred(cred);
1256 if (ctx == NULL)
1257 return ERR_PTR(-ENOMEM);
1258 return ctx;
1259}
1260
1261static int do_open(struct inode *inode, struct file *filp)
1262{
1263 nfs_fscache_set_inode_cookie(inode, filp);
1264 return 0;
1265}
1266
1267static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
1268{
1269 struct file *filp;
1270 int ret = 0;
1271
1272 /* If the open_intent is for execute, we have an extra check to make */
1273 if (ctx->mode & FMODE_EXEC) {
1274 ret = nfs_may_open(ctx->path.dentry->d_inode,
1275 ctx->cred,
1276 nd->intent.open.flags);
1277 if (ret < 0)
1278 goto out;
1279 }
1280 filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open);
1281 if (IS_ERR(filp))
1282 ret = PTR_ERR(filp);
1283 else
1284 nfs_file_set_open_context(filp, ctx);
1285out:
1286 put_nfs_open_context(ctx);
1287 return ret;
1288}
1289
1032static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1290static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1033{ 1291{
1292 struct nfs_open_context *ctx;
1293 struct iattr attr;
1034 struct dentry *res = NULL; 1294 struct dentry *res = NULL;
1035 int error; 1295 struct inode *inode;
1296 int open_flags;
1297 int err;
1036 1298
1037 dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", 1299 dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
1038 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1300 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1054,13 +1316,32 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1054 goto out; 1316 goto out;
1055 } 1317 }
1056 1318
1319 ctx = nameidata_to_nfs_open_context(dentry, nd);
1320 res = ERR_CAST(ctx);
1321 if (IS_ERR(ctx))
1322 goto out;
1323
1324 open_flags = nd->intent.open.flags;
1325 if (nd->flags & LOOKUP_CREATE) {
1326 attr.ia_mode = nd->intent.open.create_mode;
1327 attr.ia_valid = ATTR_MODE;
1328 if (!IS_POSIXACL(dir))
1329 attr.ia_mode &= ~current_umask();
1330 } else {
1331 open_flags &= ~(O_EXCL | O_CREAT);
1332 attr.ia_valid = 0;
1333 }
1334
1057 /* Open the file on the server */ 1335 /* Open the file on the server */
1058 res = nfs4_atomic_open(dir, dentry, nd); 1336 nfs_block_sillyrename(dentry->d_parent);
1059 if (IS_ERR(res)) { 1337 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
1060 error = PTR_ERR(res); 1338 if (IS_ERR(inode)) {
1061 switch (error) { 1339 nfs_unblock_sillyrename(dentry->d_parent);
1340 put_nfs_open_context(ctx);
1341 switch (PTR_ERR(inode)) {
1062 /* Make a negative dentry */ 1342 /* Make a negative dentry */
1063 case -ENOENT: 1343 case -ENOENT:
1344 d_add(dentry, NULL);
1064 res = NULL; 1345 res = NULL;
1065 goto out; 1346 goto out;
1066 /* This turned out not to be a regular file */ 1347 /* This turned out not to be a regular file */
@@ -1072,11 +1353,25 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1072 goto no_open; 1353 goto no_open;
1073 /* case -EINVAL: */ 1354 /* case -EINVAL: */
1074 default: 1355 default:
1356 res = ERR_CAST(inode);
1075 goto out; 1357 goto out;
1076 } 1358 }
1077 } else if (res != NULL) 1359 }
1360 res = d_add_unique(dentry, inode);
1361 nfs_unblock_sillyrename(dentry->d_parent);
1362 if (res != NULL) {
1363 dput(ctx->path.dentry);
1364 ctx->path.dentry = dget(res);
1078 dentry = res; 1365 dentry = res;
1366 }
1367 err = nfs_intent_set_file(nd, ctx);
1368 if (err < 0) {
1369 if (res != NULL)
1370 dput(res);
1371 return ERR_PTR(err);
1372 }
1079out: 1373out:
1374 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1080 return res; 1375 return res;
1081no_open: 1376no_open:
1082 return nfs_lookup(dir, dentry, nd); 1377 return nfs_lookup(dir, dentry, nd);
@@ -1087,12 +1382,15 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1087 struct dentry *parent = NULL; 1382 struct dentry *parent = NULL;
1088 struct inode *inode = dentry->d_inode; 1383 struct inode *inode = dentry->d_inode;
1089 struct inode *dir; 1384 struct inode *dir;
1385 struct nfs_open_context *ctx;
1090 int openflags, ret = 0; 1386 int openflags, ret = 0;
1091 1387
1092 if (!is_atomic_open(nd) || d_mountpoint(dentry)) 1388 if (!is_atomic_open(nd) || d_mountpoint(dentry))
1093 goto no_open; 1389 goto no_open;
1390
1094 parent = dget_parent(dentry); 1391 parent = dget_parent(dentry);
1095 dir = parent->d_inode; 1392 dir = parent->d_inode;
1393
1096 /* We can't create new files in nfs_open_revalidate(), so we 1394 /* We can't create new files in nfs_open_revalidate(), so we
1097 * optimize away revalidation of negative dentries. 1395 * optimize away revalidation of negative dentries.
1098 */ 1396 */
@@ -1112,99 +1410,96 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1112 /* We can't create new files, or truncate existing ones here */ 1410 /* We can't create new files, or truncate existing ones here */
1113 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); 1411 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
1114 1412
1413 ctx = nameidata_to_nfs_open_context(dentry, nd);
1414 ret = PTR_ERR(ctx);
1415 if (IS_ERR(ctx))
1416 goto out;
1115 /* 1417 /*
1116 * Note: we're not holding inode->i_mutex and so may be racing with 1418 * Note: we're not holding inode->i_mutex and so may be racing with
1117 * operations that change the directory. We therefore save the 1419 * operations that change the directory. We therefore save the
1118 * change attribute *before* we do the RPC call. 1420 * change attribute *before* we do the RPC call.
1119 */ 1421 */
1120 ret = nfs4_open_revalidate(dir, dentry, openflags, nd); 1422 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
1423 if (IS_ERR(inode)) {
1424 ret = PTR_ERR(inode);
1425 switch (ret) {
1426 case -EPERM:
1427 case -EACCES:
1428 case -EDQUOT:
1429 case -ENOSPC:
1430 case -EROFS:
1431 goto out_put_ctx;
1432 default:
1433 goto out_drop;
1434 }
1435 }
1436 iput(inode);
1437 if (inode != dentry->d_inode)
1438 goto out_drop;
1439
1440 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1441 ret = nfs_intent_set_file(nd, ctx);
1442 if (ret >= 0)
1443 ret = 1;
1121out: 1444out:
1122 dput(parent); 1445 dput(parent);
1123 if (!ret)
1124 d_drop(dentry);
1125 return ret; 1446 return ret;
1447out_drop:
1448 d_drop(dentry);
1449 ret = 0;
1450out_put_ctx:
1451 put_nfs_open_context(ctx);
1452 goto out;
1453
1126no_open_dput: 1454no_open_dput:
1127 dput(parent); 1455 dput(parent);
1128no_open: 1456no_open:
1129 return nfs_lookup_revalidate(dentry, nd); 1457 return nfs_lookup_revalidate(dentry, nd);
1130} 1458}
1131#endif /* CONFIG_NFSV4 */
1132 1459
1133static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) 1460static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
1461 struct nameidata *nd)
1134{ 1462{
1135 struct dentry *parent = desc->file->f_path.dentry; 1463 struct nfs_open_context *ctx = NULL;
1136 struct inode *dir = parent->d_inode; 1464 struct iattr attr;
1137 struct nfs_entry *entry = desc->entry; 1465 int error;
1138 struct dentry *dentry, *alias; 1466 int open_flags = 0;
1139 struct qstr name = {
1140 .name = entry->name,
1141 .len = entry->len,
1142 };
1143 struct inode *inode;
1144 unsigned long verf = nfs_save_change_attribute(dir);
1145 1467
1146 switch (name.len) { 1468 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1147 case 2: 1469 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1148 if (name.name[0] == '.' && name.name[1] == '.')
1149 return dget_parent(parent);
1150 break;
1151 case 1:
1152 if (name.name[0] == '.')
1153 return dget(parent);
1154 }
1155 1470
1156 spin_lock(&dir->i_lock); 1471 attr.ia_mode = mode;
1157 if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { 1472 attr.ia_valid = ATTR_MODE;
1158 spin_unlock(&dir->i_lock);
1159 return NULL;
1160 }
1161 spin_unlock(&dir->i_lock);
1162 1473
1163 name.hash = full_name_hash(name.name, name.len); 1474 if ((nd->flags & LOOKUP_CREATE) != 0) {
1164 dentry = d_lookup(parent, &name); 1475 open_flags = nd->intent.open.flags;
1165 if (dentry != NULL) {
1166 /* Is this a positive dentry that matches the readdir info? */
1167 if (dentry->d_inode != NULL &&
1168 (NFS_FILEID(dentry->d_inode) == entry->ino ||
1169 d_mountpoint(dentry))) {
1170 if (!desc->plus || entry->fh->size == 0)
1171 return dentry;
1172 if (nfs_compare_fh(NFS_FH(dentry->d_inode),
1173 entry->fh) == 0)
1174 goto out_renew;
1175 }
1176 /* No, so d_drop to allow one to be created */
1177 d_drop(dentry);
1178 dput(dentry);
1179 }
1180 if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
1181 return NULL;
1182 if (name.len > NFS_SERVER(dir)->namelen)
1183 return NULL;
1184 /* Note: caller is already holding the dir->i_mutex! */
1185 dentry = d_alloc(parent, &name);
1186 if (dentry == NULL)
1187 return NULL;
1188 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
1189 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
1190 if (IS_ERR(inode)) {
1191 dput(dentry);
1192 return NULL;
1193 }
1194 1476
1195 alias = d_materialise_unique(dentry, inode); 1477 ctx = nameidata_to_nfs_open_context(dentry, nd);
1196 if (alias != NULL) { 1478 error = PTR_ERR(ctx);
1197 dput(dentry); 1479 if (IS_ERR(ctx))
1198 if (IS_ERR(alias)) 1480 goto out_err_drop;
1199 return NULL;
1200 dentry = alias;
1201 } 1481 }
1202 1482
1203out_renew: 1483 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
1204 nfs_set_verifier(dentry, verf); 1484 if (error != 0)
1205 return dentry; 1485 goto out_put_ctx;
1486 if (ctx != NULL) {
1487 error = nfs_intent_set_file(nd, ctx);
1488 if (error < 0)
1489 goto out_err;
1490 }
1491 return 0;
1492out_put_ctx:
1493 if (ctx != NULL)
1494 put_nfs_open_context(ctx);
1495out_err_drop:
1496 d_drop(dentry);
1497out_err:
1498 return error;
1206} 1499}
1207 1500
1501#endif /* CONFIG_NFSV4 */
1502
1208/* 1503/*
1209 * Code common to create, mkdir, and mknod. 1504 * Code common to create, mkdir, and mknod.
1210 */ 1505 */
@@ -1258,7 +1553,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1258{ 1553{
1259 struct iattr attr; 1554 struct iattr attr;
1260 int error; 1555 int error;
1261 int open_flags = 0;
1262 1556
1263 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1557 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1264 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1558 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1266,10 +1560,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1266 attr.ia_mode = mode; 1560 attr.ia_mode = mode;
1267 attr.ia_valid = ATTR_MODE; 1561 attr.ia_valid = ATTR_MODE;
1268 1562
1269 if ((nd->flags & LOOKUP_CREATE) != 0) 1563 error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL);
1270 open_flags = nd->intent.open.flags;
1271
1272 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
1273 if (error != 0) 1564 if (error != 0)
1274 goto out_err; 1565 goto out_err;
1275 return 0; 1566 return 0;
@@ -1351,76 +1642,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1351 return error; 1642 return error;
1352} 1643}
1353 1644
1354static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1355{
1356 static unsigned int sillycounter;
1357 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
1358 const int countersize = sizeof(sillycounter)*2;
1359 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
1360 char silly[slen+1];
1361 struct qstr qsilly;
1362 struct dentry *sdentry;
1363 int error = -EIO;
1364
1365 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
1366 dentry->d_parent->d_name.name, dentry->d_name.name,
1367 atomic_read(&dentry->d_count));
1368 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
1369
1370 /*
1371 * We don't allow a dentry to be silly-renamed twice.
1372 */
1373 error = -EBUSY;
1374 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1375 goto out;
1376
1377 sprintf(silly, ".nfs%*.*Lx",
1378 fileidsize, fileidsize,
1379 (unsigned long long)NFS_FILEID(dentry->d_inode));
1380
1381 /* Return delegation in anticipation of the rename */
1382 nfs_inode_return_delegation(dentry->d_inode);
1383
1384 sdentry = NULL;
1385 do {
1386 char *suffix = silly + slen - countersize;
1387
1388 dput(sdentry);
1389 sillycounter++;
1390 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
1391
1392 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
1393 dentry->d_name.name, silly);
1394
1395 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
1396 /*
1397 * N.B. Better to return EBUSY here ... it could be
1398 * dangerous to delete the file while it's in use.
1399 */
1400 if (IS_ERR(sdentry))
1401 goto out;
1402 } while(sdentry->d_inode != NULL); /* need negative lookup */
1403
1404 qsilly.name = silly;
1405 qsilly.len = strlen(silly);
1406 if (dentry->d_inode) {
1407 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1408 dir, &qsilly);
1409 nfs_mark_for_revalidate(dentry->d_inode);
1410 } else
1411 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1412 dir, &qsilly);
1413 if (!error) {
1414 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1415 d_move(dentry, sdentry);
1416 error = nfs_async_unlink(dir, dentry);
1417 /* If we return 0 we don't unlink */
1418 }
1419 dput(sdentry);
1420out:
1421 return error;
1422}
1423
1424/* 1645/*
1425 * Remove a file after making sure there are no pending writes, 1646 * Remove a file after making sure there are no pending writes,
1426 * and after checking that the file has only one user. 1647 * and after checking that the file has only one user.
@@ -1580,7 +1801,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1580 d_drop(dentry); 1801 d_drop(dentry);
1581 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); 1802 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
1582 if (error == 0) { 1803 if (error == 0) {
1583 atomic_inc(&inode->i_count); 1804 ihold(inode);
1584 d_add(dentry, inode); 1805 d_add(dentry, inode);
1585 } 1806 }
1586 return error; 1807 return error;
@@ -1711,14 +1932,14 @@ static void nfs_access_free_list(struct list_head *head)
1711int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 1932int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
1712{ 1933{
1713 LIST_HEAD(head); 1934 LIST_HEAD(head);
1714 struct nfs_inode *nfsi; 1935 struct nfs_inode *nfsi, *next;
1715 struct nfs_access_entry *cache; 1936 struct nfs_access_entry *cache;
1716 1937
1717 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 1938 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
1718 return (nr_to_scan == 0) ? 0 : -1; 1939 return (nr_to_scan == 0) ? 0 : -1;
1719 1940
1720 spin_lock(&nfs_access_lru_lock); 1941 spin_lock(&nfs_access_lru_lock);
1721 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1942 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
1722 struct inode *inode; 1943 struct inode *inode;
1723 1944
1724 if (nr_to_scan-- == 0) 1945 if (nr_to_scan-- == 0)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index dba50a5625db..a6e711ad130f 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
167 return 0; 167 return 0;
168 } 168 }
169 item = container_of(h, struct nfs_dns_ent, h); 169 item = container_of(h, struct nfs_dns_ent, h);
170 ttl = (long)item->h.expiry_time - (long)get_seconds(); 170 ttl = item->h.expiry_time - seconds_since_boot();
171 if (ttl < 0) 171 if (ttl < 0)
172 ttl = 0; 172 ttl = 0;
173 173
@@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
239 ttl = get_expiry(&buf); 239 ttl = get_expiry(&buf);
240 if (ttl == 0) 240 if (ttl == 0)
241 goto out; 241 goto out;
242 key.h.expiry_time = ttl + get_seconds(); 242 key.h.expiry_time = ttl + seconds_since_boot();
243 243
244 ret = -ENOMEM; 244 ret = -ENOMEM;
245 item = nfs_dns_lookup(cd, &key); 245 item = nfs_dns_lookup(cd, &key);
@@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd,
301 goto out_err; 301 goto out_err;
302 ret = -ETIMEDOUT; 302 ret = -ETIMEDOUT;
303 if (!test_bit(CACHE_VALID, &(*item)->h.flags) 303 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
304 || (*item)->h.expiry_time < get_seconds() 304 || (*item)->h.expiry_time < seconds_since_boot()
305 || cd->flush_time > (*item)->h.last_refresh) 305 || cd->flush_time > (*item)->h.last_refresh)
306 goto out_put; 306 goto out_put;
307 ret = -ENOENT; 307 ret = -ENOENT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 05bf3c0dc751..e756075637b0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -36,6 +36,7 @@
36#include "internal.h" 36#include "internal.h"
37#include "iostat.h" 37#include "iostat.h"
38#include "fscache.h" 38#include "fscache.h"
39#include "pnfs.h"
39 40
40#define NFSDBG_FACILITY NFSDBG_FILE 41#define NFSDBG_FACILITY NFSDBG_FILE
41 42
@@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
386 file->f_path.dentry->d_name.name, 387 file->f_path.dentry->d_name.name,
387 mapping->host->i_ino, len, (long long) pos); 388 mapping->host->i_ino, len, (long long) pos);
388 389
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
389start: 394start:
390 /* 395 /*
391 * Prevent starvation issues if someone is doing a consistency 396 * Prevent starvation issues if someone is doing a consistency
@@ -551,7 +556,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
551 struct file *filp = vma->vm_file; 556 struct file *filp = vma->vm_file;
552 struct dentry *dentry = filp->f_path.dentry; 557 struct dentry *dentry = filp->f_path.dentry;
553 unsigned pagelen; 558 unsigned pagelen;
554 int ret = -EINVAL; 559 int ret = VM_FAULT_NOPAGE;
555 struct address_space *mapping; 560 struct address_space *mapping;
556 561
557 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", 562 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
@@ -567,21 +572,20 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
567 if (mapping != dentry->d_inode->i_mapping) 572 if (mapping != dentry->d_inode->i_mapping)
568 goto out_unlock; 573 goto out_unlock;
569 574
570 ret = 0;
571 pagelen = nfs_page_length(page); 575 pagelen = nfs_page_length(page);
572 if (pagelen == 0) 576 if (pagelen == 0)
573 goto out_unlock; 577 goto out_unlock;
574 578
575 ret = nfs_flush_incompatible(filp, page); 579 ret = VM_FAULT_LOCKED;
576 if (ret != 0) 580 if (nfs_flush_incompatible(filp, page) == 0 &&
577 goto out_unlock; 581 nfs_updatepage(filp, page, 0, pagelen) == 0)
582 goto out;
578 583
579 ret = nfs_updatepage(filp, page, 0, pagelen); 584 ret = VM_FAULT_SIGBUS;
580out_unlock: 585out_unlock:
581 if (!ret)
582 return VM_FAULT_LOCKED;
583 unlock_page(page); 586 unlock_page(page);
584 return VM_FAULT_SIGBUS; 587out:
588 return ret;
585} 589}
586 590
587static const struct vm_operations_struct nfs_file_vm_ops = { 591static const struct vm_operations_struct nfs_file_vm_ops = {
@@ -684,7 +688,8 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
684 return ret; 688 return ret;
685} 689}
686 690
687static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) 691static int
692do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
688{ 693{
689 struct inode *inode = filp->f_mapping->host; 694 struct inode *inode = filp->f_mapping->host;
690 int status = 0; 695 int status = 0;
@@ -699,7 +704,7 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
699 if (nfs_have_delegation(inode, FMODE_READ)) 704 if (nfs_have_delegation(inode, FMODE_READ))
700 goto out_noconflict; 705 goto out_noconflict;
701 706
702 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) 707 if (is_local)
703 goto out_noconflict; 708 goto out_noconflict;
704 709
705 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 710 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
@@ -726,7 +731,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
726 return res; 731 return res;
727} 732}
728 733
729static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) 734static int
735do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
730{ 736{
731 struct inode *inode = filp->f_mapping->host; 737 struct inode *inode = filp->f_mapping->host;
732 int status; 738 int status;
@@ -741,15 +747,24 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
741 * If we're signalled while cleaning up locks on process exit, we 747 * If we're signalled while cleaning up locks on process exit, we
742 * still need to complete the unlock. 748 * still need to complete the unlock.
743 */ 749 */
744 /* Use local locking if mounted with "-onolock" */ 750 /*
745 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 751 * Use local locking if mounted with "-onolock" or with appropriate
752 * "-olocal_lock="
753 */
754 if (!is_local)
746 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 755 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
747 else 756 else
748 status = do_vfs_lock(filp, fl); 757 status = do_vfs_lock(filp, fl);
749 return status; 758 return status;
750} 759}
751 760
752static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) 761static int
762is_time_granular(struct timespec *ts) {
763 return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
764}
765
766static int
767do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
753{ 768{
754 struct inode *inode = filp->f_mapping->host; 769 struct inode *inode = filp->f_mapping->host;
755 int status; 770 int status;
@@ -762,20 +777,31 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
762 if (status != 0) 777 if (status != 0)
763 goto out; 778 goto out;
764 779
765 /* Use local locking if mounted with "-onolock" */ 780 /*
766 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 781 * Use local locking if mounted with "-onolock" or with appropriate
782 * "-olocal_lock="
783 */
784 if (!is_local)
767 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 785 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
768 else 786 else
769 status = do_vfs_lock(filp, fl); 787 status = do_vfs_lock(filp, fl);
770 if (status < 0) 788 if (status < 0)
771 goto out; 789 goto out;
790
772 /* 791 /*
773 * Make sure we clear the cache whenever we try to get the lock. 792 * Revalidate the cache if the server has time stamps granular
793 * enough to detect subsecond changes. Otherwise, clear the
794 * cache to prevent missing any changes.
795 *
774 * This makes locking act as a cache coherency point. 796 * This makes locking act as a cache coherency point.
775 */ 797 */
776 nfs_sync_mapping(filp->f_mapping); 798 nfs_sync_mapping(filp->f_mapping);
777 if (!nfs_have_delegation(inode, FMODE_READ)) 799 if (!nfs_have_delegation(inode, FMODE_READ)) {
778 nfs_zap_caches(inode); 800 if (is_time_granular(&NFS_SERVER(inode)->time_delta))
801 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
802 else
803 nfs_zap_caches(inode);
804 }
779out: 805out:
780 return status; 806 return status;
781} 807}
@@ -787,6 +813,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
787{ 813{
788 struct inode *inode = filp->f_mapping->host; 814 struct inode *inode = filp->f_mapping->host;
789 int ret = -ENOLCK; 815 int ret = -ENOLCK;
816 int is_local = 0;
790 817
791 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", 818 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
792 filp->f_path.dentry->d_parent->d_name.name, 819 filp->f_path.dentry->d_parent->d_name.name,
@@ -800,6 +827,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
800 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) 827 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
801 goto out_err; 828 goto out_err;
802 829
830 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
831 is_local = 1;
832
803 if (NFS_PROTO(inode)->lock_check_bounds != NULL) { 833 if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
804 ret = NFS_PROTO(inode)->lock_check_bounds(fl); 834 ret = NFS_PROTO(inode)->lock_check_bounds(fl);
805 if (ret < 0) 835 if (ret < 0)
@@ -807,11 +837,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
807 } 837 }
808 838
809 if (IS_GETLK(cmd)) 839 if (IS_GETLK(cmd))
810 ret = do_getlk(filp, cmd, fl); 840 ret = do_getlk(filp, cmd, fl, is_local);
811 else if (fl->fl_type == F_UNLCK) 841 else if (fl->fl_type == F_UNLCK)
812 ret = do_unlk(filp, cmd, fl); 842 ret = do_unlk(filp, cmd, fl, is_local);
813 else 843 else
814 ret = do_setlk(filp, cmd, fl); 844 ret = do_setlk(filp, cmd, fl, is_local);
815out_err: 845out_err:
816 return ret; 846 return ret;
817} 847}
@@ -821,6 +851,9 @@ out_err:
821 */ 851 */
822static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) 852static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
823{ 853{
854 struct inode *inode = filp->f_mapping->host;
855 int is_local = 0;
856
824 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", 857 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
825 filp->f_path.dentry->d_parent->d_name.name, 858 filp->f_path.dentry->d_parent->d_name.name,
826 filp->f_path.dentry->d_name.name, 859 filp->f_path.dentry->d_name.name,
@@ -829,14 +862,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
829 if (!(fl->fl_flags & FL_FLOCK)) 862 if (!(fl->fl_flags & FL_FLOCK))
830 return -ENOLCK; 863 return -ENOLCK;
831 864
865 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
866 is_local = 1;
867
832 /* We're simulating flock() locks using posix locks on the server */ 868 /* We're simulating flock() locks using posix locks on the server */
833 fl->fl_owner = (fl_owner_t)filp; 869 fl->fl_owner = (fl_owner_t)filp;
834 fl->fl_start = 0; 870 fl->fl_start = 0;
835 fl->fl_end = OFFSET_MAX; 871 fl->fl_end = OFFSET_MAX;
836 872
837 if (fl->fl_type == F_UNLCK) 873 if (fl->fl_type == F_UNLCK)
838 return do_unlk(filp, cmd, fl); 874 return do_unlk(filp, cmd, fl, is_local);
839 return do_setlk(filp, cmd, fl); 875 return do_setlk(filp, cmd, fl, is_local);
840} 876}
841 877
842/* 878/*
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index a70e446e1605..ac7b814ce162 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
54 iput(inode); 54 iput(inode);
55 return -ENOMEM; 55 return -ENOMEM;
56 } 56 }
57 /* Circumvent igrab(): we know the inode is not being freed */ 57 ihold(inode);
58 atomic_inc(&inode->i_count);
59 /* 58 /*
60 * Ensure that this dentry is invisible to d_find_alias(). 59 * Ensure that this dentry is invisible to d_find_alias().
61 * Otherwise, it may be spliced into the tree by 60 * Otherwise, it may be spliced into the tree by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 21a84d45916f..dec47ed8b6b9 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -34,6 +34,212 @@
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36 36
37#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
38
39#include <linux/slab.h>
40#include <linux/cred.h>
41#include <linux/nfs_idmap.h>
42#include <linux/keyctl.h>
43#include <linux/key-type.h>
44#include <linux/rcupdate.h>
45#include <linux/kernel.h>
46#include <linux/err.h>
47
48#include <keys/user-type.h>
49
50#define NFS_UINT_MAXLEN 11
51
52const struct cred *id_resolver_cache;
53
54struct key_type key_type_id_resolver = {
55 .name = "id_resolver",
56 .instantiate = user_instantiate,
57 .match = user_match,
58 .revoke = user_revoke,
59 .destroy = user_destroy,
60 .describe = user_describe,
61 .read = user_read,
62};
63
64int nfs_idmap_init(void)
65{
66 struct cred *cred;
67 struct key *keyring;
68 int ret = 0;
69
70 printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name);
71
72 cred = prepare_kernel_cred(NULL);
73 if (!cred)
74 return -ENOMEM;
75
76 keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred,
77 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
78 KEY_USR_VIEW | KEY_USR_READ,
79 KEY_ALLOC_NOT_IN_QUOTA);
80 if (IS_ERR(keyring)) {
81 ret = PTR_ERR(keyring);
82 goto failed_put_cred;
83 }
84
85 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
86 if (ret < 0)
87 goto failed_put_key;
88
89 ret = register_key_type(&key_type_id_resolver);
90 if (ret < 0)
91 goto failed_put_key;
92
93 cred->thread_keyring = keyring;
94 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
95 id_resolver_cache = cred;
96 return 0;
97
98failed_put_key:
99 key_put(keyring);
100failed_put_cred:
101 put_cred(cred);
102 return ret;
103}
104
105void nfs_idmap_quit(void)
106{
107 key_revoke(id_resolver_cache->thread_keyring);
108 unregister_key_type(&key_type_id_resolver);
109 put_cred(id_resolver_cache);
110}
111
112/*
113 * Assemble the description to pass to request_key()
114 * This function will allocate a new string and update *desc to point
115 * at it. The caller is responsible for freeing *desc.
116 *
117 * On error -ENOMEM is returned. Otherwise, the size of the buffer allocated for *desc (including the trailing NUL) is returned.
118 */
119static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
120 const char *type, size_t typelen, char **desc)
121{
122 char *cp;
123 size_t desclen = typelen + namelen + 2;
124
125 *desc = kmalloc(desclen, GFP_KERNEL);
126 if (!desc)
127 return -ENOMEM;
128
129 cp = *desc;
130 memcpy(cp, type, typelen);
131 cp += typelen;
132 *cp++ = ':';
133
134 memcpy(cp, name, namelen);
135 cp += namelen;
136 *cp = '\0';
137 return desclen;
138}
139
140static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
141 const char *type, void *data, size_t data_size)
142{
143 const struct cred *saved_cred;
144 struct key *rkey;
145 char *desc;
146 struct user_key_payload *payload;
147 ssize_t ret;
148
149 ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
150 if (ret <= 0)
151 goto out;
152
153 saved_cred = override_creds(id_resolver_cache);
154 rkey = request_key(&key_type_id_resolver, desc, "");
155 revert_creds(saved_cred);
156 kfree(desc);
157 if (IS_ERR(rkey)) {
158 ret = PTR_ERR(rkey);
159 goto out;
160 }
161
162 rcu_read_lock();
163 rkey->perm |= KEY_USR_VIEW;
164
165 ret = key_validate(rkey);
166 if (ret < 0)
167 goto out_up;
168
169 payload = rcu_dereference(rkey->payload.data);
170 if (IS_ERR_OR_NULL(payload)) {
171 ret = PTR_ERR(payload);
172 goto out_up;
173 }
174
175 ret = payload->datalen;
176 if (ret > 0 && ret <= data_size)
177 memcpy(data, payload->data, ret);
178 else
179 ret = -EINVAL;
180
181out_up:
182 rcu_read_unlock();
183 key_put(rkey);
184out:
185 return ret;
186}
187
188
189/* ID -> Name */
190static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen)
191{
192 char id_str[NFS_UINT_MAXLEN];
193 int id_len;
194 ssize_t ret;
195
196 id_len = snprintf(id_str, sizeof(id_str), "%u", id);
197 ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen);
198 if (ret < 0)
199 return -EINVAL;
200 return ret;
201}
202
203/* Name -> ID */
204static int nfs_idmap_lookup_id(const char *name, size_t namelen,
205 const char *type, __u32 *id)
206{
207 char id_str[NFS_UINT_MAXLEN];
208 long id_long;
209 ssize_t data_size;
210 int ret = 0;
211
212 data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN);
213 if (data_size <= 0) {
214 ret = -EINVAL;
215 } else {
216 ret = strict_strtol(id_str, 10, &id_long);
217 *id = (__u32)id_long;
218 }
219 return ret;
220}
221
222int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
223{
224 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
225}
226
227int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid)
228{
229 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
230}
231
232int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
233{
234 return nfs_idmap_lookup_name(uid, "user", buf, buflen);
235}
236int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen)
237{
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen);
239}
240
241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
242
37#include <linux/module.h> 243#include <linux/module.h>
38#include <linux/mutex.h> 244#include <linux/mutex.h>
39#include <linux/init.h> 245#include <linux/init.h>
@@ -503,16 +709,17 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele
503 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 709 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
504} 710}
505 711
506int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) 712int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
507{ 713{
508 struct idmap *idmap = clp->cl_idmap; 714 struct idmap *idmap = clp->cl_idmap;
509 715
510 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 716 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
511} 717}
512int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) 718int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
513{ 719{
514 struct idmap *idmap = clp->cl_idmap; 720 struct idmap *idmap = clp->cl_idmap;
515 721
516 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 722 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
517} 723}
518 724
725#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d2d6c72aa78..314f57164602 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -48,6 +48,7 @@
48#include "internal.h" 48#include "internal.h"
49#include "fscache.h" 49#include "fscache.h"
50#include "dns_resolve.h" 50#include "dns_resolve.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_VFS 53#define NFSDBG_FACILITY NFSDBG_VFS
53 54
@@ -234,9 +235,6 @@ nfs_init_locked(struct inode *inode, void *opaque)
234 return 0; 235 return 0;
235} 236}
236 237
237/* Don't use READDIRPLUS on directories that we believe are too large */
238#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
239
240/* 238/*
241 * This is our front-end to iget that looks up inodes by file handle 239 * This is our front-end to iget that looks up inodes by file handle
242 * instead of inode number. 240 * instead of inode number.
@@ -291,8 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
291 } else if (S_ISDIR(inode->i_mode)) { 289 } else if (S_ISDIR(inode->i_mode)) {
292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 290 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
293 inode->i_fop = &nfs_dir_operations; 291 inode->i_fop = &nfs_dir_operations;
294 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) 292 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
295 && fattr->size <= NFS_LIMIT_READDIRPLUS)
296 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 293 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
297 /* Deal with crossing mountpoints */ 294 /* Deal with crossing mountpoints */
298 if ((fattr->valid & NFS_ATTR_FATTR_FSID) 295 if ((fattr->valid & NFS_ATTR_FATTR_FSID)
@@ -623,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
623 nfs_revalidate_inode(server, inode); 620 nfs_revalidate_inode(server, inode);
624} 621}
625 622
626static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) 623struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode)
627{ 624{
628 struct nfs_open_context *ctx; 625 struct nfs_open_context *ctx;
629 626
@@ -633,11 +630,13 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
633 path_get(&ctx->path); 630 path_get(&ctx->path);
634 ctx->cred = get_rpccred(cred); 631 ctx->cred = get_rpccred(cred);
635 ctx->state = NULL; 632 ctx->state = NULL;
633 ctx->mode = f_mode;
636 ctx->flags = 0; 634 ctx->flags = 0;
637 ctx->error = 0; 635 ctx->error = 0;
638 ctx->dir_cookie = 0; 636 ctx->dir_cookie = 0;
639 nfs_init_lock_context(&ctx->lock_context); 637 nfs_init_lock_context(&ctx->lock_context);
640 ctx->lock_context.open_context = ctx; 638 ctx->lock_context.open_context = ctx;
639 INIT_LIST_HEAD(&ctx->list);
641 } 640 }
642 return ctx; 641 return ctx;
643} 642}
@@ -653,11 +652,15 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
653{ 652{
654 struct inode *inode = ctx->path.dentry->d_inode; 653 struct inode *inode = ctx->path.dentry->d_inode;
655 654
656 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) 655 if (!list_empty(&ctx->list)) {
656 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
657 return;
658 list_del(&ctx->list);
659 spin_unlock(&inode->i_lock);
660 } else if (!atomic_dec_and_test(&ctx->lock_context.count))
657 return; 661 return;
658 list_del(&ctx->list); 662 if (inode != NULL)
659 spin_unlock(&inode->i_lock); 663 NFS_PROTO(inode)->close_context(ctx, is_sync);
660 NFS_PROTO(inode)->close_context(ctx, is_sync);
661 if (ctx->cred != NULL) 664 if (ctx->cred != NULL)
662 put_rpccred(ctx->cred); 665 put_rpccred(ctx->cred);
663 path_put(&ctx->path); 666 path_put(&ctx->path);
@@ -673,7 +676,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
673 * Ensure that mmap has a recent RPC credential for use when writing out 676 * Ensure that mmap has a recent RPC credential for use when writing out
674 * shared pages 677 * shared pages
675 */ 678 */
676static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) 679void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
677{ 680{
678 struct inode *inode = filp->f_path.dentry->d_inode; 681 struct inode *inode = filp->f_path.dentry->d_inode;
679 struct nfs_inode *nfsi = NFS_I(inode); 682 struct nfs_inode *nfsi = NFS_I(inode);
@@ -730,11 +733,10 @@ int nfs_open(struct inode *inode, struct file *filp)
730 cred = rpc_lookup_cred(); 733 cred = rpc_lookup_cred();
731 if (IS_ERR(cred)) 734 if (IS_ERR(cred))
732 return PTR_ERR(cred); 735 return PTR_ERR(cred);
733 ctx = alloc_nfs_open_context(&filp->f_path, cred); 736 ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode);
734 put_rpccred(cred); 737 put_rpccred(cred);
735 if (ctx == NULL) 738 if (ctx == NULL)
736 return -ENOMEM; 739 return -ENOMEM;
737 ctx->mode = filp->f_mode;
738 nfs_file_set_open_context(filp, ctx); 740 nfs_file_set_open_context(filp, ctx);
739 put_nfs_open_context(ctx); 741 put_nfs_open_context(ctx);
740 nfs_fscache_set_inode_cookie(inode, filp); 742 nfs_fscache_set_inode_cookie(inode, filp);
@@ -1409,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode)
1409{ 1411{
1410 truncate_inode_pages(&inode->i_data, 0); 1412 truncate_inode_pages(&inode->i_data, 0);
1411 end_writeback(inode); 1413 end_writeback(inode);
1414 pnfs_destroy_layout(NFS_I(inode));
1412 /* If we are holding a delegation, return it! */ 1415 /* If we are holding a delegation, return it! */
1413 nfs_inode_return_delegation_noreclaim(inode); 1416 nfs_inode_return_delegation_noreclaim(inode);
1414 /* First call standard NFS clear_inode() code */ 1417 /* First call standard NFS clear_inode() code */
@@ -1446,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1446 nfsi->delegation = NULL; 1449 nfsi->delegation = NULL;
1447 nfsi->delegation_state = 0; 1450 nfsi->delegation_state = 0;
1448 init_rwsem(&nfsi->rwsem); 1451 init_rwsem(&nfsi->rwsem);
1452 nfsi->layout = NULL;
1449#endif 1453#endif
1450} 1454}
1451 1455
@@ -1493,7 +1497,7 @@ static int nfsiod_start(void)
1493{ 1497{
1494 struct workqueue_struct *wq; 1498 struct workqueue_struct *wq;
1495 dprintk("RPC: creating workqueue nfsiod\n"); 1499 dprintk("RPC: creating workqueue nfsiod\n");
1496 wq = create_singlethread_workqueue("nfsiod"); 1500 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
1497 if (wq == NULL) 1501 if (wq == NULL)
1498 return -ENOMEM; 1502 return -ENOMEM;
1499 nfsiod_workqueue = wq; 1503 nfsiod_workqueue = wq;
@@ -1521,6 +1525,10 @@ static int __init init_nfs_fs(void)
1521{ 1525{
1522 int err; 1526 int err;
1523 1527
1528 err = nfs_idmap_init();
1529 if (err < 0)
1530 goto out9;
1531
1524 err = nfs_dns_resolver_init(); 1532 err = nfs_dns_resolver_init();
1525 if (err < 0) 1533 if (err < 0)
1526 goto out8; 1534 goto out8;
@@ -1585,6 +1593,8 @@ out6:
1585out7: 1593out7:
1586 nfs_dns_resolver_destroy(); 1594 nfs_dns_resolver_destroy();
1587out8: 1595out8:
1596 nfs_idmap_quit();
1597out9:
1588 return err; 1598 return err;
1589} 1599}
1590 1600
@@ -1597,6 +1607,7 @@ static void __exit exit_nfs_fs(void)
1597 nfs_destroy_nfspagecache(); 1607 nfs_destroy_nfspagecache();
1598 nfs_fscache_unregister(); 1608 nfs_fscache_unregister();
1599 nfs_dns_resolver_destroy(); 1609 nfs_dns_resolver_destroy();
1610 nfs_idmap_quit();
1600#ifdef CONFIG_PROC_FS 1611#ifdef CONFIG_PROC_FS
1601 rpc_proc_unregister("nfs"); 1612 rpc_proc_unregister("nfs");
1602#endif 1613#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c961bc92c107..db08ff3ff454 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -63,6 +63,12 @@ struct nfs_clone_mount {
63#define NFS_UNSPEC_PORT (-1) 63#define NFS_UNSPEC_PORT (-1)
64 64
65/* 65/*
66 * Maximum number of pages that readdir can use for creating
67 * a vmapped array of pages.
68 */
69#define NFS_MAX_READDIR_PAGES 8
70
71/*
66 * In-kernel mount arguments 72 * In-kernel mount arguments
67 */ 73 */
68struct nfs_parsed_mount_data { 74struct nfs_parsed_mount_data {
@@ -181,15 +187,15 @@ extern void nfs_destroy_directcache(void);
181/* nfs2xdr.c */ 187/* nfs2xdr.c */
182extern int nfs_stat_to_errno(int); 188extern int nfs_stat_to_errno(int);
183extern struct rpc_procinfo nfs_procedures[]; 189extern struct rpc_procinfo nfs_procedures[];
184extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); 190extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
185 191
186/* nfs3xdr.c */ 192/* nfs3xdr.c */
187extern struct rpc_procinfo nfs3_procedures[]; 193extern struct rpc_procinfo nfs3_procedures[];
188extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); 194extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
189 195
190/* nfs4xdr.c */ 196/* nfs4xdr.c */
191#ifdef CONFIG_NFS_V4 197#ifdef CONFIG_NFS_V4
192extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 198extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
193#endif 199#endif
194#ifdef CONFIG_NFS_V4_1 200#ifdef CONFIG_NFS_V4_1
195extern const u32 nfs41_maxread_overhead; 201extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 59047f8d7d72..eceafe74f473 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
153 .rpc_resp = &result, 153 .rpc_resp = &result,
154 }; 154 };
155 struct rpc_create_args args = { 155 struct rpc_create_args args = {
156 .net = &init_net,
156 .protocol = info->protocol, 157 .protocol = info->protocol,
157 .address = info->sap, 158 .address = info->sap,
158 .addrsize = info->salen, 159 .addrsize = info->salen,
@@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
224 .to_retries = 2, 225 .to_retries = 2,
225 }; 226 };
226 struct rpc_create_args args = { 227 struct rpc_create_args args = {
228 .net = &init_net,
227 .protocol = IPPROTO_UDP, 229 .protocol = IPPROTO_UDP,
228 .address = info->sap, 230 .address = info->sap,
229 .addrsize = info->salen, 231 .addrsize = info->salen,
@@ -436,7 +438,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
436 438
437 for (i = 0; i < entries; i++) { 439 for (i = 0; i < entries; i++) {
438 flavors[i] = ntohl(*p++); 440 flavors[i] = ntohl(*p++);
439 dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); 441 dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
440 } 442 }
441 *count = i; 443 *count = i;
442 444
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index db8846a0e82e..e6bf45710cc7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -337,10 +337,10 @@ nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
337static int 337static int
338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) 338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
339{ 339{
340 p = xdr_encode_fhandle(p, args->fromfh); 340 p = xdr_encode_fhandle(p, args->old_dir);
341 p = xdr_encode_array(p, args->fromname, args->fromlen); 341 p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
342 p = xdr_encode_fhandle(p, args->tofh); 342 p = xdr_encode_fhandle(p, args->new_dir);
343 p = xdr_encode_array(p, args->toname, args->tolen); 343 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
345 return 0; 345 return 0;
346} 346}
@@ -423,9 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
423 struct page **page; 423 struct page **page;
424 size_t hdrlen; 424 size_t hdrlen;
425 unsigned int pglen, recvd; 425 unsigned int pglen, recvd;
426 u32 len;
427 int status, nr = 0; 426 int status, nr = 0;
428 __be32 *end, *entry, *kaddr;
429 427
430 if ((status = ntohl(*p++))) 428 if ((status = ntohl(*p++)))
431 return nfs_stat_to_errno(status); 429 return nfs_stat_to_errno(status);
@@ -445,80 +443,59 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
445 if (pglen > recvd) 443 if (pglen > recvd)
446 pglen = recvd; 444 pglen = recvd;
447 page = rcvbuf->pages; 445 page = rcvbuf->pages;
448 kaddr = p = kmap_atomic(*page, KM_USER0);
449 end = (__be32 *)((char *)p + pglen);
450 entry = p;
451
452 /* Make sure the packet actually has a value_follows and EOF entry */
453 if ((entry + 1) > end)
454 goto short_pkt;
455
456 for (; *p++; nr++) {
457 if (p + 2 > end)
458 goto short_pkt;
459 p++; /* fileid */
460 len = ntohl(*p++);
461 p += XDR_QUADLEN(len) + 1; /* name plus cookie */
462 if (len > NFS2_MAXNAMLEN) {
463 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
464 len);
465 goto err_unmap;
466 }
467 if (p + 2 > end)
468 goto short_pkt;
469 entry = p;
470 }
471
472 /*
473 * Apparently some server sends responses that are a valid size, but
474 * contain no entries, and have value_follows==0 and EOF==0. For
475 * those, just set the EOF marker.
476 */
477 if (!nr && entry[1] == 0) {
478 dprintk("NFS: readdir reply truncated!\n");
479 entry[1] = 1;
480 }
481 out:
482 kunmap_atomic(kaddr, KM_USER0);
483 return nr; 446 return nr;
484 short_pkt: 447}
485 /* 448
486 * When we get a short packet there are 2 possibilities. We can 449static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
487 * return an error, or fix up the response to look like a valid 450{
488 * response and return what we have so far. If there are no 451 dprintk("nfs: %s: prematurely hit end of receive buffer. "
489 * entries and the packet was short, then return -EIO. If there 452 "Remaining buffer length is %tu words.\n",
490 * are valid entries in the response, return them and pretend that 453 func, xdr->end - xdr->p);
491 * the call was successful, but incomplete. The caller can retry the
492 * readdir starting at the last cookie.
493 */
494 entry[0] = entry[1] = 0;
495 if (!nr)
496 nr = -errno_NFSERR_IO;
497 goto out;
498err_unmap:
499 nr = -errno_NFSERR_IO;
500 goto out;
501} 454}
502 455
503__be32 * 456__be32 *
504nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 457nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
505{ 458{
506 if (!*p++) { 459 __be32 *p;
507 if (!*p) 460 p = xdr_inline_decode(xdr, 4);
461 if (unlikely(!p))
462 goto out_overflow;
463 if (!ntohl(*p++)) {
464 p = xdr_inline_decode(xdr, 4);
465 if (unlikely(!p))
466 goto out_overflow;
467 if (!ntohl(*p++))
508 return ERR_PTR(-EAGAIN); 468 return ERR_PTR(-EAGAIN);
509 entry->eof = 1; 469 entry->eof = 1;
510 return ERR_PTR(-EBADCOOKIE); 470 return ERR_PTR(-EBADCOOKIE);
511 } 471 }
512 472
473 p = xdr_inline_decode(xdr, 8);
474 if (unlikely(!p))
475 goto out_overflow;
476
513 entry->ino = ntohl(*p++); 477 entry->ino = ntohl(*p++);
514 entry->len = ntohl(*p++); 478 entry->len = ntohl(*p++);
479
480 p = xdr_inline_decode(xdr, entry->len + 4);
481 if (unlikely(!p))
482 goto out_overflow;
515 entry->name = (const char *) p; 483 entry->name = (const char *) p;
516 p += XDR_QUADLEN(entry->len); 484 p += XDR_QUADLEN(entry->len);
517 entry->prev_cookie = entry->cookie; 485 entry->prev_cookie = entry->cookie;
518 entry->cookie = ntohl(*p++); 486 entry->cookie = ntohl(*p++);
519 entry->eof = !p[0] && p[1]; 487
488 p = xdr_inline_peek(xdr, 8);
489 if (p != NULL)
490 entry->eof = !p[0] && p[1];
491 else
492 entry->eof = 0;
520 493
521 return p; 494 return p;
495
496out_overflow:
497 print_overflow_msg(__func__, xdr);
498 return ERR_PTR(-EIO);
522} 499}
523 500
524/* 501/*
@@ -596,7 +573,6 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
596 struct kvec *iov = rcvbuf->head; 573 struct kvec *iov = rcvbuf->head;
597 size_t hdrlen; 574 size_t hdrlen;
598 u32 len, recvd; 575 u32 len, recvd;
599 char *kaddr;
600 int status; 576 int status;
601 577
602 if ((status = ntohl(*p++))) 578 if ((status = ntohl(*p++)))
@@ -623,10 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
623 return -EIO; 599 return -EIO;
624 } 600 }
625 601
626 /* NULL terminate the string we got */ 602 xdr_terminate_string(rcvbuf, len);
627 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
628 kaddr[len+rcvbuf->page_base] = '\0';
629 kunmap_atomic(kaddr, KM_USER0);
630 return 0; 603 return 0;
631} 604}
632 605
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index fabb4f2849a1..ce939c062a52 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -313,7 +313,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
313 */ 313 */
314static int 314static int
315nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 315nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
316 int flags, struct nameidata *nd) 316 int flags, struct nfs_open_context *ctx)
317{ 317{
318 struct nfs3_createdata *data; 318 struct nfs3_createdata *data;
319 mode_t mode = sattr->ia_mode; 319 mode_t mode = sattr->ia_mode;
@@ -438,19 +438,38 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
438 return 1; 438 return 1;
439} 439}
440 440
441static void
442nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
443{
444 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
445}
446
447static int
448nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
449 struct inode *new_dir)
450{
451 struct nfs_renameres *res;
452
453 if (nfs3_async_handle_jukebox(task, old_dir))
454 return 0;
455 res = task->tk_msg.rpc_resp;
456
457 nfs_post_op_update_inode(old_dir, res->old_fattr);
458 nfs_post_op_update_inode(new_dir, res->new_fattr);
459 return 1;
460}
461
441static int 462static int
442nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, 463nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
443 struct inode *new_dir, struct qstr *new_name) 464 struct inode *new_dir, struct qstr *new_name)
444{ 465{
445 struct nfs3_renameargs arg = { 466 struct nfs_renameargs arg = {
446 .fromfh = NFS_FH(old_dir), 467 .old_dir = NFS_FH(old_dir),
447 .fromname = old_name->name, 468 .old_name = old_name,
448 .fromlen = old_name->len, 469 .new_dir = NFS_FH(new_dir),
449 .tofh = NFS_FH(new_dir), 470 .new_name = new_name,
450 .toname = new_name->name,
451 .tolen = new_name->len
452 }; 471 };
453 struct nfs3_renameres res; 472 struct nfs_renameres res;
454 struct rpc_message msg = { 473 struct rpc_message msg = {
455 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], 474 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
456 .rpc_argp = &arg, 475 .rpc_argp = &arg,
@@ -460,17 +479,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
460 479
461 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 480 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
462 481
463 res.fromattr = nfs_alloc_fattr(); 482 res.old_fattr = nfs_alloc_fattr();
464 res.toattr = nfs_alloc_fattr(); 483 res.new_fattr = nfs_alloc_fattr();
465 if (res.fromattr == NULL || res.toattr == NULL) 484 if (res.old_fattr == NULL || res.new_fattr == NULL)
466 goto out; 485 goto out;
467 486
468 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); 487 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
469 nfs_post_op_update_inode(old_dir, res.fromattr); 488 nfs_post_op_update_inode(old_dir, res.old_fattr);
470 nfs_post_op_update_inode(new_dir, res.toattr); 489 nfs_post_op_update_inode(new_dir, res.new_fattr);
471out: 490out:
472 nfs_free_fattr(res.toattr); 491 nfs_free_fattr(res.old_fattr);
473 nfs_free_fattr(res.fromattr); 492 nfs_free_fattr(res.new_fattr);
474 dprintk("NFS reply rename: %d\n", status); 493 dprintk("NFS reply rename: %d\n", status);
475 return status; 494 return status;
476} 495}
@@ -611,7 +630,7 @@ out:
611 */ 630 */
612static int 631static int
613nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 632nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
614 u64 cookie, struct page *page, unsigned int count, int plus) 633 u64 cookie, struct page **pages, unsigned int count, int plus)
615{ 634{
616 struct inode *dir = dentry->d_inode; 635 struct inode *dir = dentry->d_inode;
617 __be32 *verf = NFS_COOKIEVERF(dir); 636 __be32 *verf = NFS_COOKIEVERF(dir);
@@ -621,7 +640,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
621 .verf = {verf[0], verf[1]}, 640 .verf = {verf[0], verf[1]},
622 .plus = plus, 641 .plus = plus,
623 .count = count, 642 .count = count,
624 .pages = &page 643 .pages = pages
625 }; 644 };
626 struct nfs3_readdirres res = { 645 struct nfs3_readdirres res = {
627 .verf = verf, 646 .verf = verf,
@@ -652,7 +671,8 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
652 671
653 nfs_free_fattr(res.dir_attr); 672 nfs_free_fattr(res.dir_attr);
654out: 673out:
655 dprintk("NFS reply readdir: %d\n", status); 674 dprintk("NFS reply readdir%s: %d\n",
675 plus? "plus" : "", status);
656 return status; 676 return status;
657} 677}
658 678
@@ -722,7 +742,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
722 dprintk("NFS call fsstat\n"); 742 dprintk("NFS call fsstat\n");
723 nfs_fattr_init(stat->fattr); 743 nfs_fattr_init(stat->fattr);
724 status = rpc_call_sync(server->client, &msg, 0); 744 status = rpc_call_sync(server->client, &msg, 0);
725 dprintk("NFS reply statfs: %d\n", status); 745 dprintk("NFS reply fsstat: %d\n", status);
726 return status; 746 return status;
727} 747}
728 748
@@ -844,6 +864,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
844 .unlink_setup = nfs3_proc_unlink_setup, 864 .unlink_setup = nfs3_proc_unlink_setup,
845 .unlink_done = nfs3_proc_unlink_done, 865 .unlink_done = nfs3_proc_unlink_done,
846 .rename = nfs3_proc_rename, 866 .rename = nfs3_proc_rename,
867 .rename_setup = nfs3_proc_rename_setup,
868 .rename_done = nfs3_proc_rename_done,
847 .link = nfs3_proc_link, 869 .link = nfs3_proc_link,
848 .symlink = nfs3_proc_symlink, 870 .symlink = nfs3_proc_symlink,
849 .mkdir = nfs3_proc_mkdir, 871 .mkdir = nfs3_proc_mkdir,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 9769704f8ce6..d9a5e832c257 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -100,6 +100,13 @@ static const umode_t nfs_type2fmt[] = {
100 [NF3FIFO] = S_IFIFO, 100 [NF3FIFO] = S_IFIFO,
101}; 101};
102 102
/*
 * Debug helper for the stream-based decoders: report through dprintk() that
 * the decoder named by @func ran off the end of the receive buffer, and
 * print the difference xdr->end - xdr->p as the remaining length in words.
 */
103static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
104{
105 dprintk("nfs: %s: prematurely hit end of receive buffer. "
106 "Remaining buffer length is %tu words.\n",
107 func, xdr->end - xdr->p);
108}
109
103/* 110/*
104 * Common NFS XDR functions as inlines 111 * Common NFS XDR functions as inlines
105 */ 112 */
@@ -119,6 +126,29 @@ xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
119 return NULL; 126 return NULL;
120} 127}
121 128
/*
 * Decode a length-prefixed file handle from @xdr into @fh.
 *
 * Returns:
 *   - a non-NULL pointer (p + XDR_QUADLEN(fh->size)) on success,
 *   - NULL when the decoded length exceeds NFS3_FHSIZE (nothing is copied),
 *   - ERR_PTR(-EIO) when xdr_inline_decode() cannot supply the bytes.
 *
 * NOTE(review): fh->size is assigned straight from the 32-bit wire value
 * before the range check; if the field is narrower than 32 bits the value
 * is truncated before being compared -- confirm against struct nfs_fh.
 */
129static inline __be32 *
130xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh)
131{
132 __be32 *p;
/* 4 bytes: the file handle length word */
133 p = xdr_inline_decode(xdr, 4);
134 if (unlikely(!p))
135 goto out_overflow;
136 fh->size = ntohl(*p++);
137
138 if (fh->size <= NFS3_FHSIZE) {
/* pull the handle body itself out of the stream */
139 p = xdr_inline_decode(xdr, fh->size);
140 if (unlikely(!p))
141 goto out_overflow;
142 memcpy(fh->data, p, fh->size);
143 return p + XDR_QUADLEN(fh->size);
144 }
/* oversized handle: signalled to the caller as NULL, not as an ERR_PTR */
145 return NULL;
146
147out_overflow:
148 print_overflow_msg(__func__, xdr);
149 return ERR_PTR(-EIO);
150}
151
122/* 152/*
123 * Encode/decode time. 153 * Encode/decode time.
124 */ 154 */
@@ -241,6 +271,26 @@ xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
241} 271}
242 272
243static inline __be32 * 273static inline __be32 *
274xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr)
275{
/*
 * Stream variant of the post_op_attr decoder: read the 4-byte
 * "attributes follow" flag and, when it is non-zero, pull the attribute
 * body from @xdr and decode it into @fattr with xdr_decode_fattr().
 * Returns the pointer left after the last decode step, or ERR_PTR(-EIO)
 * if xdr_inline_decode() runs out of buffer.
 */
276 __be32 *p;
277
278 p = xdr_inline_decode(xdr, 4);
279 if (unlikely(!p))
280 goto out_overflow;
281 if (ntohl(*p++)) {
/* 84 bytes == 21 XDR words; presumably the fixed size of an NFSv3 fattr -- confirm */
282 p = xdr_inline_decode(xdr, 84);
283 if (unlikely(!p))
284 goto out_overflow;
285 p = xdr_decode_fattr(p, fattr);
286 }
287 return p;
288out_overflow:
289 print_overflow_msg(__func__, xdr);
290 return ERR_PTR(-EIO);
291}
292
293static inline __be32 *
244xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) 294xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
245{ 295{
246 if (*p++) 296 if (*p++)
@@ -442,12 +492,12 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
442 * Encode RENAME arguments 492 * Encode RENAME arguments
443 */ 493 */
444static int 494static int
445nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args) 495nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
446{ 496{
447 p = xdr_encode_fhandle(p, args->fromfh); 497 p = xdr_encode_fhandle(p, args->old_dir);
448 p = xdr_encode_array(p, args->fromname, args->fromlen); 498 p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
449 p = xdr_encode_fhandle(p, args->tofh); 499 p = xdr_encode_fhandle(p, args->new_dir);
450 p = xdr_encode_array(p, args->toname, args->tolen); 500 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
451 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 501 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
452 return 0; 502 return 0;
453} 503}
@@ -504,9 +554,8 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
504 struct kvec *iov = rcvbuf->head; 554 struct kvec *iov = rcvbuf->head;
505 struct page **page; 555 struct page **page;
506 size_t hdrlen; 556 size_t hdrlen;
507 u32 len, recvd, pglen; 557 u32 recvd, pglen;
508 int status, nr = 0; 558 int status, nr = 0;
509 __be32 *entry, *end, *kaddr;
510 559
511 status = ntohl(*p++); 560 status = ntohl(*p++);
512 /* Decode post_op_attrs */ 561 /* Decode post_op_attrs */
@@ -536,99 +585,38 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
536 if (pglen > recvd) 585 if (pglen > recvd)
537 pglen = recvd; 586 pglen = recvd;
538 page = rcvbuf->pages; 587 page = rcvbuf->pages;
539 kaddr = p = kmap_atomic(*page, KM_USER0);
540 end = (__be32 *)((char *)p + pglen);
541 entry = p;
542
543 /* Make sure the packet actually has a value_follows and EOF entry */
544 if ((entry + 1) > end)
545 goto short_pkt;
546
547 for (; *p++; nr++) {
548 if (p + 3 > end)
549 goto short_pkt;
550 p += 2; /* inode # */
551 len = ntohl(*p++); /* string length */
552 p += XDR_QUADLEN(len) + 2; /* name + cookie */
553 if (len > NFS3_MAXNAMLEN) {
554 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
555 len);
556 goto err_unmap;
557 }
558 588
559 if (res->plus) {
560 /* post_op_attr */
561 if (p + 2 > end)
562 goto short_pkt;
563 if (*p++) {
564 p += 21;
565 if (p + 1 > end)
566 goto short_pkt;
567 }
568 /* post_op_fh3 */
569 if (*p++) {
570 if (p + 1 > end)
571 goto short_pkt;
572 len = ntohl(*p++);
573 if (len > NFS3_FHSIZE) {
574 dprintk("NFS: giant filehandle in "
575 "readdir (len 0x%x)!\n", len);
576 goto err_unmap;
577 }
578 p += XDR_QUADLEN(len);
579 }
580 }
581
582 if (p + 2 > end)
583 goto short_pkt;
584 entry = p;
585 }
586
587 /*
588 * Apparently some server sends responses that are a valid size, but
589 * contain no entries, and have value_follows==0 and EOF==0. For
590 * those, just set the EOF marker.
591 */
592 if (!nr && entry[1] == 0) {
593 dprintk("NFS: readdir reply truncated!\n");
594 entry[1] = 1;
595 }
596 out:
597 kunmap_atomic(kaddr, KM_USER0);
598 return nr; 589 return nr;
599 short_pkt:
600 /*
601 * When we get a short packet there are 2 possibilities. We can
602 * return an error, or fix up the response to look like a valid
603 * response and return what we have so far. If there are no
604 * entries and the packet was short, then return -EIO. If there
605 * are valid entries in the response, return them and pretend that
606 * the call was successful, but incomplete. The caller can retry the
607 * readdir starting at the last cookie.
608 */
609 entry[0] = entry[1] = 0;
610 if (!nr)
611 nr = -errno_NFSERR_IO;
612 goto out;
613err_unmap:
614 nr = -errno_NFSERR_IO;
615 goto out;
616} 590}
617 591
618__be32 * 592__be32 *
619nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 593nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
620{ 594{
595 __be32 *p;
621 struct nfs_entry old = *entry; 596 struct nfs_entry old = *entry;
622 597
623 if (!*p++) { 598 p = xdr_inline_decode(xdr, 4);
624 if (!*p) 599 if (unlikely(!p))
600 goto out_overflow;
601 if (!ntohl(*p++)) {
602 p = xdr_inline_decode(xdr, 4);
603 if (unlikely(!p))
604 goto out_overflow;
605 if (!ntohl(*p++))
625 return ERR_PTR(-EAGAIN); 606 return ERR_PTR(-EAGAIN);
626 entry->eof = 1; 607 entry->eof = 1;
627 return ERR_PTR(-EBADCOOKIE); 608 return ERR_PTR(-EBADCOOKIE);
628 } 609 }
629 610
611 p = xdr_inline_decode(xdr, 12);
612 if (unlikely(!p))
613 goto out_overflow;
630 p = xdr_decode_hyper(p, &entry->ino); 614 p = xdr_decode_hyper(p, &entry->ino);
631 entry->len = ntohl(*p++); 615 entry->len = ntohl(*p++);
616
617 p = xdr_inline_decode(xdr, entry->len + 8);
618 if (unlikely(!p))
619 goto out_overflow;
632 entry->name = (const char *) p; 620 entry->name = (const char *) p;
633 p += XDR_QUADLEN(entry->len); 621 p += XDR_QUADLEN(entry->len);
634 entry->prev_cookie = entry->cookie; 622 entry->prev_cookie = entry->cookie;
@@ -636,10 +624,17 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
636 624
637 if (plus) { 625 if (plus) {
638 entry->fattr->valid = 0; 626 entry->fattr->valid = 0;
639 p = xdr_decode_post_op_attr(p, entry->fattr); 627 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
628 if (IS_ERR(p))
629 goto out_overflow_exit;
640 /* In fact, a post_op_fh3: */ 630 /* In fact, a post_op_fh3: */
631 p = xdr_inline_decode(xdr, 4);
632 if (unlikely(!p))
633 goto out_overflow;
641 if (*p++) { 634 if (*p++) {
642 p = xdr_decode_fhandle(p, entry->fh); 635 p = xdr_decode_fhandle_stream(xdr, entry->fh);
636 if (IS_ERR(p))
637 goto out_overflow_exit;
643 /* Ugh -- server reply was truncated */ 638 /* Ugh -- server reply was truncated */
644 if (p == NULL) { 639 if (p == NULL) {
645 dprintk("NFS: FH truncated\n"); 640 dprintk("NFS: FH truncated\n");
@@ -650,8 +645,18 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
650 memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); 645 memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
651 } 646 }
652 647
653 entry->eof = !p[0] && p[1]; 648 p = xdr_inline_peek(xdr, 8);
649 if (p != NULL)
650 entry->eof = !p[0] && p[1];
651 else
652 entry->eof = 0;
653
654 return p; 654 return p;
655
656out_overflow:
657 print_overflow_msg(__func__, xdr);
658out_overflow_exit:
659 return ERR_PTR(-EIO);
655} 660}
656 661
657/* 662/*
@@ -824,7 +829,6 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
824 struct kvec *iov = rcvbuf->head; 829 struct kvec *iov = rcvbuf->head;
825 size_t hdrlen; 830 size_t hdrlen;
826 u32 len, recvd; 831 u32 len, recvd;
827 char *kaddr;
828 int status; 832 int status;
829 833
830 status = ntohl(*p++); 834 status = ntohl(*p++);
@@ -857,10 +861,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
857 return -EIO; 861 return -EIO;
858 } 862 }
859 863
860 /* NULL terminate the string we got */ 864 xdr_terminate_string(rcvbuf, len);
861 kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0);
862 kaddr[len+rcvbuf->page_base] = '\0';
863 kunmap_atomic(kaddr, KM_USER0);
864 return 0; 865 return 0;
865} 866}
866 867
@@ -970,14 +971,14 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
970 * Decode RENAME reply 971 * Decode RENAME reply
971 */ 972 */
972static int 973static int
973nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) 974nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
974{ 975{
975 int status; 976 int status;
976 977
977 if ((status = ntohl(*p++)) != 0) 978 if ((status = ntohl(*p++)) != 0)
978 status = nfs_stat_to_errno(status); 979 status = nfs_stat_to_errno(status);
979 p = xdr_decode_wcc_data(p, res->fromattr); 980 p = xdr_decode_wcc_data(p, res->old_fattr);
980 p = xdr_decode_wcc_data(p, res->toattr); 981 p = xdr_decode_wcc_data(p, res->new_fattr);
981 return status; 982 return status;
982} 983}
983 984
@@ -1043,8 +1044,9 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
1043 res->wtmult = ntohl(*p++); 1044 res->wtmult = ntohl(*p++);
1044 res->dtpref = ntohl(*p++); 1045 res->dtpref = ntohl(*p++);
1045 p = xdr_decode_hyper(p, &res->maxfilesize); 1046 p = xdr_decode_hyper(p, &res->maxfilesize);
1047 p = xdr_decode_time3(p, &res->time_delta);
1046 1048
1047 /* ignore time_delta and properties */ 1049 /* ignore properties */
1048 res->lease_time = 0; 1050 res->lease_time = 0;
1049 return 0; 1051 return 0;
1050} 1052}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 311e15cc8af0..9fa496387fdf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -242,8 +242,6 @@ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); 244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
245extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
246extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
247extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 245extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
248extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 246extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
249 struct nfs4_fs_locations *fs_locations, struct page *page); 247 struct nfs4_fs_locations *fs_locations, struct page *page);
@@ -333,7 +331,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
333extern const nfs4_stateid zero_stateid; 331extern const nfs4_stateid zero_stateid;
334 332
335/* nfs4xdr.c */ 333/* nfs4xdr.c */
336extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 334extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
337extern struct rpc_procinfo nfs4_procedures[]; 335extern struct rpc_procinfo nfs4_procedures[];
338 336
339struct nfs4_mount_data; 337struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
new file mode 100644
index 000000000000..2e92f0d8d654
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.c
@@ -0,0 +1,280 @@
1/*
2 * Module for the pnfs nfs4 file layout driver.
3 * Defines all I/O and Policy interface operations, plus code
4 * to register itself with the pNFS client.
5 *
6 * Copyright (c) 2002
7 * The Regents of the University of Michigan
8 * All Rights Reserved
9 *
10 * Dean Hildebrand <dhildebz@umich.edu>
11 *
12 * Permission is granted to use, copy, create derivative works, and
13 * redistribute this software and such derivative works for any purpose,
14 * so long as the name of the University of Michigan is not used in
15 * any advertising or publicity pertaining to the use or distribution
16 * of this software without specific, written prior authorization. If
17 * the above copyright notice or any other identification of the
18 * University of Michigan is included in any copy of any portion of
19 * this software, then the disclaimer below must also be included.
20 *
21 * This software is provided as is, without representation or warranty
22 * of any kind either express or implied, including without limitation
23 * the implied warranties of merchantability, fitness for a particular
24 * purpose, or noninfringement. The Regents of the University of
25 * Michigan shall not be liable for any damages, including special,
26 * indirect, incidental, or consequential damages, with respect to any
27 * claim arising out of or in connection with the use of the software,
28 * even if it has been or is hereafter advised of the possibility of
29 * such damages.
30 */
31
32#include <linux/nfs_fs.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42
/*
 * .set_layoutdriver hook: set up the device id cache of the nfs_client
 * behind @nfss, registering nfs4_fl_free_deviceid_callback as the routine
 * used to free cached device ids.
 *
 * Returns 0 on success, or the non-zero status reported by
 * pnfs_alloc_init_deviceid_cache() (a warning is logged in that case).
 */
43static int
44filelayout_set_layoutdriver(struct nfs_server *nfss)
45{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
47 nfs4_fl_free_deviceid_callback);
48 if (status) {
49 printk(KERN_WARNING "%s: deviceid cache could not be "
50 "initialized\n", __func__);
51 return status;
52 }
53 dprintk("%s: deviceid cache has been initialized successfully\n",
54 __func__);
55 return 0;
56}
57
58/* Clear out the layout by destroying its device list */
59static int
60filelayout_clear_layoutdriver(struct nfs_server *nfss)
61{
/*
 * .clear_layoutdriver hook: drop this server's reference on the client's
 * device id cache, if the client has one.  Always returns 0.
 */
62 dprintk("--> %s\n", __func__);
63
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0;
67}
68
69/*
70 * filelayout_check_layout()
71 *
72 * Make sure layout segment parameters are sane WRT the device.
73 * At this point no generic layer initialization of the lseg has occurred,
74 * and nothing has been added to the layout_hdr cache.
75 *
76 */
77static int
78filelayout_check_layout(struct pnfs_layout_hdr *lo,
79 struct nfs4_filelayout_segment *fl,
80 struct nfs4_layoutget_res *lgr,
81 struct nfs4_deviceid *id)
82{
83 struct nfs4_file_layout_dsaddr *dsaddr;
84 int status = -EINVAL;
85 struct nfs_server *nfss = NFS_SERVER(lo->inode);
86
87 dprintk("--> %s\n", __func__);
88
89 if (fl->pattern_offset > lgr->range.offset) {
90 dprintk("%s pattern_offset %lld to large\n",
91 __func__, fl->pattern_offset);
92 goto out;
93 }
94
95 if (fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n",
97 __func__, fl->stripe_unit);
98 goto out;
99 }
100
101 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
103 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->inode, id);
105 if (dsaddr == NULL)
106 goto out;
107 }
108 fl->dsaddr = dsaddr;
109
110 if (fl->first_stripe_index < 0 ||
111 fl->first_stripe_index >= dsaddr->stripe_count) {
112 dprintk("%s Bad first_stripe_index %d\n",
113 __func__, fl->first_stripe_index);
114 goto out_put;
115 }
116
117 if ((fl->stripe_type == STRIPE_SPARSE &&
118 fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
119 (fl->stripe_type == STRIPE_DENSE &&
120 fl->num_fh != dsaddr->stripe_count)) {
121 dprintk("%s num_fh %u not valid for given packing\n",
122 __func__, fl->num_fh);
123 goto out_put;
124 }
125
126 if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
127 dprintk("%s Stripe unit (%u) not aligned with rsize %u "
128 "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
129 nfss->wsize);
130 }
131
132 status = 0;
133out:
134 dprintk("--> %s returns %d\n", __func__, status);
135 return status;
136out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
138 goto out;
139}
140
141static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
142{
143 int i;
144
145 for (i = 0; i < fl->num_fh; i++) {
146 if (!fl->fh_array[i])
147 break;
148 kfree(fl->fh_array[i]);
149 }
150 kfree(fl->fh_array);
151 fl->fh_array = NULL;
152}
153
/*
 * Free a file layout segment: its file handle array first, then the
 * segment structure itself.  Does not touch fl->dsaddr.
 */
154static void
155_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
156{
157 filelayout_free_fh_array(fl);
158 kfree(fl);
159}
160
161static int
162filelayout_decode_layout(struct pnfs_layout_hdr *flo,
163 struct nfs4_filelayout_segment *fl,
164 struct nfs4_layoutget_res *lgr,
165 struct nfs4_deviceid *id)
166{
167 uint32_t *p = (uint32_t *)lgr->layout.buf;
168 uint32_t nfl_util;
169 int i;
170
171 dprintk("%s: set_layout_map Begin\n", __func__);
172
173 memcpy(id, p, sizeof(*id));
174 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
175 print_deviceid(id);
176
177 nfl_util = be32_to_cpup(p++);
178 if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
179 fl->commit_through_mds = 1;
180 if (nfl_util & NFL4_UFLG_DENSE)
181 fl->stripe_type = STRIPE_DENSE;
182 else
183 fl->stripe_type = STRIPE_SPARSE;
184 fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
185
186 fl->first_stripe_index = be32_to_cpup(p++);
187 p = xdr_decode_hyper(p, &fl->pattern_offset);
188 fl->num_fh = be32_to_cpup(p++);
189
190 dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
191 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
192 fl->pattern_offset);
193
194 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
195 GFP_KERNEL);
196 if (!fl->fh_array)
197 return -ENOMEM;
198
199 for (i = 0; i < fl->num_fh; i++) {
200 /* Do we want to use a mempool here? */
201 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
202 if (!fl->fh_array[i]) {
203 filelayout_free_fh_array(fl);
204 return -ENOMEM;
205 }
206 fl->fh_array[i]->size = be32_to_cpup(p++);
207 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
208 printk(KERN_ERR "Too big fh %d received %d\n",
209 i, fl->fh_array[i]->size);
210 filelayout_free_fh_array(fl);
211 return -EIO;
212 }
213 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
214 p += XDR_QUADLEN(fl->fh_array[i]->size);
215 dprintk("DEBUG: %s: fh len %d\n", __func__,
216 fl->fh_array[i]->size);
217 }
218
219 return 0;
220}
221
/*
 * .alloc_lseg hook: allocate a zeroed file layout segment, decode the
 * LAYOUTGET result @lgr into it and validate it against the device.
 *
 * Returns the embedded generic segment header on success.  Returns NULL
 * if the allocation fails, or if decoding or validation fails (the
 * segment is freed again in that case).
 */
222static struct pnfs_layout_segment *
223filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
224 struct nfs4_layoutget_res *lgr)
225{
226 struct nfs4_filelayout_segment *fl;
227 int rc;
228 struct nfs4_deviceid id;
229
230 dprintk("--> %s\n", __func__);
231 fl = kzalloc(sizeof(*fl), GFP_KERNEL);
232 if (!fl)
233 return NULL;
234
235 rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
/* the check is only attempted when decoding succeeded (|| short-circuits) */
236 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
237 _filelayout_free_lseg(fl);
238 return NULL;
239 }
240 return &fl->generic_hdr;
241}
242
/*
 * .free_lseg hook: drop the segment's reference on its device id (taken
 * in filelayout_check_layout()) and free the segment.
 */
243static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{
246 struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248
249 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache,
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl);
253}
254
/*
 * Layout driver descriptor handed to pnfs_register_layoutdriver() and
 * pnfs_unregister_layoutdriver(); wires the file layout hooks defined in
 * this file to the LAYOUT_NFSV4_1_FILES layout type.
 */
255static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver,
260 .clear_layoutdriver = filelayout_clear_layoutdriver,
261 .alloc_lseg = filelayout_alloc_lseg,
262 .free_lseg = filelayout_free_lseg,
263};
264
/*
 * Module entry point: log a notice and register the file layout driver.
 * Returns whatever pnfs_register_layoutdriver() returns.
 */
265static int __init nfs4filelayout_init(void)
266{
267 printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
268 __func__);
269 return pnfs_register_layoutdriver(&filelayout_type);
270}
271
/* Module exit point: log a notice and unregister the file layout driver. */
272static void __exit nfs4filelayout_exit(void)
273{
274 printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
275 __func__);
276 pnfs_unregister_layoutdriver(&filelayout_type);
277}
278
279module_init(nfs4filelayout_init);
280module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
new file mode 100644
index 000000000000..bbf60dd2ab9d
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.h
@@ -0,0 +1,94 @@
1/*
2 * NFSv4 file layout driver data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_NFS4FILELAYOUT_H
31#define FS_NFS_NFS4FILELAYOUT_H
32
33#include "pnfs.h"
34
35/*
36 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures.
40 */
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43
/*
 * Packing of a file layout, stored in nfs4_filelayout_segment.stripe_type.
 * The layout decoder selects STRIPE_DENSE when the NFL4_UFLG_DENSE flag is
 * set in the layout's nfl_util word and STRIPE_SPARSE otherwise.
 */
44enum stripetype4 {
45 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2
47};
48
49/* Individual ip address */
50struct nfs4_pnfs_ds {
51 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
/* IPv4 address; the debug printer converts it with ntohl() before printing */
52 u32 ds_ip_addr;
/* port as produced by htons() in the address decoder, held in a u32 */
53 u32 ds_port;
/* client for this data server; may be NULL, released on destroy if set */
54 struct nfs_client *ds_clp;
/* reference count: 1 on allocation, +1 per additional device id using it */
55 atomic_t ds_count;
56};
57
/*
 * Decoded device address information for one device id: a stripe index
 * table plus the list of data servers those indices refer to.
 */
58struct nfs4_file_layout_dsaddr {
/* embedded generic node; container_of() on it recovers this structure */
59 struct pnfs_deviceid_node deviceid;
/* number of entries in stripe_indices */
60 u32 stripe_count;
/* separately allocated table; the decoder rejects entries >= ds_num */
61 u8 *stripe_indices;
/* number of entries in ds_list */
62 u32 ds_num;
/*
 * Variable-length tail declared with one element: the structure is
 * allocated with room for ds_num pointers (sizeof + (ds_num - 1) extra).
 */
63 struct nfs4_pnfs_ds *ds_list[1];
64};
65
/*
 * File layout specific layout segment.  Wraps the generic segment header
 * and carries the fields decoded from the LAYOUTGET reply.
 */
66struct nfs4_filelayout_segment {
/* generic header; FILELAYOUT_LSEG() maps it back to this structure */
67 struct pnfs_layout_segment generic_hdr;
/* STRIPE_SPARSE or STRIPE_DENSE */
68 u32 stripe_type;
/* set to 1 when NFL4_UFLG_COMMIT_THRU_MDS is present in nfl_util */
69 u32 commit_through_mds;
/* nfl_util with the NFL4_UFLG_MASK flag bits cleared */
70 u32 stripe_unit;
/* validated to be below dsaddr->stripe_count */
71 u32 first_stripe_index;
/* validated not to exceed the offset of the returned layout range */
72 u64 pattern_offset;
73 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
/* number of entries in fh_array */
74 unsigned int num_fh;
/* array of num_fh individually allocated file handles */
75 struct nfs_fh **fh_array;
76};
77
/*
 * Map a generic layout segment back to the file layout segment that
 * embeds it as its generic_hdr member.
 */
78static inline struct nfs4_filelayout_segment *
79FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
80{
81 return container_of(lseg,
82 struct nfs4_filelayout_segment,
83 generic_hdr);
84}
85
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
87extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id);
89extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
91struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93
94#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
new file mode 100644
index 000000000000..51fe64ace55a
--- /dev/null
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -0,0 +1,448 @@
1/*
2 * Device operations for the pnfs nfs4 file layout driver.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 * Garth Goodson <Garth.Goodson@netapp.com>
10 *
11 * Permission is granted to use, copy, create derivative works, and
12 * redistribute this software and such derivative works for any purpose,
13 * so long as the name of the University of Michigan is not used in
14 * any advertising or publicity pertaining to the use or distribution
15 * of this software without specific, written prior authorization. If
16 * the above copyright notice or any other identification of the
17 * University of Michigan is included in any copy of any portion of
18 * this software, then the disclaimer below must also be included.
19 *
20 * This software is provided as is, without representation or warranty
21 * of any kind either express or implied, including without limitation
22 * the implied warranties of merchantability, fitness for a particular
23 * purpose, or noninfringement. The Regents of the University of
24 * Michigan shall not be liable for any damages, including special,
25 * indirect, incidental, or consequential damages, with respect to any
26 * claim arising out of or in connection with the use of the software,
27 * even if it has been or is hereafter advised of the possibility of
28 * such damages.
29 */
30
31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39/*
40 * Data server cache
41 *
42 * Data servers can be mapped to different device ids.
43 * nfs4_pnfs_ds reference counting
44 * - set to 1 on allocation
45 * - incremented when a device id maps a data server already in the cache.
46 * - decremented when deviceid is removed from the cache.
47 */
48DEFINE_SPINLOCK(nfs4_ds_cache_lock);
49static LIST_HEAD(nfs4_data_server_cache);
50
51/* Debug routines */
/*
 * Dump one data server entry with printk(): address, port, reference
 * count, client pointer and the client's cl_exchange_flags (0 when no
 * client is attached).  A NULL @ds is reported and otherwise ignored.
 * Note the printk() calls carry no explicit log level.
 */
52void
53print_ds(struct nfs4_pnfs_ds *ds)
54{
55 if (ds == NULL) {
56 printk("%s NULL device\n", __func__);
57 return;
58 }
59 printk(" ip_addr %x port %hu\n"
60 " ref count %d\n"
61 " client %p\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66}
67
/*
 * Dump every data server referenced by @dsaddr via print_ds().  All
 * output is inside the ifdebug(FACILITY) block.
 *
 * NOTE(review): the function has external linkage but no declaration is
 * visible in nfs4filelayout.h -- confirm whether it should be static.
 */
68void
69print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
70{
71 int i;
72
73 ifdebug(FACILITY) {
74 printk("%s dsaddr->ds_num %d\n", __func__,
75 dsaddr->ds_num);
76 for (i = 0; i < dsaddr->ds_num; i++)
77 print_ds(dsaddr->ds_list[i]);
78 }
79}
80
/*
 * Debug-print a device id as four 32-bit words in hex.  The words are
 * printed back to back without zero padding, so the output is not a
 * fixed-width rendering of the id.
 */
81void print_deviceid(struct nfs4_deviceid *id)
82{
/* view the id as an array of u32; four words are read */
83 u32 *p = (u32 *)id;
84
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__,
86 p[0], p[1], p[2], p[3]);
87}
88
89/* nfs4_ds_cache_lock is held */
/*
 * Search nfs4_data_server_cache for an entry whose ds_ip_addr and ds_port
 * both equal the given values (compared as stored, without conversion).
 * Returns the entry, or NULL if there is none.  No reference is taken.
 */
90static struct nfs4_pnfs_ds *
91_data_server_lookup_locked(u32 ip_addr, u32 port)
92{
93 struct nfs4_pnfs_ds *ds;
94
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr), ntohs(port));
97
98 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
99 if (ds->ds_ip_addr == ip_addr &&
100 ds->ds_port == port) {
101 return ds;
102 }
103 }
104 return NULL;
105}
106
/*
 * Release a data server entry: put its nfs_client, if one is attached,
 * and free the structure.  The entry is not unlinked from the cache list
 * here; the visible caller does that before calling.
 */
107static void
108destroy_ds(struct nfs4_pnfs_ds *ds)
109{
110 dprintk("--> %s\n", __func__);
111 ifdebug(FACILITY)
112 print_ds(ds);
113
114 if (ds->ds_clp)
115 nfs_put_client(ds->ds_clp);
116 kfree(ds);
117}
118
/*
 * Free a device address structure: drop one reference on each data server
 * it lists, destroy those whose count reaches zero, then free the stripe
 * index table and the structure itself.
 */
119static void
120nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
121{
122 struct nfs4_pnfs_ds *ds;
123 int i;
124
125 print_deviceid(&dsaddr->deviceid.de_id);
126
127 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i];
/* slots may be NULL, e.g. when a data server could not be decoded */
129 if (ds != NULL) {
/*
 * The branch runs when the count hit zero; the spin_unlock() below
 * shows nfs4_ds_cache_lock is held at that point, so the entry is
 * unlinked under the lock and destroyed after releasing it.
 */
130 if (atomic_dec_and_lock(&ds->ds_count,
131 &nfs4_ds_cache_lock)) {
132 list_del_init(&ds->ds_node);
133 spin_unlock(&nfs4_ds_cache_lock);
134 destroy_ds(ds);
135 }
136 }
137 }
138 kfree(dsaddr->stripe_indices);
139 kfree(dsaddr);
140}
141
/*
 * Free callback registered with the device id cache by the layout driver:
 * recover the file layout structure embedding @device and free it.
 */
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
/*
 * Return the data server cache entry for (@ip_addr, @port), creating it
 * if necessary.  A new entry starts with ds_count 1 and no client; an
 * existing entry gets its ds_count incremented instead.
 *
 * @inode is not used.  Returns NULL only when the allocation fails.
 */
151static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{
154 struct nfs4_pnfs_ds *tmp_ds, *ds;
155
/*
 * Allocate before taking the lock; the candidate is thrown away again
 * if the lookup finds an existing entry.
 */
156 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
157 if (!ds)
158 goto out;
159
160 spin_lock(&nfs4_ds_cache_lock);
161 tmp_ds = _data_server_lookup_locked(ip_addr, port);
162 if (tmp_ds == NULL) {
163 ds->ds_ip_addr = ip_addr;
164 ds->ds_port = port;
165 atomic_set(&ds->ds_count, 1);
166 INIT_LIST_HEAD(&ds->ds_node);
167 ds->ds_clp = NULL;
168 list_add(&ds->ds_node, &nfs4_data_server_cache);
169 dprintk("%s add new data server ip 0x%x\n", __func__,
170 ds->ds_ip_addr);
171 } else {
172 kfree(ds);
173 atomic_inc(&tmp_ds->ds_count);
174 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
175 __func__, tmp_ds->ds_ip_addr,
176 atomic_read(&tmp_ds->ds_count));
177 ds = tmp_ds;
178 }
179 spin_unlock(&nfs4_ds_cache_lock);
180out:
181 return ds;
182}
183
184/*
185 * Currently only support ipv4, and one multi-path address.
186 */
187static struct nfs4_pnfs_ds *
188decode_and_add_ds(__be32 **pp, struct inode *inode)
189{
190 struct nfs4_pnfs_ds *ds = NULL;
191 char *buf;
192 const char *ipend, *pstr;
193 u32 ip_addr, port;
194 int nlen, rlen, i;
195 int tmp[2];
196 __be32 *r_netid, *r_addr, *p = *pp;
197
198 /* r_netid */
199 nlen = be32_to_cpup(p++);
200 r_netid = p;
201 p += XDR_QUADLEN(nlen);
202
203 /* r_addr */
204 rlen = be32_to_cpup(p++);
205 r_addr = p;
206 p += XDR_QUADLEN(rlen);
207 *pp = p;
208
209 /* Check that netid is "tcp" */
210 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
212 goto out_err;
213 }
214
215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s Invalid address, length %d\n", __func__,
218 rlen);
219 goto out_err;
220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen);
224
225 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i = 0; i < 2; i++) {
227 char *res = strrchr(buf, '.');
228 *res = '-';
229 }
230
231 /* Currently only support ipv4 address */
232 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
233 dprintk("%s: Only ipv4 addresses supported\n", __func__);
234 goto out_free;
235 }
236
237 /* port */
238 pstr = ipend;
239 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
240 port = htons((tmp[0] << 8) | (tmp[1]));
241
242 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
243 dprintk("%s Decoded address and port %s\n", __func__, buf);
244out_free:
245 kfree(buf);
246out_err:
247 return ds;
248}
249
250/* Decode opaque device data and return the result */
251static struct nfs4_file_layout_dsaddr*
252decode_device(struct inode *ino, struct pnfs_device *pdev)
253{
254 int i, dummy;
255 u32 cnt, num;
256 u8 *indexp;
257 __be32 *p = (__be32 *)pdev->area, *indicesp;
258 struct nfs4_file_layout_dsaddr *dsaddr;
259
260 /* Get the stripe count (number of stripe index) */
261 cnt = be32_to_cpup(p++);
262 dprintk("%s stripe count %d\n", __func__, cnt);
263 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
264 printk(KERN_WARNING "%s: stripe count %d greater than "
265 "supported maximum %d\n", __func__,
266 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
267 goto out_err;
268 }
269
270 /* Check the multipath list count */
271 indicesp = p;
272 p += XDR_QUADLEN(cnt << 2);
273 num = be32_to_cpup(p++);
274 dprintk("%s ds_num %u\n", __func__, num);
275 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
276 printk(KERN_WARNING "%s: multipath count %d greater than "
277 "supported maximum %d\n", __func__,
278 num, NFS4_PNFS_MAX_MULTI_CNT);
279 goto out_err;
280 }
281 dsaddr = kzalloc(sizeof(*dsaddr) +
282 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
283 GFP_KERNEL);
284 if (!dsaddr)
285 goto out_err;
286
287 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
288 if (!dsaddr->stripe_indices)
289 goto out_err_free;
290
291 dsaddr->stripe_count = cnt;
292 dsaddr->ds_num = num;
293
294 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
295
296 /* Go back an read stripe indices */
297 p = indicesp;
298 indexp = &dsaddr->stripe_indices[0];
299 for (i = 0; i < dsaddr->stripe_count; i++) {
300 *indexp = be32_to_cpup(p++);
301 if (*indexp >= num)
302 goto out_err_free;
303 indexp++;
304 }
305 /* Skip already read multipath list count */
306 p++;
307
308 for (i = 0; i < dsaddr->ds_num; i++) {
309 int j;
310
311 dummy = be32_to_cpup(p++); /* multipath count */
312 if (dummy > 1) {
313 printk(KERN_WARNING
314 "%s: Multipath count %d not supported, "
315 "skipping all greater than 1\n", __func__,
316 dummy);
317 }
318 for (j = 0; j < dummy; j++) {
319 if (j == 0) {
320 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
321 if (dsaddr->ds_list[i] == NULL)
322 goto out_err_free;
323 } else {
324 u32 len;
325 /* skip extra multipath */
326 len = be32_to_cpup(p++);
327 p += XDR_QUADLEN(len);
328 len = be32_to_cpup(p++);
329 p += XDR_QUADLEN(len);
330 continue;
331 }
332 }
333 }
334 return dsaddr;
335
336out_err_free:
337 nfs4_fl_free_deviceid(dsaddr);
338out_err:
339 dprintk("%s ERROR: returning NULL\n", __func__);
340 return NULL;
341}
342
343/*
344 * Decode the opaque device specified in 'dev'
345 * and add it to the list of available devices.
346 * If the deviceid is already cached, nfs4_add_deviceid will return
347 * a pointer to the cached struct and throw away the new.
348 */
349static struct nfs4_file_layout_dsaddr*
350decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
351{
352 struct nfs4_file_layout_dsaddr *dsaddr;
353 struct pnfs_deviceid_node *d;
354
355 dsaddr = decode_device(inode, dev);
356 if (!dsaddr) {
357 printk(KERN_WARNING "%s: Could not decode or add device\n",
358 __func__);
359 return NULL;
360 }
361
362 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
363 &dsaddr->deviceid);
364
365 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
366}
367
368/*
369 * Retrieve the information for dev_id, add it to the list
370 * of available devices, and return it.
371 */
372struct nfs4_file_layout_dsaddr *
373get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
374{
375 struct pnfs_device *pdev = NULL;
376 u32 max_resp_sz;
377 int max_pages;
378 struct page **pages = NULL;
379 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
380 int rc, i;
381 struct nfs_server *server = NFS_SERVER(inode);
382
383 /*
384 * Use the session max response size as the basis for setting
385 * GETDEVICEINFO's maxcount
386 */
387 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
388 max_pages = max_resp_sz >> PAGE_SHIFT;
389 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
390 __func__, inode, max_resp_sz, max_pages);
391
392 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
393 if (pdev == NULL)
394 return NULL;
395
396 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
397 if (pages == NULL) {
398 kfree(pdev);
399 return NULL;
400 }
401 for (i = 0; i < max_pages; i++) {
402 pages[i] = alloc_page(GFP_KERNEL);
403 if (!pages[i])
404 goto out_free;
405 }
406
407 /* set pdev->area */
408 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
409 if (!pdev->area)
410 goto out_free;
411
412 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
413 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
414 pdev->pages = pages;
415 pdev->pgbase = 0;
416 pdev->pglen = PAGE_SIZE * max_pages;
417 pdev->mincount = 0;
418
419 rc = nfs4_proc_getdeviceinfo(server, pdev);
420 dprintk("%s getdevice info returns %d\n", __func__, rc);
421 if (rc)
422 goto out_free;
423
424 /*
425 * Found new device, need to decode it and then add it to the
426 * list of known devices for this mountpoint.
427 */
428 dsaddr = decode_and_add_device(inode, pdev);
429out_free:
430 if (pdev->area != NULL)
431 vunmap(pdev->area);
432 for (i = 0; i < max_pages; i++)
433 __free_page(pages[i]);
434 kfree(pages);
435 kfree(pdev);
436 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
437 return dsaddr;
438}
439
440struct nfs4_file_layout_dsaddr *
441nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
442{
443 struct pnfs_deviceid_node *d;
444
445 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
446 return (d == NULL) ? NULL :
447 container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
448}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 089da5b5d20a..32c8758c99fd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
55#include "internal.h" 55#include "internal.h"
56#include "iostat.h" 56#include "iostat.h"
57#include "callback.h" 57#include "callback.h"
58#include "pnfs.h"
58 59
59#define NFSDBG_FACILITY NFSDBG_PROC 60#define NFSDBG_FACILITY NFSDBG_PROC
60 61
@@ -129,7 +130,8 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
129 | FATTR4_WORD0_MAXREAD 130 | FATTR4_WORD0_MAXREAD
130 | FATTR4_WORD0_MAXWRITE 131 | FATTR4_WORD0_MAXWRITE
131 | FATTR4_WORD0_LEASE_TIME, 132 | FATTR4_WORD0_LEASE_TIME,
132 0 133 FATTR4_WORD1_TIME_DELTA
134 | FATTR4_WORD1_FS_LAYOUT_TYPES
133}; 135};
134 136
135const u32 nfs4_fs_locations_bitmap[2] = { 137const u32 nfs4_fs_locations_bitmap[2] = {
@@ -255,9 +257,6 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
255 nfs4_state_mark_reclaim_nograce(clp, state); 257 nfs4_state_mark_reclaim_nograce(clp, state);
256 goto do_state_recovery; 258 goto do_state_recovery;
257 case -NFS4ERR_STALE_STATEID: 259 case -NFS4ERR_STALE_STATEID:
258 if (state == NULL)
259 break;
260 nfs4_state_mark_reclaim_reboot(clp, state);
261 case -NFS4ERR_STALE_CLIENTID: 260 case -NFS4ERR_STALE_CLIENTID:
262 case -NFS4ERR_EXPIRED: 261 case -NFS4ERR_EXPIRED:
263 goto do_state_recovery; 262 goto do_state_recovery;
@@ -334,10 +333,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
334 * Must be called while holding tbl->slot_tbl_lock 333 * Must be called while holding tbl->slot_tbl_lock
335 */ 334 */
336static void 335static void
337nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) 336nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
338{ 337{
338 int free_slotid = free_slot - tbl->slots;
339 int slotid = free_slotid; 339 int slotid = free_slotid;
340 340
341 BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
341 /* clear used bit in bitmap */ 342 /* clear used bit in bitmap */
342 __clear_bit(slotid, tbl->used_slots); 343 __clear_bit(slotid, tbl->used_slots);
343 344
@@ -379,7 +380,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
379 struct nfs4_slot_table *tbl; 380 struct nfs4_slot_table *tbl;
380 381
381 tbl = &res->sr_session->fc_slot_table; 382 tbl = &res->sr_session->fc_slot_table;
382 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { 383 if (!res->sr_slot) {
383 /* just wake up the next guy waiting since 384 /* just wake up the next guy waiting since
384 * we may have not consumed a slot after all */ 385 * we may have not consumed a slot after all */
385 dprintk("%s: No slot\n", __func__); 386 dprintk("%s: No slot\n", __func__);
@@ -387,17 +388,15 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
387 } 388 }
388 389
389 spin_lock(&tbl->slot_tbl_lock); 390 spin_lock(&tbl->slot_tbl_lock);
390 nfs4_free_slot(tbl, res->sr_slotid); 391 nfs4_free_slot(tbl, res->sr_slot);
391 nfs41_check_drain_session_complete(res->sr_session); 392 nfs41_check_drain_session_complete(res->sr_session);
392 spin_unlock(&tbl->slot_tbl_lock); 393 spin_unlock(&tbl->slot_tbl_lock);
393 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 394 res->sr_slot = NULL;
394} 395}
395 396
396static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) 397static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
397{ 398{
398 unsigned long timestamp; 399 unsigned long timestamp;
399 struct nfs4_slot_table *tbl;
400 struct nfs4_slot *slot;
401 struct nfs_client *clp; 400 struct nfs_client *clp;
402 401
403 /* 402 /*
@@ -410,17 +409,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
410 res->sr_status = NFS_OK; 409 res->sr_status = NFS_OK;
411 410
412 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ 411 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
413 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) 412 if (!res->sr_slot)
414 goto out; 413 goto out;
415 414
416 tbl = &res->sr_session->fc_slot_table;
417 slot = tbl->slots + res->sr_slotid;
418
419 /* Check the SEQUENCE operation status */ 415 /* Check the SEQUENCE operation status */
420 switch (res->sr_status) { 416 switch (res->sr_status) {
421 case 0: 417 case 0:
422 /* Update the slot's sequence and clientid lease timer */ 418 /* Update the slot's sequence and clientid lease timer */
423 ++slot->seq_nr; 419 ++res->sr_slot->seq_nr;
424 timestamp = res->sr_renewal_time; 420 timestamp = res->sr_renewal_time;
425 clp = res->sr_session->clp; 421 clp = res->sr_session->clp;
426 do_renew_lease(clp, timestamp); 422 do_renew_lease(clp, timestamp);
@@ -433,12 +429,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
433 * returned NFS4ERR_DELAY as per Section 2.10.6.2 429 * returned NFS4ERR_DELAY as per Section 2.10.6.2
434 * of RFC5661. 430 * of RFC5661.
435 */ 431 */
436 dprintk("%s: slot=%d seq=%d: Operation in progress\n", 432 dprintk("%s: slot=%ld seq=%d: Operation in progress\n",
437 __func__, res->sr_slotid, slot->seq_nr); 433 __func__,
434 res->sr_slot - res->sr_session->fc_slot_table.slots,
435 res->sr_slot->seq_nr);
438 goto out_retry; 436 goto out_retry;
439 default: 437 default:
440 /* Just update the slot sequence no. */ 438 /* Just update the slot sequence no. */
441 ++slot->seq_nr; 439 ++res->sr_slot->seq_nr;
442 } 440 }
443out: 441out:
444 /* The session may be reset by one of the error handlers. */ 442 /* The session may be reset by one of the error handlers. */
@@ -505,10 +503,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
505 503
506 dprintk("--> %s\n", __func__); 504 dprintk("--> %s\n", __func__);
507 /* slot already allocated? */ 505 /* slot already allocated? */
508 if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) 506 if (res->sr_slot != NULL)
509 return 0; 507 return 0;
510 508
511 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
512 tbl = &session->fc_slot_table; 509 tbl = &session->fc_slot_table;
513 510
514 spin_lock(&tbl->slot_tbl_lock); 511 spin_lock(&tbl->slot_tbl_lock);
@@ -550,7 +547,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
550 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); 547 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
551 548
552 res->sr_session = session; 549 res->sr_session = session;
553 res->sr_slotid = slotid; 550 res->sr_slot = slot;
554 res->sr_renewal_time = jiffies; 551 res->sr_renewal_time = jiffies;
555 res->sr_status_flags = 0; 552 res->sr_status_flags = 0;
556 /* 553 /*
@@ -576,8 +573,9 @@ int nfs4_setup_sequence(const struct nfs_server *server,
576 goto out; 573 goto out;
577 } 574 }
578 575
579 dprintk("--> %s clp %p session %p sr_slotid %d\n", 576 dprintk("--> %s clp %p session %p sr_slot %ld\n",
580 __func__, session->clp, session, res->sr_slotid); 577 __func__, session->clp, session, res->sr_slot ?
578 res->sr_slot - session->fc_slot_table.slots : -1);
581 579
582 ret = nfs41_setup_sequence(session, args, res, cache_reply, 580 ret = nfs41_setup_sequence(session, args, res, cache_reply,
583 task); 581 task);
@@ -650,7 +648,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
650 .callback_data = &data 648 .callback_data = &data
651 }; 649 };
652 650
653 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 651 res->sr_slot = NULL;
654 if (privileged) 652 if (privileged)
655 task_setup.callback_ops = &nfs41_call_priv_sync_ops; 653 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
656 task = rpc_run_task(&task_setup); 654 task = rpc_run_task(&task_setup);
@@ -735,7 +733,6 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
735 p->o_res.server = p->o_arg.server; 733 p->o_res.server = p->o_arg.server;
736 nfs_fattr_init(&p->f_attr); 734 nfs_fattr_init(&p->f_attr);
737 nfs_fattr_init(&p->dir_attr); 735 nfs_fattr_init(&p->dir_attr);
738 p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
739} 736}
740 737
741static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 738static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
@@ -1120,6 +1117,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1120 clear_bit(NFS_DELEGATED_STATE, &state->flags); 1117 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1121 smp_rmb(); 1118 smp_rmb();
1122 if (state->n_rdwr != 0) { 1119 if (state->n_rdwr != 0) {
1120 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1123 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); 1121 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
1124 if (ret != 0) 1122 if (ret != 0)
1125 return ret; 1123 return ret;
@@ -1127,6 +1125,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1127 return -ESTALE; 1125 return -ESTALE;
1128 } 1126 }
1129 if (state->n_wronly != 0) { 1127 if (state->n_wronly != 0) {
1128 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1130 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); 1129 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
1131 if (ret != 0) 1130 if (ret != 0)
1132 return ret; 1131 return ret;
@@ -1134,6 +1133,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1134 return -ESTALE; 1133 return -ESTALE;
1135 } 1134 }
1136 if (state->n_rdonly != 0) { 1135 if (state->n_rdonly != 0) {
1136 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1137 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); 1137 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
1138 if (ret != 0) 1138 if (ret != 0)
1139 return ret; 1139 return ret;
@@ -1188,7 +1188,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
1188 int err; 1188 int err;
1189 do { 1189 do {
1190 err = _nfs4_do_open_reclaim(ctx, state); 1190 err = _nfs4_do_open_reclaim(ctx, state);
1191 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) 1191 if (err != -NFS4ERR_DELAY)
1192 break; 1192 break;
1193 nfs4_handle_exception(server, err, &exception); 1193 nfs4_handle_exception(server, err, &exception);
1194 } while (exception.retry); 1194 } while (exception.retry);
@@ -1258,6 +1258,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1258 case -NFS4ERR_ADMIN_REVOKED: 1258 case -NFS4ERR_ADMIN_REVOKED:
1259 case -NFS4ERR_BAD_STATEID: 1259 case -NFS4ERR_BAD_STATEID:
1260 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1260 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
1261 case -EKEYEXPIRED:
1262 /*
1263 * User RPCSEC_GSS context has expired.
1264 * We cannot recover this stateid now, so
1265 * skip it and allow recovery thread to
1266 * proceed.
1267 */
1261 case -ENOMEM: 1268 case -ENOMEM:
1262 err = 0; 1269 err = 0;
1263 goto out; 1270 goto out;
@@ -1605,7 +1612,6 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
1605 goto out; 1612 goto out;
1606 case -NFS4ERR_GRACE: 1613 case -NFS4ERR_GRACE:
1607 case -NFS4ERR_DELAY: 1614 case -NFS4ERR_DELAY:
1608 case -EKEYEXPIRED:
1609 nfs4_handle_exception(server, err, &exception); 1615 nfs4_handle_exception(server, err, &exception);
1610 err = 0; 1616 err = 0;
1611 } 1617 }
@@ -1975,7 +1981,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1975 calldata->res.fattr = &calldata->fattr; 1981 calldata->res.fattr = &calldata->fattr;
1976 calldata->res.seqid = calldata->arg.seqid; 1982 calldata->res.seqid = calldata->arg.seqid;
1977 calldata->res.server = server; 1983 calldata->res.server = server;
1978 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
1979 path_get(path); 1984 path_get(path);
1980 calldata->path = *path; 1985 calldata->path = *path;
1981 1986
@@ -1998,120 +2003,17 @@ out:
1998 return status; 2003 return status;
1999} 2004}
2000 2005
2001static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) 2006static struct inode *
2007nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr)
2002{ 2008{
2003 struct file *filp;
2004 int ret;
2005
2006 /* If the open_intent is for execute, we have an extra check to make */
2007 if (fmode & FMODE_EXEC) {
2008 ret = nfs_may_open(state->inode,
2009 state->owner->so_cred,
2010 nd->intent.open.flags);
2011 if (ret < 0)
2012 goto out_close;
2013 }
2014 filp = lookup_instantiate_filp(nd, path->dentry, NULL);
2015 if (!IS_ERR(filp)) {
2016 struct nfs_open_context *ctx;
2017 ctx = nfs_file_open_context(filp);
2018 ctx->state = state;
2019 return 0;
2020 }
2021 ret = PTR_ERR(filp);
2022out_close:
2023 nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
2024 return ret;
2025}
2026
2027struct dentry *
2028nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
2029{
2030 struct path path = {
2031 .mnt = nd->path.mnt,
2032 .dentry = dentry,
2033 };
2034 struct dentry *parent;
2035 struct iattr attr;
2036 struct rpc_cred *cred;
2037 struct nfs4_state *state; 2009 struct nfs4_state *state;
2038 struct dentry *res;
2039 int open_flags = nd->intent.open.flags;
2040 fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
2041
2042 if (nd->flags & LOOKUP_CREATE) {
2043 attr.ia_mode = nd->intent.open.create_mode;
2044 attr.ia_valid = ATTR_MODE;
2045 if (!IS_POSIXACL(dir))
2046 attr.ia_mode &= ~current_umask();
2047 } else {
2048 open_flags &= ~O_EXCL;
2049 attr.ia_valid = 0;
2050 BUG_ON(open_flags & O_CREAT);
2051 }
2052 2010
2053 cred = rpc_lookup_cred();
2054 if (IS_ERR(cred))
2055 return (struct dentry *)cred;
2056 parent = dentry->d_parent;
2057 /* Protect against concurrent sillydeletes */ 2011 /* Protect against concurrent sillydeletes */
2058 nfs_block_sillyrename(parent); 2012 state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred);
2059 state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); 2013 if (IS_ERR(state))
2060 put_rpccred(cred); 2014 return ERR_CAST(state);
2061 if (IS_ERR(state)) { 2015 ctx->state = state;
2062 if (PTR_ERR(state) == -ENOENT) { 2016 return igrab(state->inode);
2063 d_add(dentry, NULL);
2064 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2065 }
2066 nfs_unblock_sillyrename(parent);
2067 return (struct dentry *)state;
2068 }
2069 res = d_add_unique(dentry, igrab(state->inode));
2070 if (res != NULL)
2071 path.dentry = res;
2072 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
2073 nfs_unblock_sillyrename(parent);
2074 nfs4_intent_set_file(nd, &path, state, fmode);
2075 return res;
2076}
2077
2078int
2079nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
2080{
2081 struct path path = {
2082 .mnt = nd->path.mnt,
2083 .dentry = dentry,
2084 };
2085 struct rpc_cred *cred;
2086 struct nfs4_state *state;
2087 fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
2088
2089 cred = rpc_lookup_cred();
2090 if (IS_ERR(cred))
2091 return PTR_ERR(cred);
2092 state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
2093 put_rpccred(cred);
2094 if (IS_ERR(state)) {
2095 switch (PTR_ERR(state)) {
2096 case -EPERM:
2097 case -EACCES:
2098 case -EDQUOT:
2099 case -ENOSPC:
2100 case -EROFS:
2101 return PTR_ERR(state);
2102 default:
2103 goto out_drop;
2104 }
2105 }
2106 if (state->inode == dentry->d_inode) {
2107 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2108 nfs4_intent_set_file(nd, &path, state, fmode);
2109 return 1;
2110 }
2111 nfs4_close_sync(&path, state, fmode);
2112out_drop:
2113 d_drop(dentry);
2114 return 0;
2115} 2017}
2116 2018
2117static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) 2019static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
@@ -2568,36 +2470,34 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
2568 2470
2569static int 2471static int
2570nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 2472nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2571 int flags, struct nameidata *nd) 2473 int flags, struct nfs_open_context *ctx)
2572{ 2474{
2573 struct path path = { 2475 struct path my_path = {
2574 .mnt = nd->path.mnt,
2575 .dentry = dentry, 2476 .dentry = dentry,
2576 }; 2477 };
2478 struct path *path = &my_path;
2577 struct nfs4_state *state; 2479 struct nfs4_state *state;
2578 struct rpc_cred *cred; 2480 struct rpc_cred *cred = NULL;
2579 fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); 2481 fmode_t fmode = 0;
2580 int status = 0; 2482 int status = 0;
2581 2483
2582 cred = rpc_lookup_cred(); 2484 if (ctx != NULL) {
2583 if (IS_ERR(cred)) { 2485 cred = ctx->cred;
2584 status = PTR_ERR(cred); 2486 path = &ctx->path;
2585 goto out; 2487 fmode = ctx->mode;
2586 } 2488 }
2587 state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); 2489 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
2588 d_drop(dentry); 2490 d_drop(dentry);
2589 if (IS_ERR(state)) { 2491 if (IS_ERR(state)) {
2590 status = PTR_ERR(state); 2492 status = PTR_ERR(state);
2591 goto out_putcred; 2493 goto out;
2592 } 2494 }
2593 d_add(dentry, igrab(state->inode)); 2495 d_add(dentry, igrab(state->inode));
2594 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 2496 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2595 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) 2497 if (ctx != NULL)
2596 status = nfs4_intent_set_file(nd, &path, state, fmode); 2498 ctx->state = state;
2597 else 2499 else
2598 nfs4_close_sync(&path, state, fmode); 2500 nfs4_close_sync(path, state, fmode);
2599out_putcred:
2600 put_rpccred(cred);
2601out: 2501out:
2602 return status; 2502 return status;
2603} 2503}
@@ -2655,6 +2555,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2655 2555
2656 args->bitmask = server->cache_consistency_bitmask; 2556 args->bitmask = server->cache_consistency_bitmask;
2657 res->server = server; 2557 res->server = server;
2558 res->seq_res.sr_slot = NULL;
2658 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2559 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2659} 2560}
2660 2561
@@ -2671,18 +2572,46 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2671 return 1; 2572 return 1;
2672} 2573}
2673 2574
2575static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2576{
2577 struct nfs_server *server = NFS_SERVER(dir);
2578 struct nfs_renameargs *arg = msg->rpc_argp;
2579 struct nfs_renameres *res = msg->rpc_resp;
2580
2581 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2582 arg->bitmask = server->attr_bitmask;
2583 res->server = server;
2584}
2585
2586static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
2587 struct inode *new_dir)
2588{
2589 struct nfs_renameres *res = task->tk_msg.rpc_resp;
2590
2591 if (!nfs4_sequence_done(task, &res->seq_res))
2592 return 0;
2593 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2594 return 0;
2595
2596 update_changeattr(old_dir, &res->old_cinfo);
2597 nfs_post_op_update_inode(old_dir, res->old_fattr);
2598 update_changeattr(new_dir, &res->new_cinfo);
2599 nfs_post_op_update_inode(new_dir, res->new_fattr);
2600 return 1;
2601}
2602
2674static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, 2603static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2675 struct inode *new_dir, struct qstr *new_name) 2604 struct inode *new_dir, struct qstr *new_name)
2676{ 2605{
2677 struct nfs_server *server = NFS_SERVER(old_dir); 2606 struct nfs_server *server = NFS_SERVER(old_dir);
2678 struct nfs4_rename_arg arg = { 2607 struct nfs_renameargs arg = {
2679 .old_dir = NFS_FH(old_dir), 2608 .old_dir = NFS_FH(old_dir),
2680 .new_dir = NFS_FH(new_dir), 2609 .new_dir = NFS_FH(new_dir),
2681 .old_name = old_name, 2610 .old_name = old_name,
2682 .new_name = new_name, 2611 .new_name = new_name,
2683 .bitmask = server->attr_bitmask, 2612 .bitmask = server->attr_bitmask,
2684 }; 2613 };
2685 struct nfs4_rename_res res = { 2614 struct nfs_renameres res = {
2686 .server = server, 2615 .server = server,
2687 }; 2616 };
2688 struct rpc_message msg = { 2617 struct rpc_message msg = {
@@ -2896,15 +2825,16 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
2896} 2825}
2897 2826
2898static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 2827static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2899 u64 cookie, struct page *page, unsigned int count, int plus) 2828 u64 cookie, struct page **pages, unsigned int count, int plus)
2900{ 2829{
2901 struct inode *dir = dentry->d_inode; 2830 struct inode *dir = dentry->d_inode;
2902 struct nfs4_readdir_arg args = { 2831 struct nfs4_readdir_arg args = {
2903 .fh = NFS_FH(dir), 2832 .fh = NFS_FH(dir),
2904 .pages = &page, 2833 .pages = pages,
2905 .pgbase = 0, 2834 .pgbase = 0,
2906 .count = count, 2835 .count = count,
2907 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, 2836 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
2837 .plus = plus,
2908 }; 2838 };
2909 struct nfs4_readdir_res res; 2839 struct nfs4_readdir_res res;
2910 struct rpc_message msg = { 2840 struct rpc_message msg = {
@@ -2932,14 +2862,14 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2932} 2862}
2933 2863
2934static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 2864static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2935 u64 cookie, struct page *page, unsigned int count, int plus) 2865 u64 cookie, struct page **pages, unsigned int count, int plus)
2936{ 2866{
2937 struct nfs4_exception exception = { }; 2867 struct nfs4_exception exception = { };
2938 int err; 2868 int err;
2939 do { 2869 do {
2940 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), 2870 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
2941 _nfs4_proc_readdir(dentry, cred, cookie, 2871 _nfs4_proc_readdir(dentry, cred, cookie,
2942 page, count, plus), 2872 pages, count, plus),
2943 &exception); 2873 &exception);
2944 } while (exception.retry); 2874 } while (exception.retry);
2945 return err; 2875 return err;
@@ -3490,9 +3420,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3490 nfs4_state_mark_reclaim_nograce(clp, state); 3420 nfs4_state_mark_reclaim_nograce(clp, state);
3491 goto do_state_recovery; 3421 goto do_state_recovery;
3492 case -NFS4ERR_STALE_STATEID: 3422 case -NFS4ERR_STALE_STATEID:
3493 if (state == NULL)
3494 break;
3495 nfs4_state_mark_reclaim_reboot(clp, state);
3496 case -NFS4ERR_STALE_CLIENTID: 3423 case -NFS4ERR_STALE_CLIENTID:
3497 case -NFS4ERR_EXPIRED: 3424 case -NFS4ERR_EXPIRED:
3498 goto do_state_recovery; 3425 goto do_state_recovery;
@@ -3626,7 +3553,6 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3626 case -NFS4ERR_RESOURCE: 3553 case -NFS4ERR_RESOURCE:
3627 /* The IBM lawyers misread another document! */ 3554 /* The IBM lawyers misread another document! */
3628 case -NFS4ERR_DELAY: 3555 case -NFS4ERR_DELAY:
3629 case -EKEYEXPIRED:
3630 err = nfs4_delay(clp->cl_rpcclient, &timeout); 3556 err = nfs4_delay(clp->cl_rpcclient, &timeout);
3631 } 3557 }
3632 } while (err == 0); 3558 } while (err == 0);
@@ -3721,7 +3647,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3721 memcpy(&data->stateid, stateid, sizeof(data->stateid)); 3647 memcpy(&data->stateid, stateid, sizeof(data->stateid));
3722 data->res.fattr = &data->fattr; 3648 data->res.fattr = &data->fattr;
3723 data->res.server = server; 3649 data->res.server = server;
3724 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3725 nfs_fattr_init(data->res.fattr); 3650 nfs_fattr_init(data->res.fattr);
3726 data->timestamp = jiffies; 3651 data->timestamp = jiffies;
3727 data->rpc_status = 0; 3652 data->rpc_status = 0;
@@ -3874,7 +3799,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
3874 p->arg.fl = &p->fl; 3799 p->arg.fl = &p->fl;
3875 p->arg.seqid = seqid; 3800 p->arg.seqid = seqid;
3876 p->res.seqid = seqid; 3801 p->res.seqid = seqid;
3877 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3878 p->arg.stateid = &lsp->ls_stateid; 3802 p->arg.stateid = &lsp->ls_stateid;
3879 p->lsp = lsp; 3803 p->lsp = lsp;
3880 atomic_inc(&lsp->ls_count); 3804 atomic_inc(&lsp->ls_count);
@@ -4054,7 +3978,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
4054 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3978 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
4055 p->arg.lock_owner.id = lsp->ls_id.id; 3979 p->arg.lock_owner.id = lsp->ls_id.id;
4056 p->res.lock_seqid = p->arg.lock_seqid; 3980 p->res.lock_seqid = p->arg.lock_seqid;
4057 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4058 p->lsp = lsp; 3981 p->lsp = lsp;
4059 p->server = server; 3982 p->server = server;
4060 atomic_inc(&lsp->ls_count); 3983 atomic_inc(&lsp->ls_count);
@@ -4241,7 +4164,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4241 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4164 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4242 return 0; 4165 return 0;
4243 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); 4166 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4244 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) 4167 if (err != -NFS4ERR_DELAY)
4245 break; 4168 break;
4246 nfs4_handle_exception(server, err, &exception); 4169 nfs4_handle_exception(server, err, &exception);
4247 } while (exception.retry); 4170 } while (exception.retry);
@@ -4266,7 +4189,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4266 goto out; 4189 goto out;
4267 case -NFS4ERR_GRACE: 4190 case -NFS4ERR_GRACE:
4268 case -NFS4ERR_DELAY: 4191 case -NFS4ERR_DELAY:
4269 case -EKEYEXPIRED:
4270 nfs4_handle_exception(server, err, &exception); 4192 nfs4_handle_exception(server, err, &exception);
4271 err = 0; 4193 err = 0;
4272 } 4194 }
@@ -4412,13 +4334,21 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4412 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4334 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
4413 err = 0; 4335 err = 0;
4414 goto out; 4336 goto out;
4337 case -EKEYEXPIRED:
4338 /*
4339 * User RPCSEC_GSS context has expired.
4340 * We cannot recover this stateid now, so
4341 * skip it and allow recovery thread to
4342 * proceed.
4343 */
4344 err = 0;
4345 goto out;
4415 case -ENOMEM: 4346 case -ENOMEM:
4416 case -NFS4ERR_DENIED: 4347 case -NFS4ERR_DENIED:
4417 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 4348 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4418 err = 0; 4349 err = 0;
4419 goto out; 4350 goto out;
4420 case -NFS4ERR_DELAY: 4351 case -NFS4ERR_DELAY:
4421 case -EKEYEXPIRED:
4422 break; 4352 break;
4423 } 4353 }
4424 err = nfs4_handle_exception(server, err, &exception); 4354 err = nfs4_handle_exception(server, err, &exception);
@@ -4647,7 +4577,6 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4647 switch (task->tk_status) { 4577 switch (task->tk_status) {
4648 case -NFS4ERR_DELAY: 4578 case -NFS4ERR_DELAY:
4649 case -NFS4ERR_GRACE: 4579 case -NFS4ERR_GRACE:
4650 case -EKEYEXPIRED:
4651 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); 4580 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4652 rpc_delay(task, NFS4_POLL_RETRY_MIN); 4581 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4653 task->tk_status = 0; 4582 task->tk_status = 0;
@@ -4687,7 +4616,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4687 }; 4616 };
4688 int status; 4617 int status;
4689 4618
4690 res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4691 dprintk("--> %s\n", __func__); 4619 dprintk("--> %s\n", __func__);
4692 task = rpc_run_task(&task_setup); 4620 task = rpc_run_task(&task_setup);
4693 4621
@@ -4914,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
4914 args->bc_attrs.max_reqs); 4842 args->bc_attrs.max_reqs);
4915} 4843}
4916 4844
4917static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) 4845static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4918{ 4846{
4919 if (rcvd <= sent) 4847 struct nfs4_channel_attrs *sent = &args->fc_attrs;
4920 return 0; 4848 struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
4921 printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " 4849
4922 "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); 4850 if (rcvd->headerpadsz > sent->headerpadsz)
4923 return -EINVAL; 4851 return -EINVAL;
4852 if (rcvd->max_resp_sz > sent->max_resp_sz)
4853 return -EINVAL;
4854 /*
4855 * Our requested max_ops is the minimum we need; we're not
4856 * prepared to break up compounds into smaller pieces than that.
4857 * So, no point even trying to continue if the server won't
4858 * cooperate:
4859 */
4860 if (rcvd->max_ops < sent->max_ops)
4861 return -EINVAL;
4862 if (rcvd->max_reqs == 0)
4863 return -EINVAL;
4864 return 0;
4924} 4865}
4925 4866
4926#define _verify_fore_channel_attr(_name_) \ 4867static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4927 _verify_channel_attr("fore", #_name_, \ 4868{
4928 args->fc_attrs._name_, \ 4869 struct nfs4_channel_attrs *sent = &args->bc_attrs;
4929 session->fc_attrs._name_) 4870 struct nfs4_channel_attrs *rcvd = &session->bc_attrs;
4930 4871
4931#define _verify_back_channel_attr(_name_) \ 4872 if (rcvd->max_rqst_sz > sent->max_rqst_sz)
4932 _verify_channel_attr("back", #_name_, \ 4873 return -EINVAL;
4933 args->bc_attrs._name_, \ 4874 if (rcvd->max_resp_sz < sent->max_resp_sz)
4934 session->bc_attrs._name_) 4875 return -EINVAL;
4876 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
4877 return -EINVAL;
4878 /* These would render the backchannel useless: */
4879 if (rcvd->max_ops == 0)
4880 return -EINVAL;
4881 if (rcvd->max_reqs == 0)
4882 return -EINVAL;
4883 return 0;
4884}
4935 4885
4936/*
4937 * The server is not allowed to increase the fore channel header pad size,
4938 * maximum response size, or maximum number of operations.
4939 *
4940 * The back channel attributes are only negotiatied down: We send what the
4941 * (back channel) server insists upon.
4942 */
4943static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, 4886static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
4944 struct nfs4_session *session) 4887 struct nfs4_session *session)
4945{ 4888{
4946 int ret = 0; 4889 int ret;
4947
4948 ret |= _verify_fore_channel_attr(headerpadsz);
4949 ret |= _verify_fore_channel_attr(max_resp_sz);
4950 ret |= _verify_fore_channel_attr(max_ops);
4951
4952 ret |= _verify_back_channel_attr(headerpadsz);
4953 ret |= _verify_back_channel_attr(max_rqst_sz);
4954 ret |= _verify_back_channel_attr(max_resp_sz);
4955 ret |= _verify_back_channel_attr(max_resp_sz_cached);
4956 ret |= _verify_back_channel_attr(max_ops);
4957 ret |= _verify_back_channel_attr(max_reqs);
4958 4890
4959 return ret; 4891 ret = nfs4_verify_fore_channel_attrs(args, session);
4892 if (ret)
4893 return ret;
4894 return nfs4_verify_back_channel_attrs(args, session);
4960} 4895}
4961 4896
4962static int _nfs4_proc_create_session(struct nfs_client *clp) 4897static int _nfs4_proc_create_session(struct nfs_client *clp)
@@ -5111,7 +5046,6 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5111{ 5046{
5112 switch(task->tk_status) { 5047 switch(task->tk_status) {
5113 case -NFS4ERR_DELAY: 5048 case -NFS4ERR_DELAY:
5114 case -EKEYEXPIRED:
5115 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5049 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5116 return -EAGAIN; 5050 return -EAGAIN;
5117 default: 5051 default:
@@ -5180,12 +5114,11 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
5180 5114
5181 if (!atomic_inc_not_zero(&clp->cl_count)) 5115 if (!atomic_inc_not_zero(&clp->cl_count))
5182 return ERR_PTR(-EIO); 5116 return ERR_PTR(-EIO);
5183 calldata = kmalloc(sizeof(*calldata), GFP_NOFS); 5117 calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
5184 if (calldata == NULL) { 5118 if (calldata == NULL) {
5185 nfs_put_client(clp); 5119 nfs_put_client(clp);
5186 return ERR_PTR(-ENOMEM); 5120 return ERR_PTR(-ENOMEM);
5187 } 5121 }
5188 calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5189 msg.rpc_argp = &calldata->args; 5122 msg.rpc_argp = &calldata->args;
5190 msg.rpc_resp = &calldata->res; 5123 msg.rpc_resp = &calldata->res;
5191 calldata->clp = clp; 5124 calldata->clp = clp;
@@ -5254,7 +5187,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5254 case -NFS4ERR_WRONG_CRED: /* What to do here? */ 5187 case -NFS4ERR_WRONG_CRED: /* What to do here? */
5255 break; 5188 break;
5256 case -NFS4ERR_DELAY: 5189 case -NFS4ERR_DELAY:
5257 case -EKEYEXPIRED:
5258 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5190 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5259 return -EAGAIN; 5191 return -EAGAIN;
5260 default: 5192 default:
@@ -5317,7 +5249,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5317 goto out; 5249 goto out;
5318 calldata->clp = clp; 5250 calldata->clp = clp;
5319 calldata->arg.one_fs = 0; 5251 calldata->arg.one_fs = 0;
5320 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5321 5252
5322 msg.rpc_argp = &calldata->arg; 5253 msg.rpc_argp = &calldata->arg;
5323 msg.rpc_resp = &calldata->res; 5254 msg.rpc_resp = &calldata->res;
@@ -5333,6 +5264,147 @@ out:
5333 dprintk("<-- %s status=%d\n", __func__, status); 5264 dprintk("<-- %s status=%d\n", __func__, status);
5334 return status; 5265 return status;
5335} 5266}
5267
5268static void
5269nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5270{
5271 struct nfs4_layoutget *lgp = calldata;
5272 struct inode *ino = lgp->args.inode;
5273 struct nfs_server *server = NFS_SERVER(ino);
5274
5275 dprintk("--> %s\n", __func__);
5276 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5277 &lgp->res.seq_res, 0, task))
5278 return;
5279 rpc_call_start(task);
5280}
5281
5282static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
5283{
5284 struct nfs4_layoutget *lgp = calldata;
5285 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5286
5287 dprintk("--> %s\n", __func__);
5288
5289 if (!nfs4_sequence_done(task, &lgp->res.seq_res))
5290 return;
5291
5292 switch (task->tk_status) {
5293 case 0:
5294 break;
5295 case -NFS4ERR_LAYOUTTRYLATER:
5296 case -NFS4ERR_RECALLCONFLICT:
5297 task->tk_status = -NFS4ERR_DELAY;
5298 /* Fall through */
5299 default:
5300 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5301 rpc_restart_call_prepare(task);
5302 return;
5303 }
5304 }
5305 lgp->status = task->tk_status;
5306 dprintk("<-- %s\n", __func__);
5307}
5308
5309static void nfs4_layoutget_release(void *calldata)
5310{
5311 struct nfs4_layoutget *lgp = calldata;
5312
5313 dprintk("--> %s\n", __func__);
5314 put_layout_hdr(lgp->args.inode);
5315 if (lgp->res.layout.buf != NULL)
5316 free_page((unsigned long) lgp->res.layout.buf);
5317 put_nfs_open_context(lgp->args.ctx);
5318 kfree(calldata);
5319 dprintk("<-- %s\n", __func__);
5320}
5321
5322static const struct rpc_call_ops nfs4_layoutget_call_ops = {
5323 .rpc_call_prepare = nfs4_layoutget_prepare,
5324 .rpc_call_done = nfs4_layoutget_done,
5325 .rpc_release = nfs4_layoutget_release,
5326};
5327
5328int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5329{
5330 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5331 struct rpc_task *task;
5332 struct rpc_message msg = {
5333 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
5334 .rpc_argp = &lgp->args,
5335 .rpc_resp = &lgp->res,
5336 };
5337 struct rpc_task_setup task_setup_data = {
5338 .rpc_client = server->client,
5339 .rpc_message = &msg,
5340 .callback_ops = &nfs4_layoutget_call_ops,
5341 .callback_data = lgp,
5342 .flags = RPC_TASK_ASYNC,
5343 };
5344 int status = 0;
5345
5346 dprintk("--> %s\n", __func__);
5347
5348 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
5349 if (lgp->res.layout.buf == NULL) {
5350 nfs4_layoutget_release(lgp);
5351 return -ENOMEM;
5352 }
5353
5354 lgp->res.seq_res.sr_slot = NULL;
5355 task = rpc_run_task(&task_setup_data);
5356 if (IS_ERR(task))
5357 return PTR_ERR(task);
5358 status = nfs4_wait_for_completion_rpc_task(task);
5359 if (status != 0)
5360 goto out;
5361 status = lgp->status;
5362 if (status != 0)
5363 goto out;
5364 status = pnfs_layout_process(lgp);
5365out:
5366 rpc_put_task(task);
5367 dprintk("<-- %s status=%d\n", __func__, status);
5368 return status;
5369}
5370
5371static int
5372_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5373{
5374 struct nfs4_getdeviceinfo_args args = {
5375 .pdev = pdev,
5376 };
5377 struct nfs4_getdeviceinfo_res res = {
5378 .pdev = pdev,
5379 };
5380 struct rpc_message msg = {
5381 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
5382 .rpc_argp = &args,
5383 .rpc_resp = &res,
5384 };
5385 int status;
5386
5387 dprintk("--> %s\n", __func__);
5388 status = nfs4_call_sync(server, &msg, &args, &res, 0);
5389 dprintk("<-- %s status=%d\n", __func__, status);
5390
5391 return status;
5392}
5393
5394int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5395{
5396 struct nfs4_exception exception = { };
5397 int err;
5398
5399 do {
5400 err = nfs4_handle_exception(server,
5401 _nfs4_proc_getdeviceinfo(server, pdev),
5402 &exception);
5403 } while (exception.retry);
5404 return err;
5405}
5406EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5407
5336#endif /* CONFIG_NFS_V4_1 */ 5408#endif /* CONFIG_NFS_V4_1 */
5337 5409
5338struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5410struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5443,6 +5515,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5443 .unlink_setup = nfs4_proc_unlink_setup, 5515 .unlink_setup = nfs4_proc_unlink_setup,
5444 .unlink_done = nfs4_proc_unlink_done, 5516 .unlink_done = nfs4_proc_unlink_done,
5445 .rename = nfs4_proc_rename, 5517 .rename = nfs4_proc_rename,
5518 .rename_setup = nfs4_proc_rename_setup,
5519 .rename_done = nfs4_proc_rename_done,
5446 .link = nfs4_proc_link, 5520 .link = nfs4_proc_link,
5447 .symlink = nfs4_proc_symlink, 5521 .symlink = nfs4_proc_symlink,
5448 .mkdir = nfs4_proc_mkdir, 5522 .mkdir = nfs4_proc_mkdir,
@@ -5463,6 +5537,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5463 .lock = nfs4_proc_lock, 5537 .lock = nfs4_proc_lock,
5464 .clear_acl_cache = nfs4_zap_acl_attr, 5538 .clear_acl_cache = nfs4_zap_acl_attr,
5465 .close_context = nfs4_close_context, 5539 .close_context = nfs4_close_context,
5540 .open_context = nfs4_atomic_open,
5466}; 5541};
5467 5542
5468/* 5543/*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 3e2f19b04c06..f575a3126737 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -40,12 +40,13 @@
40 40
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/smp_lock.h> 43#include <linux/fs.h>
44#include <linux/nfs_fs.h> 44#include <linux/nfs_fs.h>
45#include <linux/nfs_idmap.h> 45#include <linux/nfs_idmap.h>
46#include <linux/kthread.h> 46#include <linux/kthread.h>
47#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/random.h> 48#include <linux/random.h>
49#include <linux/ratelimit.h>
49#include <linux/workqueue.h> 50#include <linux/workqueue.h>
50#include <linux/bitops.h> 51#include <linux/bitops.h>
51 52
@@ -53,6 +54,7 @@
53#include "callback.h" 54#include "callback.h"
54#include "delegation.h" 55#include "delegation.h"
55#include "internal.h" 56#include "internal.h"
57#include "pnfs.h"
56 58
57#define OPENOWNER_POOL_SIZE 8 59#define OPENOWNER_POOL_SIZE 8
58 60
@@ -970,13 +972,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
970 /* Guard against delegation returns and new lock/unlock calls */ 972 /* Guard against delegation returns and new lock/unlock calls */
971 down_write(&nfsi->rwsem); 973 down_write(&nfsi->rwsem);
972 /* Protect inode->i_flock using the BKL */ 974 /* Protect inode->i_flock using the BKL */
973 lock_kernel(); 975 lock_flocks();
974 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 976 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
975 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 977 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
976 continue; 978 continue;
977 if (nfs_file_open_context(fl->fl_file)->state != state) 979 if (nfs_file_open_context(fl->fl_file)->state != state)
978 continue; 980 continue;
979 unlock_kernel(); 981 unlock_flocks();
980 status = ops->recover_lock(state, fl); 982 status = ops->recover_lock(state, fl);
981 switch (status) { 983 switch (status) {
982 case 0: 984 case 0:
@@ -1003,9 +1005,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
1003 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 1005 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
1004 status = 0; 1006 status = 0;
1005 } 1007 }
1006 lock_kernel(); 1008 lock_flocks();
1007 } 1009 }
1008 unlock_kernel(); 1010 unlock_flocks();
1009out: 1011out:
1010 up_write(&nfsi->rwsem); 1012 up_write(&nfsi->rwsem);
1011 return status; 1013 return status;
@@ -1063,6 +1065,14 @@ restart:
1063 /* Mark the file as being 'closed' */ 1065 /* Mark the file as being 'closed' */
1064 state->state = 0; 1066 state->state = 0;
1065 break; 1067 break;
1068 case -EKEYEXPIRED:
1069 /*
1070 * User RPCSEC_GSS context has expired.
1071 * We cannot recover this stateid now, so
1072 * skip it and allow recovery thread to
1073 * proceed.
1074 */
1075 break;
1066 case -NFS4ERR_ADMIN_REVOKED: 1076 case -NFS4ERR_ADMIN_REVOKED:
1067 case -NFS4ERR_STALE_STATEID: 1077 case -NFS4ERR_STALE_STATEID:
1068 case -NFS4ERR_BAD_STATEID: 1078 case -NFS4ERR_BAD_STATEID:
@@ -1138,16 +1148,14 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
1138 (void)ops->reclaim_complete(clp); 1148 (void)ops->reclaim_complete(clp);
1139} 1149}
1140 1150
1141static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) 1151static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
1142{ 1152{
1143 struct nfs4_state_owner *sp; 1153 struct nfs4_state_owner *sp;
1144 struct rb_node *pos; 1154 struct rb_node *pos;
1145 struct nfs4_state *state; 1155 struct nfs4_state *state;
1146 1156
1147 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1157 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1148 return; 1158 return 0;
1149
1150 nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
1151 1159
1152 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1160 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
1153 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1161 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
@@ -1161,6 +1169,14 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1161 } 1169 }
1162 1170
1163 nfs_delegation_reap_unclaimed(clp); 1171 nfs_delegation_reap_unclaimed(clp);
1172 return 1;
1173}
1174
1175static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1176{
1177 if (!nfs4_state_clear_reclaim_reboot(clp))
1178 return;
1179 nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
1164} 1180}
1165 1181
1166static void nfs_delegation_clear_all(struct nfs_client *clp) 1182static void nfs_delegation_clear_all(struct nfs_client *clp)
@@ -1175,6 +1191,14 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1175 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); 1191 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1176} 1192}
1177 1193
1194static void nfs4_warn_keyexpired(const char *s)
1195{
1196 printk_ratelimited(KERN_WARNING "Error: state manager"
1197 " encountered RPCSEC_GSS session"
1198 " expired against NFSv4 server %s.\n",
1199 s);
1200}
1201
1178static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) 1202static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1179{ 1203{
1180 switch (error) { 1204 switch (error) {
@@ -1187,7 +1211,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1187 case -NFS4ERR_STALE_CLIENTID: 1211 case -NFS4ERR_STALE_CLIENTID:
1188 case -NFS4ERR_LEASE_MOVED: 1212 case -NFS4ERR_LEASE_MOVED:
1189 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1213 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1190 nfs4_state_end_reclaim_reboot(clp); 1214 nfs4_state_clear_reclaim_reboot(clp);
1191 nfs4_state_start_reclaim_reboot(clp); 1215 nfs4_state_start_reclaim_reboot(clp);
1192 break; 1216 break;
1193 case -NFS4ERR_EXPIRED: 1217 case -NFS4ERR_EXPIRED:
@@ -1204,6 +1228,10 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1204 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1228 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1205 /* Zero session reset errors */ 1229 /* Zero session reset errors */
1206 return 0; 1230 return 0;
1231 case -EKEYEXPIRED:
1232 /* Nothing we can do */
1233 nfs4_warn_keyexpired(clp->cl_hostname);
1234 return 0;
1207 } 1235 }
1208 return error; 1236 return error;
1209} 1237}
@@ -1414,9 +1442,10 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1414 case -NFS4ERR_DELAY: 1442 case -NFS4ERR_DELAY:
1415 case -NFS4ERR_CLID_INUSE: 1443 case -NFS4ERR_CLID_INUSE:
1416 case -EAGAIN: 1444 case -EAGAIN:
1417 case -EKEYEXPIRED:
1418 break; 1445 break;
1419 1446
1447 case -EKEYEXPIRED:
1448 nfs4_warn_keyexpired(clp->cl_hostname);
1420 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery 1449 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1421 * in nfs4_exchange_id */ 1450 * in nfs4_exchange_id */
1422 default: 1451 default:
@@ -1447,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1447 } 1476 }
1448 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1477 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1449 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); 1478 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1479 pnfs_destroy_all_layouts(clp);
1450 } 1480 }
1451 1481
1452 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1482 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 08ef91291132..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
52#include <linux/nfs_idmap.h> 52#include <linux/nfs_idmap.h>
53#include "nfs4_fs.h" 53#include "nfs4_fs.h"
54#include "internal.h" 54#include "internal.h"
55#include "pnfs.h"
55 56
56#define NFSDBG_FACILITY NFSDBG_XDR 57#define NFSDBG_FACILITY NFSDBG_XDR
57 58
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int);
310 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) 311 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
311#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) 312#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4)
312#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) 313#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4)
314#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
315 XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
316#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
317 1 /* layout type */ + \
318 1 /* opaque devaddr4 length */ + \
319 /* devaddr4 payload is read into page */ \
320 1 /* notification bitmap length */ + \
321 1 /* notification bitmap */)
322#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
323 encode_stateid_maxsz)
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
313#else /* CONFIG_NFS_V4_1 */ 327#else /* CONFIG_NFS_V4_1 */
314#define encode_sequence_maxsz 0 328#define encode_sequence_maxsz 0
315#define decode_sequence_maxsz 0 329#define decode_sequence_maxsz 0
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int);
699#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ 713#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \
700 decode_sequence_maxsz + \ 714 decode_sequence_maxsz + \
701 decode_reclaim_complete_maxsz) 715 decode_reclaim_complete_maxsz)
716#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
717 encode_sequence_maxsz +\
718 encode_getdeviceinfo_maxsz)
719#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
720 decode_sequence_maxsz + \
721 decode_getdeviceinfo_maxsz)
722#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \
723 encode_sequence_maxsz + \
724 encode_putfh_maxsz + \
725 encode_layoutget_maxsz)
726#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \
727 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \
729 decode_layoutget_maxsz)
702 730
703const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
704 compound_encode_hdr_maxsz + 732 compound_encode_hdr_maxsz +
@@ -816,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
816 if (iap->ia_valid & ATTR_MODE) 844 if (iap->ia_valid & ATTR_MODE)
817 len += 4; 845 len += 4;
818 if (iap->ia_valid & ATTR_UID) { 846 if (iap->ia_valid & ATTR_UID) {
819 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); 847 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ);
820 if (owner_namelen < 0) { 848 if (owner_namelen < 0) {
821 dprintk("nfs: couldn't resolve uid %d to string\n", 849 dprintk("nfs: couldn't resolve uid %d to string\n",
822 iap->ia_uid); 850 iap->ia_uid);
@@ -828,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
828 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
829 } 857 }
830 if (iap->ia_valid & ATTR_GID) { 858 if (iap->ia_valid & ATTR_GID) {
831 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); 859 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ);
832 if (owner_grouplen < 0) { 860 if (owner_grouplen < 0) {
833 dprintk("nfs: couldn't resolve gid %d to string\n", 861 dprintk("nfs: couldn't resolve gid %d to string\n",
834 iap->ia_gid); 862 iap->ia_gid);
@@ -1385,24 +1413,35 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1385 1413
1386static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1414static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1387{ 1415{
1388 uint32_t attrs[2] = { 1416 uint32_t attrs[2] = {0, 0};
1389 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1417 uint32_t dircount = readdir->count >> 1;
1390 FATTR4_WORD1_MOUNTED_ON_FILEID,
1391 };
1392 __be32 *p; 1418 __be32 *p;
1393 1419
1420 if (readdir->plus) {
1421 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
1422 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE;
1423 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
1424 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
1425 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
1426 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1427 dircount >>= 1;
1428 }
1429 attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID;
1430 attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
1431 /* Switch to mounted_on_fileid if the server supports it */
1432 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1433 attrs[0] &= ~FATTR4_WORD0_FILEID;
1434 else
1435 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1436
1394 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1437 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1395 *p++ = cpu_to_be32(OP_READDIR); 1438 *p++ = cpu_to_be32(OP_READDIR);
1396 p = xdr_encode_hyper(p, readdir->cookie); 1439 p = xdr_encode_hyper(p, readdir->cookie);
1397 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); 1440 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
1398 *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ 1441 *p++ = cpu_to_be32(dircount);
1399 *p++ = cpu_to_be32(readdir->count); 1442 *p++ = cpu_to_be32(readdir->count);
1400 *p++ = cpu_to_be32(2); 1443 *p++ = cpu_to_be32(2);
1401 /* Switch to mounted_on_fileid if the server supports it */ 1444
1402 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1403 attrs[0] &= ~FATTR4_WORD0_FILEID;
1404 else
1405 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1406 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); 1445 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1407 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); 1446 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1408 hdr->nops++; 1447 hdr->nops++;
@@ -1726,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
1726#endif /* CONFIG_NFS_V4_1 */ 1765#endif /* CONFIG_NFS_V4_1 */
1727} 1766}
1728 1767
1768#ifdef CONFIG_NFS_V4_1
1769static void
1770encode_getdeviceinfo(struct xdr_stream *xdr,
1771 const struct nfs4_getdeviceinfo_args *args,
1772 struct compound_hdr *hdr)
1773{
1774 __be32 *p;
1775
1776 p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
1777 *p++ = cpu_to_be32(OP_GETDEVICEINFO);
1778 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
1779 NFS4_DEVICEID4_SIZE);
1780 *p++ = cpu_to_be32(args->pdev->layout_type);
1781 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
1782 *p++ = cpu_to_be32(0); /* bitmap length 0 */
1783 hdr->nops++;
1784 hdr->replen += decode_getdeviceinfo_maxsz;
1785}
1786
1787static void
1788encode_layoutget(struct xdr_stream *xdr,
1789 const struct nfs4_layoutget_args *args,
1790 struct compound_hdr *hdr)
1791{
1792 nfs4_stateid stateid;
1793 __be32 *p;
1794
1795 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
1796 *p++ = cpu_to_be32(OP_LAYOUTGET);
1797 *p++ = cpu_to_be32(0); /* Signal layout available */
1798 *p++ = cpu_to_be32(args->type);
1799 *p++ = cpu_to_be32(args->range.iomode);
1800 p = xdr_encode_hyper(p, args->range.offset);
1801 p = xdr_encode_hyper(p, args->range.length);
1802 p = xdr_encode_hyper(p, args->minlength);
1803 pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
1804 args->ctx->state);
1805 p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
1806 *p = cpu_to_be32(args->maxcount);
1807
1808 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
1809 __func__,
1810 args->type,
1811 args->range.iomode,
1812 (unsigned long)args->range.offset,
1813 (unsigned long)args->range.length,
1814 args->maxcount);
1815 hdr->nops++;
1816 hdr->replen += decode_layoutget_maxsz;
1817}
1818#endif /* CONFIG_NFS_V4_1 */
1819
1729/* 1820/*
1730 * END OF "GENERIC" ENCODE ROUTINES. 1821 * END OF "GENERIC" ENCODE ROUTINES.
1731 */ 1822 */
@@ -1823,7 +1914,7 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
1823/* 1914/*
1824 * Encode RENAME request 1915 * Encode RENAME request
1825 */ 1916 */
1826static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args) 1917static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
1827{ 1918{
1828 struct xdr_stream xdr; 1919 struct xdr_stream xdr;
1829 struct compound_hdr hdr = { 1920 struct compound_hdr hdr = {
@@ -2543,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
2543 return 0; 2634 return 0;
2544} 2635}
2545 2636
2637/*
2638 * Encode GETDEVICEINFO request
2639 */
2640static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
2641 struct nfs4_getdeviceinfo_args *args)
2642{
2643 struct xdr_stream xdr;
2644 struct compound_hdr hdr = {
2645 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2646 };
2647
2648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2649 encode_compound_hdr(&xdr, req, &hdr);
2650 encode_sequence(&xdr, &args->seq_args, &hdr);
2651 encode_getdeviceinfo(&xdr, args, &hdr);
2652
2653 /* set up reply kvec. Subtract notification bitmap max size (2)
2654 * so that notification bitmap is put in xdr_buf tail */
2655 xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
2656 args->pdev->pages, args->pdev->pgbase,
2657 args->pdev->pglen);
2658
2659 encode_nops(&hdr);
2660 return 0;
2661}
2662
2663/*
2664 * Encode LAYOUTGET request
2665 */
2666static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
2667 struct nfs4_layoutget_args *args)
2668{
2669 struct xdr_stream xdr;
2670 struct compound_hdr hdr = {
2671 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2672 };
2673
2674 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2675 encode_compound_hdr(&xdr, req, &hdr);
2676 encode_sequence(&xdr, &args->seq_args, &hdr);
2677 encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
2678 encode_layoutget(&xdr, args, &hdr);
2679 encode_nops(&hdr);
2680 return 0;
2681}
2546#endif /* CONFIG_NFS_V4_1 */ 2682#endif /* CONFIG_NFS_V4_1 */
2547 2683
2548static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2684static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -2676,7 +2812,10 @@ out_overflow:
2676static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) 2812static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
2677{ 2813{
2678 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { 2814 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) {
2679 decode_attr_bitmap(xdr, bitmask); 2815 int ret;
2816 ret = decode_attr_bitmap(xdr, bitmask);
2817 if (unlikely(ret < 0))
2818 return ret;
2680 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; 2819 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
2681 } else 2820 } else
2682 bitmask[0] = bitmask[1] = 0; 2821 bitmask[0] = bitmask[1] = 0;
@@ -2848,6 +2987,56 @@ out_overflow:
2848 return -EIO; 2987 return -EIO;
2849} 2988}
2850 2989
2990static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
2991{
2992 __be32 *p;
2993
2994 if (unlikely(bitmap[0] & (FATTR4_WORD0_RDATTR_ERROR - 1U)))
2995 return -EIO;
2996 if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) {
2997 p = xdr_inline_decode(xdr, 4);
2998 if (unlikely(!p))
2999 goto out_overflow;
3000 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3001 }
3002 return 0;
3003out_overflow:
3004 print_overflow_msg(__func__, xdr);
3005 return -EIO;
3006}
3007
3008static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
3009{
3010 __be32 *p;
3011 int len;
3012
3013 if (fh != NULL)
3014 memset(fh, 0, sizeof(*fh));
3015
3016 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEHANDLE - 1U)))
3017 return -EIO;
3018 if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) {
3019 p = xdr_inline_decode(xdr, 4);
3020 if (unlikely(!p))
3021 goto out_overflow;
3022 len = be32_to_cpup(p);
3023 if (len > NFS4_FHSIZE)
3024 return -EIO;
3025 p = xdr_inline_decode(xdr, len);
3026 if (unlikely(!p))
3027 goto out_overflow;
3028 if (fh != NULL) {
3029 memcpy(fh->data, p, len);
3030 fh->size = len;
3031 }
3032 bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE;
3033 }
3034 return 0;
3035out_overflow:
3036 print_overflow_msg(__func__, xdr);
3037 return -EIO;
3038}
3039
2851static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3040static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2852{ 3041{
2853 __be32 *p; 3042 __be32 *p;
@@ -3521,6 +3710,24 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
3521 return status; 3710 return status;
3522} 3711}
3523 3712
3713static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
3714 struct timespec *time)
3715{
3716 int status = 0;
3717
3718 time->tv_sec = 0;
3719 time->tv_nsec = 0;
3720 if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_DELTA - 1U)))
3721 return -EIO;
3722 if (likely(bitmap[1] & FATTR4_WORD1_TIME_DELTA)) {
3723 status = decode_attr_time(xdr, time);
3724 bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA;
3725 }
3726 dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec,
3727 (long)time->tv_nsec);
3728 return status;
3729}
3730
3524static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 3731static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
3525{ 3732{
3526 int status = 0; 3733 int status = 0;
@@ -3744,29 +3951,14 @@ xdr_error:
3744 return status; 3951 return status;
3745} 3952}
3746 3953
3747static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, 3954static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3955 struct nfs_fattr *fattr, struct nfs_fh *fh,
3748 const struct nfs_server *server, int may_sleep) 3956 const struct nfs_server *server, int may_sleep)
3749{ 3957{
3750 __be32 *savep;
3751 uint32_t attrlen,
3752 bitmap[2] = {0},
3753 type;
3754 int status; 3958 int status;
3755 umode_t fmode = 0; 3959 umode_t fmode = 0;
3756 uint64_t fileid; 3960 uint64_t fileid;
3757 3961 uint32_t type;
3758 status = decode_op_hdr(xdr, OP_GETATTR);
3759 if (status < 0)
3760 goto xdr_error;
3761
3762 status = decode_attr_bitmap(xdr, bitmap);
3763 if (status < 0)
3764 goto xdr_error;
3765
3766 status = decode_attr_length(xdr, &attrlen, &savep);
3767 if (status < 0)
3768 goto xdr_error;
3769
3770 3962
3771 status = decode_attr_type(xdr, bitmap, &type); 3963 status = decode_attr_type(xdr, bitmap, &type);
3772 if (status < 0) 3964 if (status < 0)
@@ -3792,6 +3984,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3792 goto xdr_error; 3984 goto xdr_error;
3793 fattr->valid |= status; 3985 fattr->valid |= status;
3794 3986
3987 status = decode_attr_error(xdr, bitmap);
3988 if (status < 0)
3989 goto xdr_error;
3990
3991 status = decode_attr_filehandle(xdr, bitmap, fh);
3992 if (status < 0)
3993 goto xdr_error;
3994
3795 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); 3995 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
3796 if (status < 0) 3996 if (status < 0)
3797 goto xdr_error; 3997 goto xdr_error;
@@ -3862,12 +4062,101 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3862 fattr->valid |= status; 4062 fattr->valid |= status;
3863 } 4063 }
3864 4064
4065xdr_error:
4066 dprintk("%s: xdr returned %d\n", __func__, -status);
4067 return status;
4068}
4069
4070static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4071 struct nfs_fh *fh, const struct nfs_server *server, int may_sleep)
4072{
4073 __be32 *savep;
4074 uint32_t attrlen,
4075 bitmap[2] = {0};
4076 int status;
4077
4078 status = decode_op_hdr(xdr, OP_GETATTR);
4079 if (status < 0)
4080 goto xdr_error;
4081
4082 status = decode_attr_bitmap(xdr, bitmap);
4083 if (status < 0)
4084 goto xdr_error;
4085
4086 status = decode_attr_length(xdr, &attrlen, &savep);
4087 if (status < 0)
4088 goto xdr_error;
4089
4090 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep);
4091 if (status < 0)
4092 goto xdr_error;
4093
3865 status = verify_attr_len(xdr, savep, attrlen); 4094 status = verify_attr_len(xdr, savep, attrlen);
3866xdr_error: 4095xdr_error:
3867 dprintk("%s: xdr returned %d\n", __func__, -status); 4096 dprintk("%s: xdr returned %d\n", __func__, -status);
3868 return status; 4097 return status;
3869} 4098}
3870 4099
4100static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4101 const struct nfs_server *server, int may_sleep)
4102{
4103 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
4104}
4105
4106/*
4107 * Decode potentially multiple layout types. Currently we only support
4108 * one layout driver per file system.
4109 */
4110static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
4111 uint32_t *layouttype)
4112{
4113 uint32_t *p;
4114 int num;
4115
4116 p = xdr_inline_decode(xdr, 4);
4117 if (unlikely(!p))
4118 goto out_overflow;
4119 num = be32_to_cpup(p);
4120
4121 /* pNFS is not supported by the underlying file system */
4122 if (num == 0) {
4123 *layouttype = 0;
4124 return 0;
4125 }
4126 if (num > 1)
4127 printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
4128 "per filesystem not supported\n", __func__);
4129
4130 /* Decode and set first layout type, move xdr->p past unused types */
4131 p = xdr_inline_decode(xdr, num * 4);
4132 if (unlikely(!p))
4133 goto out_overflow;
4134 *layouttype = be32_to_cpup(p);
4135 return 0;
4136out_overflow:
4137 print_overflow_msg(__func__, xdr);
4138 return -EIO;
4139}
4140
4141/*
4142 * The type of file system exported.
4143 * Note we must ensure that layouttype is set in any non-error case.
4144 */
4145static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
4146 uint32_t *layouttype)
4147{
4148 int status = 0;
4149
4150 dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
4151 if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
4152 return -EIO;
4153 if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
4154 status = decode_first_pnfs_layout_type(xdr, layouttype);
4155 bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
4156 } else
4157 *layouttype = 0;
4158 return status;
4159}
3871 4160
3872static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) 4161static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3873{ 4162{
@@ -3894,6 +4183,12 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3894 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) 4183 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
3895 goto xdr_error; 4184 goto xdr_error;
3896 fsinfo->wtpref = fsinfo->wtmax; 4185 fsinfo->wtpref = fsinfo->wtmax;
4186 status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
4187 if (status != 0)
4188 goto xdr_error;
4189 status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
4190 if (status != 0)
4191 goto xdr_error;
3897 4192
3898 status = verify_attr_len(xdr, savep, attrlen); 4193 status = verify_attr_len(xdr, savep, attrlen);
3899xdr_error: 4194xdr_error:
@@ -3950,13 +4245,13 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3950 __be32 *p; 4245 __be32 *p;
3951 uint32_t namelen, type; 4246 uint32_t namelen, type;
3952 4247
3953 p = xdr_inline_decode(xdr, 32); 4248 p = xdr_inline_decode(xdr, 32); /* read 32 bytes */
3954 if (unlikely(!p)) 4249 if (unlikely(!p))
3955 goto out_overflow; 4250 goto out_overflow;
3956 p = xdr_decode_hyper(p, &offset); 4251 p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */
3957 p = xdr_decode_hyper(p, &length); 4252 p = xdr_decode_hyper(p, &length);
3958 type = be32_to_cpup(p++); 4253 type = be32_to_cpup(p++); /* 4 byte read */
3959 if (fl != NULL) { 4254 if (fl != NULL) { /* manipulate file lock */
3960 fl->fl_start = (loff_t)offset; 4255 fl->fl_start = (loff_t)offset;
3961 fl->fl_end = fl->fl_start + (loff_t)length - 1; 4256 fl->fl_end = fl->fl_start + (loff_t)length - 1;
3962 if (length == ~(uint64_t)0) 4257 if (length == ~(uint64_t)0)
@@ -3966,9 +4261,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3966 fl->fl_type = F_RDLCK; 4261 fl->fl_type = F_RDLCK;
3967 fl->fl_pid = 0; 4262 fl->fl_pid = 0;
3968 } 4263 }
3969 p = xdr_decode_hyper(p, &clientid); 4264 p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
3970 namelen = be32_to_cpup(p); 4265 namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */
3971 p = xdr_inline_decode(xdr, namelen); 4266 p = xdr_inline_decode(xdr, namelen); /* variable size field */
3972 if (likely(p)) 4267 if (likely(p))
3973 return -NFS4ERR_DENIED; 4268 return -NFS4ERR_DENIED;
3974out_overflow: 4269out_overflow:
@@ -4200,12 +4495,9 @@ out_overflow:
4200static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) 4495static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
4201{ 4496{
4202 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 4497 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
4203 struct page *page = *rcvbuf->pages;
4204 struct kvec *iov = rcvbuf->head; 4498 struct kvec *iov = rcvbuf->head;
4205 size_t hdrlen; 4499 size_t hdrlen;
4206 u32 recvd, pglen = rcvbuf->page_len; 4500 u32 recvd, pglen = rcvbuf->page_len;
4207 __be32 *end, *entry, *p, *kaddr;
4208 unsigned int nr = 0;
4209 int status; 4501 int status;
4210 4502
4211 status = decode_op_hdr(xdr, OP_READDIR); 4503 status = decode_op_hdr(xdr, OP_READDIR);
@@ -4225,71 +4517,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
4225 pglen = recvd; 4517 pglen = recvd;
4226 xdr_read_pages(xdr, pglen); 4518 xdr_read_pages(xdr, pglen);
4227 4519
4228 BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); 4520
4229 kaddr = p = kmap_atomic(page, KM_USER0);
4230 end = p + ((pglen + readdir->pgbase) >> 2);
4231 entry = p;
4232
4233 /* Make sure the packet actually has a value_follows and EOF entry */
4234 if ((entry + 1) > end)
4235 goto short_pkt;
4236
4237 for (; *p++; nr++) {
4238 u32 len, attrlen, xlen;
4239 if (end - p < 3)
4240 goto short_pkt;
4241 dprintk("cookie = %Lu, ", *((unsigned long long *)p));
4242 p += 2; /* cookie */
4243 len = ntohl(*p++); /* filename length */
4244 if (len > NFS4_MAXNAMLEN) {
4245 dprintk("NFS: giant filename in readdir (len 0x%x)\n",
4246 len);
4247 goto err_unmap;
4248 }
4249 xlen = XDR_QUADLEN(len);
4250 if (end - p < xlen + 1)
4251 goto short_pkt;
4252 dprintk("filename = %*s\n", len, (char *)p);
4253 p += xlen;
4254 len = ntohl(*p++); /* bitmap length */
4255 if (end - p < len + 1)
4256 goto short_pkt;
4257 p += len;
4258 attrlen = XDR_QUADLEN(ntohl(*p++));
4259 if (end - p < attrlen + 2)
4260 goto short_pkt;
4261 p += attrlen; /* attributes */
4262 entry = p;
4263 }
4264 /*
4265 * Apparently some server sends responses that are a valid size, but
4266 * contain no entries, and have value_follows==0 and EOF==0. For
4267 * those, just set the EOF marker.
4268 */
4269 if (!nr && entry[1] == 0) {
4270 dprintk("NFS: readdir reply truncated!\n");
4271 entry[1] = 1;
4272 }
4273out:
4274 kunmap_atomic(kaddr, KM_USER0);
4275 return 0; 4521 return 0;
4276short_pkt:
4277 /*
4278 * When we get a short packet there are 2 possibilities. We can
4279 * return an error, or fix up the response to look like a valid
4280 * response and return what we have so far. If there are no
4281 * entries and the packet was short, then return -EIO. If there
4282 * are valid entries in the response, return them and pretend that
4283 * the call was successful, but incomplete. The caller can retry the
4284 * readdir starting at the last cookie.
4285 */
4286 dprintk("%s: short packet at entry %d\n", __func__, nr);
4287 entry[0] = entry[1] = 0;
4288 if (nr)
4289 goto out;
4290err_unmap:
4291 kunmap_atomic(kaddr, KM_USER0);
4292 return -errno_NFSERR_IO;
4293} 4522}
4294 4523
4295static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) 4524static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
@@ -4299,7 +4528,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4299 size_t hdrlen; 4528 size_t hdrlen;
4300 u32 len, recvd; 4529 u32 len, recvd;
4301 __be32 *p; 4530 __be32 *p;
4302 char *kaddr;
4303 int status; 4531 int status;
4304 4532
4305 status = decode_op_hdr(xdr, OP_READLINK); 4533 status = decode_op_hdr(xdr, OP_READLINK);
@@ -4330,9 +4558,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4330 * and and null-terminate the text (the VFS expects 4558 * and and null-terminate the text (the VFS expects
4331 * null-termination). 4559 * null-termination).
4332 */ 4560 */
4333 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); 4561 xdr_terminate_string(rcvbuf, len);
4334 kaddr[len+rcvbuf->page_base] = '\0';
4335 kunmap_atomic(kaddr, KM_USER0);
4336 return 0; 4562 return 0;
4337out_overflow: 4563out_overflow:
4338 print_overflow_msg(__func__, xdr); 4564 print_overflow_msg(__func__, xdr);
@@ -4668,7 +4894,6 @@ static int decode_sequence(struct xdr_stream *xdr,
4668 struct rpc_rqst *rqstp) 4894 struct rpc_rqst *rqstp)
4669{ 4895{
4670#if defined(CONFIG_NFS_V4_1) 4896#if defined(CONFIG_NFS_V4_1)
4671 struct nfs4_slot *slot;
4672 struct nfs4_sessionid id; 4897 struct nfs4_sessionid id;
4673 u32 dummy; 4898 u32 dummy;
4674 int status; 4899 int status;
@@ -4700,15 +4925,14 @@ static int decode_sequence(struct xdr_stream *xdr,
4700 goto out_overflow; 4925 goto out_overflow;
4701 4926
4702 /* seqid */ 4927 /* seqid */
4703 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4704 dummy = be32_to_cpup(p++); 4928 dummy = be32_to_cpup(p++);
4705 if (dummy != slot->seq_nr) { 4929 if (dummy != res->sr_slot->seq_nr) {
4706 dprintk("%s Invalid sequence number\n", __func__); 4930 dprintk("%s Invalid sequence number\n", __func__);
4707 goto out_err; 4931 goto out_err;
4708 } 4932 }
4709 /* slot id */ 4933 /* slot id */
4710 dummy = be32_to_cpup(p++); 4934 dummy = be32_to_cpup(p++);
4711 if (dummy != res->sr_slotid) { 4935 if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) {
4712 dprintk("%s Invalid slot id\n", __func__); 4936 dprintk("%s Invalid slot id\n", __func__);
4713 goto out_err; 4937 goto out_err;
4714 } 4938 }
@@ -4731,6 +4955,134 @@ out_overflow:
4731#endif /* CONFIG_NFS_V4_1 */ 4955#endif /* CONFIG_NFS_V4_1 */
4732} 4956}
4733 4957
4958#if defined(CONFIG_NFS_V4_1)
4959
4960static int decode_getdeviceinfo(struct xdr_stream *xdr,
4961 struct pnfs_device *pdev)
4962{
4963 __be32 *p;
4964 uint32_t len, type;
4965 int status;
4966
4967 status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
4968 if (status) {
4969 if (status == -ETOOSMALL) {
4970 p = xdr_inline_decode(xdr, 4);
4971 if (unlikely(!p))
4972 goto out_overflow;
4973 pdev->mincount = be32_to_cpup(p);
4974 dprintk("%s: Min count too small. mincnt = %u\n",
4975 __func__, pdev->mincount);
4976 }
4977 return status;
4978 }
4979
4980 p = xdr_inline_decode(xdr, 8);
4981 if (unlikely(!p))
4982 goto out_overflow;
4983 type = be32_to_cpup(p++);
4984 if (type != pdev->layout_type) {
4985 dprintk("%s: layout mismatch req: %u pdev: %u\n",
4986 __func__, pdev->layout_type, type);
4987 return -EINVAL;
4988 }
4989 /*
4990 * Get the length of the opaque device_addr4. xdr_read_pages places
4991 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
4992 * and places the remaining xdr data in xdr_buf->tail
4993 */
4994 pdev->mincount = be32_to_cpup(p);
4995 xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
4996
4997 /* Parse notification bitmap, verifying that it is zero. */
4998 p = xdr_inline_decode(xdr, 4);
4999 if (unlikely(!p))
5000 goto out_overflow;
5001 len = be32_to_cpup(p);
5002 if (len) {
5003 int i;
5004
5005 p = xdr_inline_decode(xdr, 4 * len);
5006 if (unlikely(!p))
5007 goto out_overflow;
5008 for (i = 0; i < len; i++, p++) {
5009 if (be32_to_cpup(p)) {
5010 dprintk("%s: notifications not supported\n",
5011 __func__);
5012 return -EIO;
5013 }
5014 }
5015 }
5016 return 0;
5017out_overflow:
5018 print_overflow_msg(__func__, xdr);
5019 return -EIO;
5020}
5021
5022static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5023 struct nfs4_layoutget_res *res)
5024{
5025 __be32 *p;
5026 int status;
5027 u32 layout_count;
5028
5029 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5030 if (status)
5031 return status;
5032 p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
5033 if (unlikely(!p))
5034 goto out_overflow;
5035 res->return_on_close = be32_to_cpup(p++);
5036 p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
5037 layout_count = be32_to_cpup(p);
5038 if (!layout_count) {
5039 dprintk("%s: server responded with empty layout array\n",
5040 __func__);
5041 return -EINVAL;
5042 }
5043
5044 p = xdr_inline_decode(xdr, 24);
5045 if (unlikely(!p))
5046 goto out_overflow;
5047 p = xdr_decode_hyper(p, &res->range.offset);
5048 p = xdr_decode_hyper(p, &res->range.length);
5049 res->range.iomode = be32_to_cpup(p++);
5050 res->type = be32_to_cpup(p++);
5051
5052 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
5053 if (unlikely(status))
5054 return status;
5055
5056 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
5057 __func__,
5058 (unsigned long)res->range.offset,
5059 (unsigned long)res->range.length,
5060 res->range.iomode,
5061 res->type,
5062 res->layout.len);
5063
5064 /* nfs4_proc_layoutget allocated a single page */
5065 if (res->layout.len > PAGE_SIZE)
5066 return -ENOMEM;
5067 memcpy(res->layout.buf, p, res->layout.len);
5068
5069 if (layout_count > 1) {
5070 /* We only handle a length one array at the moment. Any
5071 * further entries are just ignored. Note that this means
5072 * the client may see a response that is less than the
5073 * minimum it requested.
5074 */
5075 dprintk("%s: server responded with %d layouts, dropping tail\n",
5076 __func__, layout_count);
5077 }
5078
5079 return 0;
5080out_overflow:
5081 print_overflow_msg(__func__, xdr);
5082 return -EIO;
5083}
5084#endif /* CONFIG_NFS_V4_1 */
5085
4734/* 5086/*
4735 * END OF "GENERIC" DECODE ROUTINES. 5087 * END OF "GENERIC" DECODE ROUTINES.
4736 */ 5088 */
@@ -4873,7 +5225,7 @@ out:
4873/* 5225/*
4874 * Decode RENAME response 5226 * Decode RENAME response
4875 */ 5227 */
4876static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res) 5228static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
4877{ 5229{
4878 struct xdr_stream xdr; 5230 struct xdr_stream xdr;
4879 struct compound_hdr hdr; 5231 struct compound_hdr hdr;
@@ -5758,25 +6110,84 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
5758 status = decode_reclaim_complete(&xdr, (void *)NULL); 6110 status = decode_reclaim_complete(&xdr, (void *)NULL);
5759 return status; 6111 return status;
5760} 6112}
6113
6114/*
6115 * Decode GETDEVINFO response
6116 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
6118 struct nfs4_getdeviceinfo_res *res)
6119{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr;
6122 int status;
6123
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0)
6127 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6129 if (status != 0)
6130 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev);
6132out:
6133 return status;
6134}
6135
6136/*
6137 * Decode LAYOUTGET response
6138 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
6140 struct nfs4_layoutget_res *res)
6141{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr;
6144 int status;
6145
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status)
6149 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6151 if (status)
6152 goto out;
6153 status = decode_putfh(&xdr);
6154 if (status)
6155 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res);
6157out:
6158 return status;
6159}
5761#endif /* CONFIG_NFS_V4_1 */ 6160#endif /* CONFIG_NFS_V4_1 */
5762 6161
5763__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6163 struct nfs_server *server, int plus)
5764{ 6164{
5765 uint32_t bitmap[2] = {0}; 6165 uint32_t bitmap[2] = {0};
5766 uint32_t len; 6166 uint32_t len;
5767 6167 __be32 *p = xdr_inline_decode(xdr, 4);
5768 if (!*p++) { 6168 if (unlikely(!p))
5769 if (!*p) 6169 goto out_overflow;
6170 if (!ntohl(*p++)) {
6171 p = xdr_inline_decode(xdr, 4);
6172 if (unlikely(!p))
6173 goto out_overflow;
6174 if (!ntohl(*p++))
5770 return ERR_PTR(-EAGAIN); 6175 return ERR_PTR(-EAGAIN);
5771 entry->eof = 1; 6176 entry->eof = 1;
5772 return ERR_PTR(-EBADCOOKIE); 6177 return ERR_PTR(-EBADCOOKIE);
5773 } 6178 }
5774 6179
6180 p = xdr_inline_decode(xdr, 12);
6181 if (unlikely(!p))
6182 goto out_overflow;
5775 entry->prev_cookie = entry->cookie; 6183 entry->prev_cookie = entry->cookie;
5776 p = xdr_decode_hyper(p, &entry->cookie); 6184 p = xdr_decode_hyper(p, &entry->cookie);
5777 entry->len = ntohl(*p++); 6185 entry->len = ntohl(*p++);
6186
6187 p = xdr_inline_decode(xdr, entry->len);
6188 if (unlikely(!p))
6189 goto out_overflow;
5778 entry->name = (const char *) p; 6190 entry->name = (const char *) p;
5779 p += XDR_QUADLEN(entry->len);
5780 6191
5781 /* 6192 /*
5782 * In case the server doesn't return an inode number, 6193 * In case the server doesn't return an inode number,
@@ -5784,32 +6195,33 @@ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
5784 * since glibc seems to choke on it...) 6195 * since glibc seems to choke on it...)
5785 */ 6196 */
5786 entry->ino = 1; 6197 entry->ino = 1;
6198 entry->fattr->valid = 0;
5787 6199
5788 len = ntohl(*p++); /* bitmap length */ 6200 if (decode_attr_bitmap(xdr, bitmap) < 0)
5789 if (len-- > 0) { 6201 goto out_overflow;
5790 bitmap[0] = ntohl(*p++); 6202
5791 if (len-- > 0) { 6203 if (decode_attr_length(xdr, &len, &p) < 0)
5792 bitmap[1] = ntohl(*p++); 6204 goto out_overflow;
5793 p += len; 6205
5794 } 6206 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
5795 } 6207 goto out_overflow;
5796 len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ 6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
5797 if (len > 0) { 6209 entry->ino = entry->fattr->fileid;
5798 if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { 6210
5799 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 6211 if (verify_attr_len(xdr, p, len) < 0)
5800 /* Ignore the return value of rdattr_error for now */ 6212 goto out_overflow;
5801 p++; 6213
5802 len--; 6214 p = xdr_inline_peek(xdr, 8);
5803 } 6215 if (p != NULL)
5804 if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) 6216 entry->eof = !p[0] && p[1];
5805 xdr_decode_hyper(p, &entry->ino); 6217 else
5806 else if (bitmap[0] == FATTR4_WORD0_FILEID) 6218 entry->eof = 0;
5807 xdr_decode_hyper(p, &entry->ino);
5808 p += len;
5809 }
5810 6219
5811 entry->eof = !p[0] && p[1];
5812 return p; 6220 return p;
6221
6222out_overflow:
6223 print_overflow_msg(__func__, xdr);
6224 return ERR_PTR(-EIO);
5813} 6225}
5814 6226
5815/* 6227/*
@@ -5936,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
5936 PROC(SEQUENCE, enc_sequence, dec_sequence), 6348 PROC(SEQUENCE, enc_sequence, dec_sequence),
5937 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6349 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
5938 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6350 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6351 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6352 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
5939#endif /* CONFIG_NFS_V4_1 */ 6353#endif /* CONFIG_NFS_V4_1 */
5940}; 6354};
5941 6355
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index df101d9f546a..903908a20023 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -3,9 +3,10 @@
3 * 3 *
4 * Allow an NFS filesystem to be mounted as root. The way this works is: 4 * Allow an NFS filesystem to be mounted as root. The way this works is:
5 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. 5 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
6 * (2) Handle RPC negotiation with the system which replied to RARP or 6 * (2) Construct the device string and the options string using DHCP
7 * was reported as a boot server by BOOTP or manually. 7 * option 17 and/or kernel command line options.
8 * (3) The actual mounting is done later, when init() is running. 8 * (3) When mount_root() sets up the root file system, pass these strings
9 * to the NFS client's regular mount interface via sys_mount().
9 * 10 *
10 * 11 *
11 * Changes: 12 * Changes:
@@ -65,470 +66,245 @@
65 * Hua Qin : Support for mounting root file system via 66 * Hua Qin : Support for mounting root file system via
66 * NFS over TCP. 67 * NFS over TCP.
67 * Fabian Frederick: Option parser rebuilt (using parser lib) 68 * Fabian Frederick: Option parser rebuilt (using parser lib)
68*/ 69 * Chuck Lever : Use super.c's text-based mount option parsing
70 * Chuck Lever : Add "nfsrootdebug".
71 */
69 72
70#include <linux/types.h> 73#include <linux/types.h>
71#include <linux/string.h> 74#include <linux/string.h>
72#include <linux/kernel.h>
73#include <linux/time.h>
74#include <linux/fs.h>
75#include <linux/init.h> 75#include <linux/init.h>
76#include <linux/sunrpc/clnt.h>
77#include <linux/sunrpc/xprtsock.h>
78#include <linux/nfs.h> 76#include <linux/nfs.h>
79#include <linux/nfs_fs.h> 77#include <linux/nfs_fs.h>
80#include <linux/nfs_mount.h>
81#include <linux/in.h>
82#include <linux/major.h>
83#include <linux/utsname.h> 78#include <linux/utsname.h>
84#include <linux/inet.h>
85#include <linux/root_dev.h> 79#include <linux/root_dev.h>
86#include <net/ipconfig.h> 80#include <net/ipconfig.h>
87#include <linux/parser.h>
88 81
89#include "internal.h" 82#include "internal.h"
90 83
91/* Define this to allow debugging output */
92#undef NFSROOT_DEBUG
93#define NFSDBG_FACILITY NFSDBG_ROOT 84#define NFSDBG_FACILITY NFSDBG_ROOT
94 85
95/* Default port to use if server is not running a portmapper */
96#define NFS_MNT_PORT 627
97
98/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
99#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
100 88
101/* Parameters passed from the kernel command line */ 89/* Parameters passed from the kernel command line */
102static char nfs_root_name[256] __initdata = ""; 90static char nfs_root_parms[256] __initdata = "";
91
92/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = "";
103 94
104/* Address of NFS server */ 95/* Address of NFS server */
105static __be32 servaddr __initdata = 0; 96static __be32 servaddr __initdata = htonl(INADDR_NONE);
106 97
107/* Name of directory to mount */ 98/* Name of directory to mount */
108static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, }; 99static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
109
110/* NFS-related data */
111static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
112static int nfs_port __initdata = 0; /* Port to connect to for NFS */
113static int mount_port __initdata = 0; /* Mount daemon port number */
114
115
116/***************************************************************************
117
118 Parsing of options
119
120 ***************************************************************************/
121
122enum {
123 /* Options that take integer arguments */
124 Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
125 Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
126 /* Options that take no arguments */
127 Opt_soft, Opt_hard, Opt_intr,
128 Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac,
129 Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
130 Opt_acl, Opt_noacl,
131 /* Error token */
132 Opt_err
133};
134
135static const match_table_t tokens __initconst = {
136 {Opt_port, "port=%u"},
137 {Opt_rsize, "rsize=%u"},
138 {Opt_wsize, "wsize=%u"},
139 {Opt_timeo, "timeo=%u"},
140 {Opt_retrans, "retrans=%u"},
141 {Opt_acregmin, "acregmin=%u"},
142 {Opt_acregmax, "acregmax=%u"},
143 {Opt_acdirmin, "acdirmin=%u"},
144 {Opt_acdirmax, "acdirmax=%u"},
145 {Opt_soft, "soft"},
146 {Opt_hard, "hard"},
147 {Opt_intr, "intr"},
148 {Opt_nointr, "nointr"},
149 {Opt_posix, "posix"},
150 {Opt_noposix, "noposix"},
151 {Opt_cto, "cto"},
152 {Opt_nocto, "nocto"},
153 {Opt_ac, "ac"},
154 {Opt_noac, "noac"},
155 {Opt_lock, "lock"},
156 {Opt_nolock, "nolock"},
157 {Opt_v2, "nfsvers=2"},
158 {Opt_v2, "v2"},
159 {Opt_v3, "nfsvers=3"},
160 {Opt_v3, "v3"},
161 {Opt_udp, "proto=udp"},
162 {Opt_udp, "udp"},
163 {Opt_tcp, "proto=tcp"},
164 {Opt_tcp, "tcp"},
165 {Opt_acl, "acl"},
166 {Opt_noacl, "noacl"},
167 {Opt_err, NULL}
168
169};
170 100
101/* server:export path string passed to super.c */
102static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
103
#ifdef RPC_DEBUG
/*
 * Handler for the "nfsrootdebug" kernel command line option: turn on
 * the NFSROOT and mount debugging facilities in nfs_debug.
 *
 * The option takes no argument, so the string handed in is ignored.
 * Always returns 1.
 */
static int __init nfs_root_debug(char *__unused)
{
	nfs_debug = nfs_debug | (NFSDBG_ROOT | NFSDBG_MOUNT);

	return 1;
}

__setup("nfsrootdebug", nfs_root_debug);
#endif
117
288/* 118/*
289 * Prepare the NFS data structure and parse all options. 119 * Parse NFS server and directory information passed on the kernel
120 * command line.
121 *
122 * nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
123 *
124 * If there is a "%s" token in the <root-dir> string, it is replaced
125 * by the ASCII-representation of the client's IP address.
290 */ 126 */
291static int __init root_nfs_name(char *name) 127static int __init nfs_root_setup(char *line)
292{ 128{
293 static char buf[NFS_MAXPATHLEN] __initdata; 129 ROOT_DEV = Root_NFS;
294 char *cp; 130
295 131 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
296 /* Set some default values */ 132 strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms));
297 memset(&nfs_data, 0, sizeof(nfs_data)); 133 } else {
298 nfs_port = -1; 134 size_t n = strlen(line) + sizeof(NFS_ROOT) - 1;
299 nfs_data.version = NFS_MOUNT_VERSION; 135 if (n >= sizeof(nfs_root_parms))
300 nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ 136 line[sizeof(nfs_root_parms) - sizeof(NFS_ROOT) - 2] = '\0';
301 nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; 137 sprintf(nfs_root_parms, NFS_ROOT, line);
302 nfs_data.wsize = NFS_DEF_FILE_IO_SIZE;
303 nfs_data.acregmin = NFS_DEF_ACREGMIN;
304 nfs_data.acregmax = NFS_DEF_ACREGMAX;
305 nfs_data.acdirmin = NFS_DEF_ACDIRMIN;
306 nfs_data.acdirmax = NFS_DEF_ACDIRMAX;
307 strcpy(buf, NFS_ROOT);
308
309 /* Process options received from the remote server */
310 root_nfs_parse(root_server_path, buf);
311
312 /* Override them by options set on kernel command-line */
313 root_nfs_parse(name, buf);
314
315 cp = utsname()->nodename;
316 if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
317 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
318 return -1;
319 } 138 }
320 sprintf(nfs_export_path, buf, cp); 139
140 /*
141 * Extract the IP address of the NFS server containing our
142 * root file system, if one was specified.
143 *
144 * Note: root_nfs_parse_addr() removes the server-ip from
145 * nfs_root_parms, if it exists.
146 */
147 root_server_addr = root_nfs_parse_addr(nfs_root_parms);
321 148
322 return 1; 149 return 1;
323} 150}
324 151
152__setup("nfsroot=", nfs_root_setup);
325 153
326/* 154static int __init root_nfs_copy(char *dest, const char *src,
327 * Get NFS server address. 155 const size_t destlen)
328 */
329static int __init root_nfs_addr(void)
330{ 156{
331 if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) { 157 if (strlcpy(dest, src, destlen) > destlen)
332 printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
333 return -1; 158 return -1;
334 } 159 return 0;
160}
335 161
336 snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), 162static int __init root_nfs_cat(char *dest, const char *src,
337 "%pI4", &servaddr); 163 const size_t destlen)
164{
165 if (strlcat(dest, src, destlen) > destlen)
166 return -1;
338 return 0; 167 return 0;
339} 168}
340 169
341/* 170/*
342 * Tell the user what's going on. 171 * Parse out root export path and mount options from
172 * passed-in string @incoming.
173 *
174 * Copy the export path into @exppath.
343 */ 175 */
344#ifdef NFSROOT_DEBUG 176static int __init root_nfs_parse_options(char *incoming, char *exppath,
345static void __init root_nfs_print(void) 177 const size_t exppathlen)
346{ 178{
347 printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", 179 char *p;
348 nfs_export_path, nfs_data.hostname);
349 printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
350 nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
351 printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
352 nfs_data.acregmin, nfs_data.acregmax,
353 nfs_data.acdirmin, nfs_data.acdirmax);
354 printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n",
355 nfs_port, mount_port, nfs_data.flags);
356}
357#endif
358
359 180
360static int __init root_nfs_init(void) 181 /*
361{ 182 * Set the NFS remote path
362#ifdef NFSROOT_DEBUG 183 */
363 nfs_debug |= NFSDBG_ROOT; 184 p = strsep(&incoming, ",");
364#endif 185 if (*p != '\0' && strcmp(p, "default") != 0)
186 if (root_nfs_copy(exppath, p, exppathlen))
187 return -1;
365 188
366 /* 189 /*
367 * Decode the root directory path name and NFS options from 190 * @incoming now points to the rest of the string; if it
368 * the kernel command line. This has to go here in order to 191 * contains something, append it to our root options buffer
369 * be able to use the client IP address for the remote root
370 * directory (necessary for pure RARP booting).
371 */ 192 */
372 if (root_nfs_name(nfs_root_name) < 0 || 193 if (incoming != NULL && *incoming != '\0')
373 root_nfs_addr() < 0) 194 if (root_nfs_cat(nfs_root_options, incoming,
374 return -1; 195 sizeof(nfs_root_options)))
196 return -1;
375 197
376#ifdef NFSROOT_DEBUG 198 /*
377 root_nfs_print(); 199 * Possibly prepare for more options to be appended
378#endif 200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
379 206
380 return 0; 207 return 0;
381} 208}
382 209
383
384/* 210/*
385 * Parse NFS server and directory information passed on the kernel 211 * Decode the export directory path name and NFS options from
386 * command line. 212 * the kernel command line. This has to be done late in order to
213 * use a dynamically acquired client IP address for the remote
214 * root directory path.
215 *
216 * Returns zero if successful; otherwise -1 is returned.
387 */ 217 */
388static int __init nfs_root_setup(char *line) 218static int __init root_nfs_data(char *cmdline)
389{ 219{
390 ROOT_DEV = Root_NFS; 220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
391 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { 221 int len, retval = -1;
392 strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); 222 char *tmp = NULL;
393 } else { 223 const size_t tmplen = sizeof(nfs_export_path);
394 int n = strlen(line) + sizeof(NFS_ROOT) - 1; 224
395 if (n >= sizeof(nfs_root_name)) 225 tmp = kzalloc(tmplen, GFP_KERNEL);
396 line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; 226 if (tmp == NULL)
397 sprintf(nfs_root_name, NFS_ROOT, line); 227 goto out_nomem;
228 strcpy(tmp, NFS_ROOT);
229
230 if (root_server_path[0] != '\0') {
231 dprintk("Root-NFS: DHCPv4 option 17: %s\n",
232 root_server_path);
233 if (root_nfs_parse_options(root_server_path, tmp, tmplen))
234 goto out_optionstoolong;
398 } 235 }
399 root_server_addr = root_nfs_parse_addr(nfs_root_name);
400 return 1;
401}
402
403__setup("nfsroot=", nfs_root_setup);
404
405/***************************************************************************
406 236
407 Routines to actually mount the root directory 237 if (cmdline[0] != '\0') {
238 dprintk("Root-NFS: nfsroot=%s\n", cmdline);
239 if (root_nfs_parse_options(cmdline, tmp, tmplen))
240 goto out_optionstoolong;
241 }
408 242
409 ***************************************************************************/ 243 /*
244 * Append mandatory options for nfsroot so they override
245 * what has come before
246 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
248 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option,
250 sizeof(nfs_root_options)))
251 goto out_optionstoolong;
410 252
411/* 253 /*
412 * Construct sockaddr_in from address and port number. 254 * Set up nfs_root_device. For NFS mounts, this looks like
413 */ 255 *
414static inline void 256 * server:/path
415set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) 257 *
416{ 258 * At this point, utsname()->nodename contains our local
417 sin->sin_family = AF_INET; 259 * IP address or hostname, set by ipconfig. If "%s" exists
418 sin->sin_addr.s_addr = addr; 260 * in tmp, substitute the nodename, then shovel the whole
419 sin->sin_port = port; 261 * mess into nfs_root_device.
420} 262 */
263 len = snprintf(nfs_export_path, sizeof(nfs_export_path),
264 tmp, utsname()->nodename);
265 if (len > (int)sizeof(nfs_export_path))
266 goto out_devnametoolong;
267 len = snprintf(nfs_root_device, sizeof(nfs_root_device),
268 "%pI4:%s", &servaddr, nfs_export_path);
269 if (len > (int)sizeof(nfs_root_device))
270 goto out_devnametoolong;
421 271
422/* 272 retval = 0;
423 * Query server portmapper for the port of a daemon program.
424 */
425static int __init root_nfs_getport(int program, int version, int proto)
426{
427 struct sockaddr_in sin;
428 273
429 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n", 274out:
430 program, version, &servaddr); 275 kfree(tmp);
431 set_sockaddr(&sin, servaddr, 0); 276 return retval;
432 return rpcb_getport_sync(&sin, program, version, proto); 277out_nomem:
278 printk(KERN_ERR "Root-NFS: could not allocate memory\n");
279 goto out;
280out_optionstoolong:
281 printk(KERN_ERR "Root-NFS: mount options string too long\n");
282 goto out;
283out_devnametoolong:
284 printk(KERN_ERR "Root-NFS: root device name too long.\n");
285 goto out;
433} 286}
434 287
435 288/**
436/* 289 * nfs_root_data - Return prepared 'data' for NFSROOT mount
437 * Use portmapper to find mountd and nfsd port numbers if not overriden 290 * @root_device: OUT: address of string containing NFSROOT device
438 * by the user. Use defaults if portmapper is not available. 291 * @root_data: OUT: address of string containing NFSROOT mount options
439 * XXX: Is there any nfs server with no portmapper? 292 *
293 * Returns zero and sets @root_device and @root_data if successful,
294 * otherwise -1 is returned.
440 */ 295 */
441static int __init root_nfs_ports(void) 296int __init nfs_root_data(char **root_device, char **root_data)
442{ 297{
443 int port; 298 servaddr = root_server_addr;
444 int nfsd_ver, mountd_ver; 299 if (servaddr == htonl(INADDR_NONE)) {
445 int nfsd_port, mountd_port; 300 printk(KERN_ERR "Root-NFS: no NFS server address\n");
446 int proto; 301 return -1;
447
448 if (nfs_data.flags & NFS_MOUNT_VER3) {
449 nfsd_ver = NFS3_VERSION;
450 mountd_ver = NFS_MNT3_VERSION;
451 nfsd_port = NFS_PORT;
452 mountd_port = NFS_MNT_PORT;
453 } else {
454 nfsd_ver = NFS2_VERSION;
455 mountd_ver = NFS_MNT_VERSION;
456 nfsd_port = NFS_PORT;
457 mountd_port = NFS_MNT_PORT;
458 }
459
460 proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
461
462 if (nfs_port < 0) {
463 if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
464 printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
465 "number from server, using default\n");
466 port = nfsd_port;
467 }
468 nfs_port = port;
469 dprintk("Root-NFS: Portmapper on server returned %d "
470 "as nfsd port\n", port);
471 } 302 }
472 303
473 if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) { 304 if (root_nfs_data(nfs_root_parms) < 0)
474 printk(KERN_ERR "Root-NFS: Unable to get mountd port " 305 return -1;
475 "number from server, using default\n");
476 port = mountd_port;
477 }
478 mount_port = port;
479 dprintk("Root-NFS: mountd port is %d\n", port);
480 306
307 *root_device = nfs_root_device;
308 *root_data = nfs_root_options;
481 return 0; 309 return 0;
482} 310}
483
484
485/*
486 * Get a file handle from the server for the directory which is to be
487 * mounted.
488 */
489static int __init root_nfs_get_handle(void)
490{
491 struct sockaddr_in sin;
492 unsigned int auth_flav_len = 0;
493 struct nfs_mount_request request = {
494 .sap = (struct sockaddr *)&sin,
495 .salen = sizeof(sin),
496 .dirpath = nfs_export_path,
497 .version = (nfs_data.flags & NFS_MOUNT_VER3) ?
498 NFS_MNT3_VERSION : NFS_MNT_VERSION,
499 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
500 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
501 .auth_flav_len = &auth_flav_len,
502 };
503 int status = -ENOMEM;
504
505 request.fh = nfs_alloc_fhandle();
506 if (!request.fh)
507 goto out;
508 set_sockaddr(&sin, servaddr, htons(mount_port));
509 status = nfs_mount(&request);
510 if (status < 0)
511 printk(KERN_ERR "Root-NFS: Server returned error %d "
512 "while mounting %s\n", status, nfs_export_path);
513 else {
514 nfs_data.root.size = request.fh->size;
515 memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
516 }
517 nfs_free_fhandle(request.fh);
518out:
519 return status;
520}
521
522/*
523 * Get the NFS port numbers and file handle, and return the prepared 'data'
524 * argument for mount() if everything went OK. Return NULL otherwise.
525 */
526void * __init nfs_root_data(void)
527{
528 if (root_nfs_init() < 0
529 || root_nfs_ports() < 0
530 || root_nfs_get_handle() < 0)
531 return NULL;
532 set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port));
533 return (void*)&nfs_data;
534}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
new file mode 100644
index 000000000000..db773428f95f
--- /dev/null
+++ b/fs/nfs/pnfs.c
@@ -0,0 +1,783 @@
1/*
2 * pNFS functions to call and manage layout drivers.
3 *
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#include <linux/nfs_fs.h>
31#include "internal.h"
32#include "pnfs.h"
33
34#define NFSDBG_FACILITY NFSDBG_PNFS
35
36/* Locking:
37 *
38 * pnfs_spinlock:
39 * protects pnfs_modules_tbl.
40 */
41static DEFINE_SPINLOCK(pnfs_spinlock);
42
43/*
44 * pnfs_modules_tbl holds all pnfs modules
45 */
46static LIST_HEAD(pnfs_modules_tbl);
47
48/* Return the registered pnfs layout driver module matching given id */
49static struct pnfs_layoutdriver_type *
50find_pnfs_driver_locked(u32 id)
51{
52 struct pnfs_layoutdriver_type *local;
53
54 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
55 if (local->id == id)
56 goto out;
57 local = NULL;
58out:
59 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
60 return local;
61}
62
63static struct pnfs_layoutdriver_type *
64find_pnfs_driver(u32 id)
65{
66 struct pnfs_layoutdriver_type *local;
67
68 spin_lock(&pnfs_spinlock);
69 local = find_pnfs_driver_locked(id);
70 spin_unlock(&pnfs_spinlock);
71 return local;
72}
73
74void
75unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{
77 if (nfss->pnfs_curr_ld) {
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL;
82}
83
84/*
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
87 *
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
89 */
90void
91set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
92{
93 struct pnfs_layoutdriver_type *ld_type = NULL;
94
95 if (id == 0)
96 goto out_no_driver;
97 if (!(server->nfs_client->cl_exchange_flags &
98 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
99 printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
100 id, server->nfs_client->cl_exchange_flags);
101 goto out_no_driver;
102 }
103 ld_type = find_pnfs_driver(id);
104 if (!ld_type) {
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
106 ld_type = find_pnfs_driver(id);
107 if (!ld_type) {
108 dprintk("%s: No pNFS module found for %u.\n",
109 __func__, id);
110 goto out_no_driver;
111 }
112 }
113 if (!try_module_get(ld_type->owner)) {
114 dprintk("%s: Could not grab reference on module\n", __func__);
115 goto out_no_driver;
116 }
117 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) {
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return;
127
128out_no_driver:
129 dprintk("%s: Using NFSv4 I/O\n", __func__);
130 server->pnfs_curr_ld = NULL;
131}
132
133int
134pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
135{
136 int status = -EINVAL;
137 struct pnfs_layoutdriver_type *tmp;
138
139 if (ld_type->id == 0) {
140 printk(KERN_ERR "%s id 0 is reserved\n", __func__);
141 return status;
142 }
143 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
144 printk(KERN_ERR "%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__);
146 return status;
147 }
148
149 spin_lock(&pnfs_spinlock);
150 tmp = find_pnfs_driver_locked(ld_type->id);
151 if (!tmp) {
152 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
153 status = 0;
154 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
155 ld_type->name);
156 } else {
157 printk(KERN_ERR "%s Module with id %d already loaded!\n",
158 __func__, ld_type->id);
159 }
160 spin_unlock(&pnfs_spinlock);
161
162 return status;
163}
164EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
165
166void
167pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
168{
169 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
170 spin_lock(&pnfs_spinlock);
171 list_del(&ld_type->pnfs_tblid);
172 spin_unlock(&pnfs_spinlock);
173}
174EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
175
176/*
177 * pNFS client layout cache
178 */
179
180static void
181get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
182{
183 assert_spin_locked(&lo->inode->i_lock);
184 lo->refcount++;
185}
186
187static void
188put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
189{
190 assert_spin_locked(&lo->inode->i_lock);
191 BUG_ON(lo->refcount == 0);
192
193 lo->refcount--;
194 if (!lo->refcount) {
195 dprintk("%s: freeing layout cache %p\n", __func__, lo);
196 BUG_ON(!list_empty(&lo->layouts));
197 NFS_I(lo->inode)->layout = NULL;
198 kfree(lo);
199 }
200}
201
202void
203put_layout_hdr(struct inode *inode)
204{
205 spin_lock(&inode->i_lock);
206 put_layout_hdr_locked(NFS_I(inode)->layout);
207 spin_unlock(&inode->i_lock);
208}
209
210static void
211init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
212{
213 INIT_LIST_HEAD(&lseg->fi_list);
214 kref_init(&lseg->kref);
215 lseg->layout = lo;
216}
217
218/* Called without i_lock held, as the free_lseg call may sleep */
219static void
220destroy_lseg(struct kref *kref)
221{
222 struct pnfs_layout_segment *lseg =
223 container_of(kref, struct pnfs_layout_segment, kref);
224 struct inode *ino = lseg->layout->inode;
225
226 dprintk("--> %s\n", __func__);
227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
229 put_layout_hdr(ino);
230}
231
232static void
233put_lseg(struct pnfs_layout_segment *lseg)
234{
235 if (!lseg)
236 return;
237
238 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
239 atomic_read(&lseg->kref.refcount));
240 kref_put(&lseg->kref, destroy_lseg);
241}
242
243static void
244pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
245{
246 struct pnfs_layout_segment *lseg, *next;
247 struct nfs_client *clp;
248
249 dprintk("%s:Begin lo %p\n", __func__, lo);
250
251 assert_spin_locked(&lo->inode->i_lock);
252 list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
253 dprintk("%s: freeing lseg %p\n", __func__, lseg);
254 list_move(&lseg->fi_list, tmp_list);
255 }
256 clp = NFS_SERVER(lo->inode)->nfs_client;
257 spin_lock(&clp->cl_lock);
258 /* List does not take a reference, so no need for put here */
259 list_del_init(&lo->layouts);
260 spin_unlock(&clp->cl_lock);
261 write_seqlock(&lo->seqlock);
262 clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
263 write_sequnlock(&lo->seqlock);
264
265 dprintk("%s:Return\n", __func__);
266}
267
268static void
269pnfs_free_lseg_list(struct list_head *tmp_list)
270{
271 struct pnfs_layout_segment *lseg;
272
273 while (!list_empty(tmp_list)) {
274 lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
275 fi_list);
276 dprintk("%s calling put_lseg on %p\n", __func__, lseg);
277 list_del(&lseg->fi_list);
278 put_lseg(lseg);
279 }
280}
281
282void
283pnfs_destroy_layout(struct nfs_inode *nfsi)
284{
285 struct pnfs_layout_hdr *lo;
286 LIST_HEAD(tmp_list);
287
288 spin_lock(&nfsi->vfs_inode.i_lock);
289 lo = nfsi->layout;
290 if (lo) {
291 pnfs_clear_lseg_list(lo, &tmp_list);
292 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
293 put_layout_hdr_locked(lo);
294 }
295 spin_unlock(&nfsi->vfs_inode.i_lock);
296 pnfs_free_lseg_list(&tmp_list);
297}
298
299/*
300 * Called by the state manger to remove all layouts established under an
301 * expired lease.
302 */
303void
304pnfs_destroy_all_layouts(struct nfs_client *clp)
305{
306 struct pnfs_layout_hdr *lo;
307 LIST_HEAD(tmp_list);
308
309 spin_lock(&clp->cl_lock);
310 list_splice_init(&clp->cl_layouts, &tmp_list);
311 spin_unlock(&clp->cl_lock);
312
313 while (!list_empty(&tmp_list)) {
314 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
315 layouts);
316 dprintk("%s freeing layout for inode %lu\n", __func__,
317 lo->inode->i_ino);
318 pnfs_destroy_layout(NFS_I(lo->inode));
319 }
320}
321
322/* update lo->stateid with new if is more recent
323 *
324 * lo->stateid could be the open stateid, in which case we just use what given.
325 */
326static void
327pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
328 const nfs4_stateid *new)
329{
330 nfs4_stateid *old = &lo->stateid;
331 bool overwrite = false;
332
333 write_seqlock(&lo->seqlock);
334 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
335 memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
336 overwrite = true;
337 else {
338 u32 oldseq, newseq;
339
340 oldseq = be32_to_cpu(old->stateid.seqid);
341 newseq = be32_to_cpu(new->stateid.seqid);
342 if ((int)(newseq - oldseq) > 0)
343 overwrite = true;
344 }
345 if (overwrite)
346 memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
347 write_sequnlock(&lo->seqlock);
348}
349
350static void
351pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
352 struct nfs4_state *state)
353{
354 int seq;
355
356 dprintk("--> %s\n", __func__);
357 write_seqlock(&lo->seqlock);
358 do {
359 seq = read_seqbegin(&state->seqlock);
360 memcpy(lo->stateid.data, state->stateid.data,
361 sizeof(state->stateid.data));
362 } while (read_seqretry(&state->seqlock, seq));
363 set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
364 write_sequnlock(&lo->seqlock);
365 dprintk("<-- %s\n", __func__);
366}
367
368void
369pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
370 struct nfs4_state *open_state)
371{
372 int seq;
373
374 dprintk("--> %s\n", __func__);
375 do {
376 seq = read_seqbegin(&lo->seqlock);
377 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
378 /* This will trigger retry of the read */
379 pnfs_layout_from_open_stateid(lo, open_state);
380 } else
381 memcpy(dst->data, lo->stateid.data,
382 sizeof(lo->stateid.data));
383 } while (read_seqretry(&lo->seqlock, seq));
384 dprintk("<-- %s\n", __func__);
385}
386
387/*
388* Get layout from server.
389* for now, assume that whole file layouts are requested.
390* arg->offset: 0
391* arg->length: all ones
392*/
393static struct pnfs_layout_segment *
394send_layoutget(struct pnfs_layout_hdr *lo,
395 struct nfs_open_context *ctx,
396 u32 iomode)
397{
398 struct inode *ino = lo->inode;
399 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL;
402
403 dprintk("--> %s\n", __func__);
404
405 BUG_ON(ctx == NULL);
406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) {
408 put_layout_hdr(lo->inode);
409 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode;
414 lgp->args.range.offset = 0;
415 lgp->args.range.length = NFS4_MAX_UINT64;
416 lgp->args.type = server->pnfs_curr_ld->id;
417 lgp->args.inode = ino;
418 lgp->args.ctx = get_nfs_open_context(ctx);
419 lgp->lsegpp = &lseg;
420
421 /* Synchronously retrieve layout information from server and
422 * store in lseg.
423 */
424 nfs4_proc_layoutget(lgp);
425 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */
427 set_bit(lo_fail_bit(iomode), &lo->state);
428 }
429 return lseg;
430}
431
432/*
433 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those
435 * are seen first.
436 */
437static s64
438cmp_layout(u32 iomode1, u32 iomode2)
439{
440 /* read > read/write */
441 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
442}
443
444static void
445pnfs_insert_layout(struct pnfs_layout_hdr *lo,
446 struct pnfs_layout_segment *lseg)
447{
448 struct pnfs_layout_segment *lp;
449 int found = 0;
450
451 dprintk("%s:Begin\n", __func__);
452
453 assert_spin_locked(&lo->inode->i_lock);
454 if (list_empty(&lo->segs)) {
455 struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
456
457 spin_lock(&clp->cl_lock);
458 BUG_ON(!list_empty(&lo->layouts));
459 list_add_tail(&lo->layouts, &clp->cl_layouts);
460 spin_unlock(&clp->cl_lock);
461 }
462 list_for_each_entry(lp, &lo->segs, fi_list) {
463 if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
464 continue;
465 list_add_tail(&lseg->fi_list, &lp->fi_list);
466 dprintk("%s: inserted lseg %p "
467 "iomode %d offset %llu length %llu before "
468 "lp %p iomode %d offset %llu length %llu\n",
469 __func__, lseg, lseg->range.iomode,
470 lseg->range.offset, lseg->range.length,
471 lp, lp->range.iomode, lp->range.offset,
472 lp->range.length);
473 found = 1;
474 break;
475 }
476 if (!found) {
477 list_add_tail(&lseg->fi_list, &lo->segs);
478 dprintk("%s: inserted lseg %p "
479 "iomode %d offset %llu length %llu at tail\n",
480 __func__, lseg, lseg->range.iomode,
481 lseg->range.offset, lseg->range.length);
482 }
483 get_layout_hdr_locked(lo);
484
485 dprintk("%s:Return\n", __func__);
486}
487
488static struct pnfs_layout_hdr *
489alloc_init_layout_hdr(struct inode *ino)
490{
491 struct pnfs_layout_hdr *lo;
492
493 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
494 if (!lo)
495 return NULL;
496 lo->refcount = 1;
497 INIT_LIST_HEAD(&lo->layouts);
498 INIT_LIST_HEAD(&lo->segs);
499 seqlock_init(&lo->seqlock);
500 lo->inode = ino;
501 return lo;
502}
503
504static struct pnfs_layout_hdr *
505pnfs_find_alloc_layout(struct inode *ino)
506{
507 struct nfs_inode *nfsi = NFS_I(ino);
508 struct pnfs_layout_hdr *new = NULL;
509
510 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
511
512 assert_spin_locked(&ino->i_lock);
513 if (nfsi->layout)
514 return nfsi->layout;
515
516 spin_unlock(&ino->i_lock);
517 new = alloc_init_layout_hdr(ino);
518 spin_lock(&ino->i_lock);
519
520 if (likely(nfsi->layout == NULL)) /* Won the race? */
521 nfsi->layout = new;
522 else
523 kfree(new);
524 return nfsi->layout;
525}
526
527/*
528 * iomode matching rules:
529 * iomode lseg match
530 * ----- ----- -----
531 * ANY READ true
532 * ANY RW true
533 * RW READ false
534 * RW RW true
535 * READ READ true
536 * READ RW true
537 */
538static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
542}
543
544/*
545 * lookup range in layout
546 */
547static struct pnfs_layout_segment *
548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
549{
550 struct pnfs_layout_segment *lseg, *ret = NULL;
551
552 dprintk("%s:Begin\n", __func__);
553
554 assert_spin_locked(&lo->inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) {
556 if (is_matching_lseg(lseg, iomode)) {
557 ret = lseg;
558 break;
559 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0)
561 break;
562 }
563
564 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
566 return ret;
567}
568
569/*
570 * Layout segment is retreived from the server if not cached.
571 * The appropriate layout segment is referenced and returned to the caller.
572 */
573struct pnfs_layout_segment *
574pnfs_update_layout(struct inode *ino,
575 struct nfs_open_context *ctx,
576 enum pnfs_iomode iomode)
577{
578 struct nfs_inode *nfsi = NFS_I(ino);
579 struct pnfs_layout_hdr *lo;
580 struct pnfs_layout_segment *lseg = NULL;
581
582 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
583 return NULL;
584 spin_lock(&ino->i_lock);
585 lo = pnfs_find_alloc_layout(ino);
586 if (lo == NULL) {
587 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
588 goto out_unlock;
589 }
590
591 /* Check to see if the layout for the given range already exists */
592 lseg = pnfs_has_layout(lo, iomode);
593 if (lseg) {
594 dprintk("%s: Using cached lseg %p for iomode %d)\n",
595 __func__, lseg, iomode);
596 goto out_unlock;
597 }
598
599 /* if LAYOUTGET already failed once we don't try again */
600 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
601 goto out_unlock;
602
603 get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
604 spin_unlock(&ino->i_lock);
605
606 lseg = send_layoutget(lo, ctx, iomode);
607out:
608 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
609 nfsi->layout->state, lseg);
610 return lseg;
611out_unlock:
612 spin_unlock(&ino->i_lock);
613 goto out;
614}
615
616int
617pnfs_layout_process(struct nfs4_layoutget *lgp)
618{
619 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
620 struct nfs4_layoutget_res *res = &lgp->res;
621 struct pnfs_layout_segment *lseg;
622 struct inode *ino = lo->inode;
623 int status = 0;
624
625 /* Inject layout blob into I/O device driver */
626 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
627 if (!lseg || IS_ERR(lseg)) {
628 if (!lseg)
629 status = -ENOMEM;
630 else
631 status = PTR_ERR(lseg);
632 dprintk("%s: Could not allocate layout: error %d\n",
633 __func__, status);
634 goto out;
635 }
636
637 spin_lock(&ino->i_lock);
638 init_lseg(lo, lseg);
639 lseg->range = res->range;
640 *lgp->lsegpp = lseg;
641 pnfs_insert_layout(lo, lseg);
642
643 /* Done processing layoutget. Set the layout stateid */
644 pnfs_set_layout_stateid(lo, &res->stateid);
645 spin_unlock(&ino->i_lock);
646out:
647 return status;
648}
649
650/*
651 * Device ID cache. Currently supports one layout type per struct nfs_client.
652 * Add layout type to the lookup key to expand to support multiple types.
653 */
654int
655pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
656 void (*free_callback)(struct pnfs_deviceid_node *))
657{
658 struct pnfs_deviceid_cache *c;
659
660 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
661 if (!c)
662 return -ENOMEM;
663 spin_lock(&clp->cl_lock);
664 if (clp->cl_devid_cache != NULL) {
665 atomic_inc(&clp->cl_devid_cache->dc_ref);
666 dprintk("%s [kref [%d]]\n", __func__,
667 atomic_read(&clp->cl_devid_cache->dc_ref));
668 kfree(c);
669 } else {
670 /* kzalloc initializes hlists */
671 spin_lock_init(&c->dc_lock);
672 atomic_set(&c->dc_ref, 1);
673 c->dc_free_callback = free_callback;
674 clp->cl_devid_cache = c;
675 dprintk("%s [new]\n", __func__);
676 }
677 spin_unlock(&clp->cl_lock);
678 return 0;
679}
680EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
681
682/*
683 * Called from pnfs_layoutdriver_type->free_lseg
684 * last layout segment reference frees deviceid
685 */
686void
687pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
688 struct pnfs_deviceid_node *devid)
689{
690 struct nfs4_deviceid *id = &devid->de_id;
691 struct pnfs_deviceid_node *d;
692 struct hlist_node *n;
693 long h = nfs4_deviceid_hash(id);
694
695 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
696 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
697 return;
698
699 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
700 if (!memcmp(&d->de_id, id, sizeof(*id))) {
701 hlist_del_rcu(&d->de_node);
702 spin_unlock(&c->dc_lock);
703 synchronize_rcu();
704 c->dc_free_callback(devid);
705 return;
706 }
707 spin_unlock(&c->dc_lock);
708 /* Why wasn't it found in the list? */
709 BUG();
710}
711EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
712
713/* Find and reference a deviceid */
714struct pnfs_deviceid_node *
715pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
716{
717 struct pnfs_deviceid_node *d;
718 struct hlist_node *n;
719 long hash = nfs4_deviceid_hash(id);
720
721 dprintk("--> %s hash %ld\n", __func__, hash);
722 rcu_read_lock();
723 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
724 if (!memcmp(&d->de_id, id, sizeof(*id))) {
725 if (!atomic_inc_not_zero(&d->de_ref)) {
726 goto fail;
727 } else {
728 rcu_read_unlock();
729 return d;
730 }
731 }
732 }
733fail:
734 rcu_read_unlock();
735 return NULL;
736}
737EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
738
739/*
740 * Add a deviceid to the cache.
741 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
742 */
743struct pnfs_deviceid_node *
744pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
745{
746 struct pnfs_deviceid_node *d;
747 long hash = nfs4_deviceid_hash(&new->de_id);
748
749 dprintk("--> %s hash %ld\n", __func__, hash);
750 spin_lock(&c->dc_lock);
751 d = pnfs_find_get_deviceid(c, &new->de_id);
752 if (d) {
753 spin_unlock(&c->dc_lock);
754 dprintk("%s [discard]\n", __func__);
755 c->dc_free_callback(new);
756 return d;
757 }
758 INIT_HLIST_NODE(&new->de_node);
759 atomic_set(&new->de_ref, 1);
760 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
761 spin_unlock(&c->dc_lock);
762 dprintk("%s [new]\n", __func__);
763 return new;
764}
765EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
766
767void
768pnfs_put_deviceid_cache(struct nfs_client *clp)
769{
770 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
771
772 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
773 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
774 int i;
775 /* Verify cache is empty */
776 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
777 BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
778 clp->cl_devid_cache = NULL;
779 spin_unlock(&clp->cl_lock);
780 kfree(local);
781 }
782}
783EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
new file mode 100644
index 000000000000..e12367d50489
--- /dev/null
+++ b/fs/nfs/pnfs.h
@@ -0,0 +1,189 @@
1/*
2 * pNFS client data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H
32
/*
 * One layout segment held in an inode's layout (pnfs_layout_hdr).
 * Declared outside the CONFIG_NFS_V4_1 block because the stub
 * pnfs_update_layout() below also returns a pointer to it.
 */
33struct pnfs_layout_segment {
34 struct list_head fi_list; /* entry in pnfs_layout_hdr.segs */
35 struct pnfs_layout_range range; /* iomode, offset and length covered */
36 struct kref kref; /* reference count of the segment */
37 struct pnfs_layout_hdr *layout; /* presumably the owning header, set by init_lseg() - TODO confirm */
38};
39
40#ifdef CONFIG_NFS_V4_1
41
42#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
43
/*
 * Bit numbers used with set_bit()/test_bit() on pnfs_layout_hdr.state.
 * lo_fail_bit() maps an iomode to the matching *_FAILED bit.
 */
44enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
48};
49
50/* Per-layout driver specific registration structure */
51struct pnfs_layoutdriver_type {
52 struct list_head pnfs_tblid; /* presumably links registered drivers - TODO confirm */
53 const u32 id; /* layout type; sent as args.type in LAYOUTGET */
54 const char *name;
55 struct module *owner;
56 int (*set_layoutdriver) (struct nfs_server *);
57 int (*clear_layoutdriver) (struct nfs_server *);
/* Called by pnfs_layout_process(); a NULL or ERR_PTR result is treated as failure */
58 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
59 void (*free_lseg) (struct pnfs_layout_segment *lseg);
60};
61
/*
 * Per-inode layout state, reachable through nfs_inode->layout.
 * Created by alloc_init_layout_hdr() with refcount 1.
 */
62struct pnfs_layout_hdr {
63 unsigned long refcount;
64 struct list_head layouts; /* other client layouts; entry in nfs_client.cl_layouts */
65 struct list_head segs; /* layout segments list */
66 seqlock_t seqlock; /* Protects the stateid */
67 nfs4_stateid stateid;
68 unsigned long state; /* NFS_LAYOUT_* bits */
69 struct inode *inode; /* inode this layout belongs to */
70};
71
/*
 * Passed to nfs4_proc_getdeviceinfo() (see prototype below).
 * NOTE(review): the meaning of the individual fields is not visible in
 * this file; presumably request parameters plus the reply buffer
 * description - confirm against nfs4proc.c / nfs4xdr.c.
 */
72struct pnfs_device {
73 struct nfs4_deviceid dev_id;
74 unsigned int layout_type;
75 unsigned int mincount;
76 struct page **pages;
77 void *area;
78 unsigned int pgbase;
79 unsigned int pglen;
80};
81
82/*
83 * Device ID RCU cache. A device ID is unique per client ID and layout type.
84 */
85#define NFS4_DEVICE_ID_HASH_BITS 5
86#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
87#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
88
89static inline u32
90nfs4_deviceid_hash(struct nfs4_deviceid *id)
91{
92 unsigned char *cptr = (unsigned char *)id->data;
93 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
94 u32 x = 0;
95
96 while (nbytes--) {
97 x *= 37;
98 x += *cptr++;
99 }
100 return x & NFS4_DEVICE_ID_HASH_MASK;
101}
102
/* One cached device ID; hashed by de_id into pnfs_deviceid_cache. */
103struct pnfs_deviceid_node {
104 struct hlist_node de_node; /* entry in dc_deviceids[hash], RCU-managed */
105 struct nfs4_deviceid de_id; /* lookup key */
106 atomic_t de_ref; /* set to 1 on insertion; node is unhashed and freed at 0 */
107};
108
/* Device ID cache attached to a struct nfs_client (cl_devid_cache). */
109struct pnfs_deviceid_cache {
110 spinlock_t dc_lock; /* taken around hash list insertion and removal */
111 atomic_t dc_ref; /* number of users of the cache */
112 void (*dc_free_callback)(struct pnfs_deviceid_node *); /* frees a node */
113 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
114};
115
116extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
117 void (*free_callback)(struct pnfs_deviceid_node *));
118extern void pnfs_put_deviceid_cache(struct nfs_client *);
119extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
120 struct pnfs_deviceid_cache *,
121 struct nfs4_deviceid *);
122extern struct pnfs_deviceid_node *pnfs_add_deviceid(
123 struct pnfs_deviceid_cache *,
124 struct pnfs_deviceid_node *);
125extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
126 struct pnfs_deviceid_node *devid);
127
128extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
129extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
130
131/* nfs4proc.c */
132extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
133 struct pnfs_device *dev);
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135
136/* pnfs.c */
137struct pnfs_layout_segment *
138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
139 enum pnfs_iomode access_type);
140void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
141void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp);
143void pnfs_destroy_layout(struct nfs_inode *);
144void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state);
148
149
150static inline int lo_fail_bit(u32 iomode)
151{
152 return iomode == IOMODE_RW ?
153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
154}
155
156/* Return true if a layout driver is being used for this mountpoint */
157static inline int pnfs_enabled_sb(struct nfs_server *nfss)
158{
159 return nfss->pnfs_curr_ld != NULL;
160}
161
162#else /* CONFIG_NFS_V4_1 */
163
/*
 * Stand-ins used when CONFIG_NFS_V4_1 is not set: every operation is a
 * no-op and pnfs_update_layout() never yields a layout segment.
 */
164static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
165{
166}
167
168static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
169{
170}
171
172static inline struct pnfs_layout_segment *
173pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
174 enum pnfs_iomode access_type)
175{
176 return NULL;
177}
178
179static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
180{
181}
182
183static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
184{
185}
186
187#endif /* CONFIG_NFS_V4_1 */
188
189#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 611bec22f552..58e7f84fc1fd 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -258,7 +258,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)
258 258
259static int 259static int
260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
261 int flags, struct nameidata *nd) 261 int flags, struct nfs_open_context *ctx)
262{ 262{
263 struct nfs_createdata *data; 263 struct nfs_createdata *data;
264 struct rpc_message msg = { 264 struct rpc_message msg = {
@@ -365,17 +365,32 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
365 return 1; 365 return 1;
366} 366}
367 367
368static void
369nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
370{
371 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
372}
373
/*
 * Completion hook for an asynchronous rename.
 * Returns 0 when nfs_async_handle_expired_key() reports a non-zero
 * result for the task; otherwise marks both directories for
 * revalidation and returns 1.
 */
static int
nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
		     struct inode *new_dir)
{
	if (!nfs_async_handle_expired_key(task)) {
		nfs_mark_for_revalidate(old_dir);
		nfs_mark_for_revalidate(new_dir);
		return 1;
	}
	return 0;
}
384
368static int 385static int
369nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, 386nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
370 struct inode *new_dir, struct qstr *new_name) 387 struct inode *new_dir, struct qstr *new_name)
371{ 388{
372 struct nfs_renameargs arg = { 389 struct nfs_renameargs arg = {
373 .fromfh = NFS_FH(old_dir), 390 .old_dir = NFS_FH(old_dir),
374 .fromname = old_name->name, 391 .old_name = old_name,
375 .fromlen = old_name->len, 392 .new_dir = NFS_FH(new_dir),
376 .tofh = NFS_FH(new_dir), 393 .new_name = new_name,
377 .toname = new_name->name,
378 .tolen = new_name->len
379 }; 394 };
380 struct rpc_message msg = { 395 struct rpc_message msg = {
381 .rpc_proc = &nfs_procedures[NFSPROC_RENAME], 396 .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
@@ -519,14 +534,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
519 */ 534 */
520static int 535static int
521nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 536nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
522 u64 cookie, struct page *page, unsigned int count, int plus) 537 u64 cookie, struct page **pages, unsigned int count, int plus)
523{ 538{
524 struct inode *dir = dentry->d_inode; 539 struct inode *dir = dentry->d_inode;
525 struct nfs_readdirargs arg = { 540 struct nfs_readdirargs arg = {
526 .fh = NFS_FH(dir), 541 .fh = NFS_FH(dir),
527 .cookie = cookie, 542 .cookie = cookie,
528 .count = count, 543 .count = count,
529 .pages = &page, 544 .pages = pages,
530 }; 545 };
531 struct rpc_message msg = { 546 struct rpc_message msg = {
532 .rpc_proc = &nfs_procedures[NFSPROC_READDIR], 547 .rpc_proc = &nfs_procedures[NFSPROC_READDIR],
@@ -705,6 +720,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
705 .unlink_setup = nfs_proc_unlink_setup, 720 .unlink_setup = nfs_proc_unlink_setup,
706 .unlink_done = nfs_proc_unlink_done, 721 .unlink_done = nfs_proc_unlink_done,
707 .rename = nfs_proc_rename, 722 .rename = nfs_proc_rename,
723 .rename_setup = nfs_proc_rename_setup,
724 .rename_done = nfs_proc_rename_done,
708 .link = nfs_proc_link, 725 .link = nfs_proc_link,
709 .symlink = nfs_proc_symlink, 726 .symlink = nfs_proc_symlink,
710 .mkdir = nfs_proc_mkdir, 727 .mkdir = nfs_proc_mkdir,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 87adc2744246..e4b62c6f5a6e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -25,6 +25,7 @@
25#include "internal.h" 25#include "internal.h"
26#include "iostat.h" 26#include "iostat.h"
27#include "fscache.h" 27#include "fscache.h"
28#include "pnfs.h"
28 29
29#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
30 31
@@ -46,7 +47,6 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
46 memset(p, 0, sizeof(*p)); 47 memset(p, 0, sizeof(*p));
47 INIT_LIST_HEAD(&p->pages); 48 INIT_LIST_HEAD(&p->pages);
48 p->npages = pagecount; 49 p->npages = pagecount;
49 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
50 if (pagecount <= ARRAY_SIZE(p->page_array)) 50 if (pagecount <= ARRAY_SIZE(p->page_array))
51 p->pagevec = p->page_array; 51 p->pagevec = p->page_array;
52 else { 52 else {
@@ -121,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
121 len = nfs_page_length(page); 121 len = nfs_page_length(page);
122 if (len == 0) 122 if (len == 0)
123 return nfs_return_empty_page(page); 123 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
124 new = nfs_create_request(ctx, inode, page, 0, len); 125 new = nfs_create_request(ctx, inode, page, 0, len);
125 if (IS_ERR(new)) { 126 if (IS_ERR(new)) {
126 unlock_page(page); 127 unlock_page(page);
@@ -625,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
625 if (ret == 0) 626 if (ret == 0)
626 goto read_complete; /* all pages were read */ 627 goto read_complete; /* all pages were read */
627 628
629 pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
628 if (rsize < PAGE_CACHE_SIZE) 630 if (rsize < PAGE_CACHE_SIZE)
629 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 631 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
630 else 632 else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f4cbf0c306c6..3600ec700d58 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -100,6 +100,7 @@ enum {
100 Opt_addr, Opt_mountaddr, Opt_clientaddr, 100 Opt_addr, Opt_mountaddr, Opt_clientaddr,
101 Opt_lookupcache, 101 Opt_lookupcache,
102 Opt_fscache_uniq, 102 Opt_fscache_uniq,
103 Opt_local_lock,
103 104
104 /* Special mount options */ 105 /* Special mount options */
105 Opt_userspace, Opt_deprecated, Opt_sloppy, 106 Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -171,6 +172,7 @@ static const match_table_t nfs_mount_option_tokens = {
171 172
172 { Opt_lookupcache, "lookupcache=%s" }, 173 { Opt_lookupcache, "lookupcache=%s" },
173 { Opt_fscache_uniq, "fsc=%s" }, 174 { Opt_fscache_uniq, "fsc=%s" },
175 { Opt_local_lock, "local_lock=%s" },
174 176
175 { Opt_err, NULL } 177 { Opt_err, NULL }
176}; 178};
@@ -236,6 +238,22 @@ static match_table_t nfs_lookupcache_tokens = {
236 { Opt_lookupcache_err, NULL } 238 { Opt_lookupcache_err, NULL }
237}; 239};
238 240
241enum {
242 Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
243 Opt_local_lock_none,
244
245 Opt_local_lock_err
246};
247
248static match_table_t nfs_local_lock_tokens = {
249 { Opt_local_lock_all, "all" },
250 { Opt_local_lock_flock, "flock" },
251 { Opt_local_lock_posix, "posix" },
252 { Opt_local_lock_none, "none" },
253
254 { Opt_local_lock_err, NULL }
255};
256
239 257
240static void nfs_umount_begin(struct super_block *); 258static void nfs_umount_begin(struct super_block *);
241static int nfs_statfs(struct dentry *, struct kstatfs *); 259static int nfs_statfs(struct dentry *, struct kstatfs *);
@@ -622,6 +640,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
622 const struct proc_nfs_info *nfs_infop; 640 const struct proc_nfs_info *nfs_infop;
623 struct nfs_client *clp = nfss->nfs_client; 641 struct nfs_client *clp = nfss->nfs_client;
624 u32 version = clp->rpc_ops->version; 642 u32 version = clp->rpc_ops->version;
643 int local_flock, local_fcntl;
625 644
626 seq_printf(m, ",vers=%u", version); 645 seq_printf(m, ",vers=%u", version);
627 seq_printf(m, ",rsize=%u", nfss->rsize); 646 seq_printf(m, ",rsize=%u", nfss->rsize);
@@ -670,6 +689,18 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
670 else 689 else
671 seq_printf(m, ",lookupcache=pos"); 690 seq_printf(m, ",lookupcache=pos");
672 } 691 }
692
693 local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK;
694 local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL;
695
696 if (!local_flock && !local_fcntl)
697 seq_printf(m, ",local_lock=none");
698 else if (local_flock && local_fcntl)
699 seq_printf(m, ",local_lock=all");
700 else if (local_flock)
701 seq_printf(m, ",local_lock=flock");
702 else
703 seq_printf(m, ",local_lock=posix");
673} 704}
674 705
675/* 706/*
@@ -1017,9 +1048,13 @@ static int nfs_parse_mount_options(char *raw,
1017 break; 1048 break;
1018 case Opt_lock: 1049 case Opt_lock:
1019 mnt->flags &= ~NFS_MOUNT_NONLM; 1050 mnt->flags &= ~NFS_MOUNT_NONLM;
1051 mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
1052 NFS_MOUNT_LOCAL_FCNTL);
1020 break; 1053 break;
1021 case Opt_nolock: 1054 case Opt_nolock:
1022 mnt->flags |= NFS_MOUNT_NONLM; 1055 mnt->flags |= NFS_MOUNT_NONLM;
1056 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1057 NFS_MOUNT_LOCAL_FCNTL);
1023 break; 1058 break;
1024 case Opt_v2: 1059 case Opt_v2:
1025 mnt->flags &= ~NFS_MOUNT_VER3; 1060 mnt->flags &= ~NFS_MOUNT_VER3;
@@ -1420,6 +1455,34 @@ static int nfs_parse_mount_options(char *raw,
1420 mnt->fscache_uniq = string; 1455 mnt->fscache_uniq = string;
1421 mnt->options |= NFS_OPTION_FSCACHE; 1456 mnt->options |= NFS_OPTION_FSCACHE;
1422 break; 1457 break;
1458 case Opt_local_lock:
1459 string = match_strdup(args);
1460 if (string == NULL)
1461 goto out_nomem;
1462 token = match_token(string, nfs_local_lock_tokens,
1463 args);
1464 kfree(string);
1465 switch (token) {
1466 case Opt_local_lock_all:
1467 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1468 NFS_MOUNT_LOCAL_FCNTL);
1469 break;
1470 case Opt_local_lock_flock:
1471 mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
1472 break;
1473 case Opt_local_lock_posix:
1474 mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
1475 break;
1476 case Opt_local_lock_none:
1477 mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
1478 NFS_MOUNT_LOCAL_FCNTL);
1479 break;
1480 default:
1481 dfprintk(MOUNT, "NFS: invalid "
1482 "local_lock argument\n");
1483 return 0;
1484 };
1485 break;
1423 1486
1424 /* 1487 /*
1425 * Special options 1488 * Special options
@@ -1825,6 +1888,12 @@ static int nfs_validate_mount_data(void *options,
1825 if (!args->nfs_server.hostname) 1888 if (!args->nfs_server.hostname)
1826 goto out_nomem; 1889 goto out_nomem;
1827 1890
1891 if (!(data->flags & NFS_MOUNT_NONLM))
1892 args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
1893 NFS_MOUNT_LOCAL_FCNTL);
1894 else
1895 args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
1896 NFS_MOUNT_LOCAL_FCNTL);
1828 /* 1897 /*
1829 * The legacy version 6 binary mount data from userspace has a 1898 * The legacy version 6 binary mount data from userspace has a
1830 * field used only to transport selinux information into the 1899 * field used only to transport selinux information into the
@@ -2441,7 +2510,8 @@ static void nfs4_fill_super(struct super_block *sb)
2441 2510
2442static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) 2511static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2443{ 2512{
2444 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); 2513 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
2514 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
2445} 2515}
2446 2516
2447static int nfs4_validate_text_mount_data(void *options, 2517static int nfs4_validate_text_mount_data(void *options,
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index ad4d2e787b20..978aaeb8a093 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = {
32 .extra1 = (int *)&nfs_set_port_min, 32 .extra1 = (int *)&nfs_set_port_min,
33 .extra2 = (int *)&nfs_set_port_max, 33 .extra2 = (int *)&nfs_set_port_max,
34 }, 34 },
35#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
35 { 36 {
36 .procname = "idmap_cache_timeout", 37 .procname = "idmap_cache_timeout",
37 .data = &nfs_idmap_cache_timeout, 38 .data = &nfs_idmap_cache_timeout,
@@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = {
39 .mode = 0644, 40 .mode = 0644,
40 .proc_handler = proc_dointvec_jiffies, 41 .proc_handler = proc_dointvec_jiffies,
41 }, 42 },
43#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
42#endif 44#endif
43 { 45 {
44 .procname = "nfs_mountpoint_timeout", 46 .procname = "nfs_mountpoint_timeout",
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 2f84adaad427..9a16bad5d2ea 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -13,9 +13,12 @@
13#include <linux/nfs_fs.h> 13#include <linux/nfs_fs.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/wait.h> 15#include <linux/wait.h>
16#include <linux/namei.h>
16 17
17#include "internal.h" 18#include "internal.h"
18#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "iostat.h"
21#include "delegation.h"
19 22
20struct nfs_unlinkdata { 23struct nfs_unlinkdata {
21 struct hlist_node list; 24 struct hlist_node list;
@@ -244,7 +247,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry)
244 * @dir: parent directory of dentry 247 * @dir: parent directory of dentry
245 * @dentry: dentry to unlink 248 * @dentry: dentry to unlink
246 */ 249 */
247int 250static int
248nfs_async_unlink(struct inode *dir, struct dentry *dentry) 251nfs_async_unlink(struct inode *dir, struct dentry *dentry)
249{ 252{
250 struct nfs_unlinkdata *data; 253 struct nfs_unlinkdata *data;
@@ -259,7 +262,6 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
259 status = PTR_ERR(data->cred); 262 status = PTR_ERR(data->cred);
260 goto out_free; 263 goto out_free;
261 } 264 }
262 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
263 data->res.dir_attr = &data->dir_attr; 265 data->res.dir_attr = &data->dir_attr;
264 266
265 status = -EBUSY; 267 status = -EBUSY;
@@ -303,3 +305,256 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
303 if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) 305 if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
304 nfs_free_unlinkdata(data); 306 nfs_free_unlinkdata(data);
305} 307}
308
309/* Cancel a queued async unlink. Called when a sillyrename run fails. */
310static void
311nfs_cancel_async_unlink(struct dentry *dentry)
312{
313 spin_lock(&dentry->d_lock);
314 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
315 struct nfs_unlinkdata *data = dentry->d_fsdata;
316
317 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
318 spin_unlock(&dentry->d_lock);
319 nfs_free_unlinkdata(data);
320 return;
321 }
322 spin_unlock(&dentry->d_lock);
323}
324
/*
 * Per-call state of an asynchronous RENAME. Allocated and filled in by
 * nfs_async_rename(), handed to the rpc_call_ops as calldata, and torn
 * down by nfs_async_rename_release().
 */
struct nfs_renamedata {
	struct nfs_renameargs args;	/* RPC arguments (msg.rpc_argp) */
	struct nfs_renameres res;	/* RPC results (msg.rpc_resp) */
	struct rpc_cred *cred;		/* from rpc_lookup_cred(); put on release */
	struct inode *old_dir;		/* source directory; i_count ref held */
	struct dentry *old_dentry;	/* dentry being renamed; dget() ref held */
	struct nfs_fattr old_fattr;	/* backing store for res.old_fattr */
	struct inode *new_dir;		/* target directory; i_count ref held */
	struct dentry *new_dentry;	/* rename target; dget() ref held */
	struct nfs_fattr new_fattr;	/* backing store for res.new_fattr */
};
336
337/**
338 * nfs_async_rename_done - Sillyrename post-processing
339 * @task: rpc_task of the sillyrename
340 * @calldata: nfs_renamedata for the sillyrename
341 *
342 * Do the directory attribute updates and the d_move
343 */
344static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
345{
346 struct nfs_renamedata *data = calldata;
347 struct inode *old_dir = data->old_dir;
348 struct inode *new_dir = data->new_dir;
349
350 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
351 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
352 return;
353 }
354
355 if (task->tk_status != 0) {
356 nfs_cancel_async_unlink(data->old_dentry);
357 return;
358 }
359
360 nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir));
361 d_move(data->old_dentry, data->new_dentry);
362}
363
364/**
365 * nfs_async_rename_release - Release the sillyrename data.
366 * @calldata: the struct nfs_renamedata to be released
367 */
368static void nfs_async_rename_release(void *calldata)
369{
370 struct nfs_renamedata *data = calldata;
371 struct super_block *sb = data->old_dir->i_sb;
372
373 if (data->old_dentry->d_inode)
374 nfs_mark_for_revalidate(data->old_dentry->d_inode);
375
376 dput(data->old_dentry);
377 dput(data->new_dentry);
378 iput(data->old_dir);
379 iput(data->new_dir);
380 nfs_sb_deactive(sb);
381 put_rpccred(data->cred);
382 kfree(data);
383}
384
#ifdef CONFIG_NFS_V4_1
/*
 * rpc_call_prepare hook for the async rename: runs nfs4_setup_sequence()
 * on the call's sequence args/res and only starts the RPC when that
 * returns zero.
 */
static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_renamedata *rd = calldata;

	if (nfs4_setup_sequence(NFS_SERVER(rd->old_dir), &rd->args.seq_args,
				&rd->res.seq_res, 1, task) == 0)
		rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */
397
398static const struct rpc_call_ops nfs_rename_ops = {
399 .rpc_call_done = nfs_async_rename_done,
400 .rpc_release = nfs_async_rename_release,
401#if defined(CONFIG_NFS_V4_1)
402 .rpc_call_prepare = nfs_rename_prepare,
403#endif /* CONFIG_NFS_V4_1 */
404};
405
406/**
407 * nfs_async_rename - perform an asynchronous rename operation
408 * @old_dir: directory that currently holds the dentry to be renamed
409 * @new_dir: target directory for the rename
410 * @old_dentry: original dentry to be renamed
411 * @new_dentry: dentry to which the old_dentry should be renamed
412 *
413 * It's expected that valid references to the dentries and inodes are held
414 */
415static struct rpc_task *
416nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
417 struct dentry *old_dentry, struct dentry *new_dentry)
418{
419 struct nfs_renamedata *data;
420 struct rpc_message msg = { };
421 struct rpc_task_setup task_setup_data = {
422 .rpc_message = &msg,
423 .callback_ops = &nfs_rename_ops,
424 .workqueue = nfsiod_workqueue,
425 .rpc_client = NFS_CLIENT(old_dir),
426 .flags = RPC_TASK_ASYNC,
427 };
428
429 data = kzalloc(sizeof(*data), GFP_KERNEL);
430 if (data == NULL)
431 return ERR_PTR(-ENOMEM);
432 task_setup_data.callback_data = data,
433
434 data->cred = rpc_lookup_cred();
435 if (IS_ERR(data->cred)) {
436 struct rpc_task *task = ERR_CAST(data->cred);
437 kfree(data);
438 return task;
439 }
440
441 msg.rpc_argp = &data->args;
442 msg.rpc_resp = &data->res;
443 msg.rpc_cred = data->cred;
444
445 /* set up nfs_renamedata */
446 data->old_dir = old_dir;
447 atomic_inc(&old_dir->i_count);
448 data->new_dir = new_dir;
449 atomic_inc(&new_dir->i_count);
450 data->old_dentry = dget(old_dentry);
451 data->new_dentry = dget(new_dentry);
452 nfs_fattr_init(&data->old_fattr);
453 nfs_fattr_init(&data->new_fattr);
454
455 /* set up nfs_renameargs */
456 data->args.old_dir = NFS_FH(old_dir);
457 data->args.old_name = &old_dentry->d_name;
458 data->args.new_dir = NFS_FH(new_dir);
459 data->args.new_name = &new_dentry->d_name;
460
461 /* set up nfs_renameres */
462 data->res.old_fattr = &data->old_fattr;
463 data->res.new_fattr = &data->new_fattr;
464
465 nfs_sb_active(old_dir->i_sb);
466
467 NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir);
468
469 return rpc_run_task(&task_setup_data);
470}
471
472/**
473 * nfs_sillyrename - Perform a silly-rename of a dentry
474 * @dir: inode of directory that contains dentry
475 * @dentry: dentry to be sillyrenamed
476 *
477 * NFSv2/3 is stateless and the server doesn't know when the client is
478 * holding a file open. To prevent application problems when a file is
479 * unlinked while it's still open, the client performs a "silly-rename".
480 * That is, it renames the file to a hidden file in the same directory,
481 * and only performs the unlink once the last reference to it is put.
482 *
483 * The final cleanup is done during dentry_iput.
484 */
485int
486nfs_sillyrename(struct inode *dir, struct dentry *dentry)
487{
488 static unsigned int sillycounter;
489 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
490 const int countersize = sizeof(sillycounter)*2;
491 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
492 char silly[slen+1];
493 struct dentry *sdentry;
494 struct rpc_task *task;
495 int error = -EIO;
496
497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
498 dentry->d_parent->d_name.name, dentry->d_name.name,
499 atomic_read(&dentry->d_count));
500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
501
502 /*
503 * We don't allow a dentry to be silly-renamed twice.
504 */
505 error = -EBUSY;
506 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
507 goto out;
508
509 sprintf(silly, ".nfs%*.*Lx",
510 fileidsize, fileidsize,
511 (unsigned long long)NFS_FILEID(dentry->d_inode));
512
513 /* Return delegation in anticipation of the rename */
514 nfs_inode_return_delegation(dentry->d_inode);
515
516 sdentry = NULL;
517 do {
518 char *suffix = silly + slen - countersize;
519
520 dput(sdentry);
521 sillycounter++;
522 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
523
524 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
525 dentry->d_name.name, silly);
526
527 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
528 /*
529 * N.B. Better to return EBUSY here ... it could be
530 * dangerous to delete the file while it's in use.
531 */
532 if (IS_ERR(sdentry))
533 goto out;
534 } while (sdentry->d_inode != NULL); /* need negative lookup */
535
536 /* queue unlink first. Can't do this from rpc_release as it
537 * has to allocate memory
538 */
539 error = nfs_async_unlink(dir, dentry);
540 if (error)
541 goto out_dput;
542
543 /* run the rename task, undo unlink if it fails */
544 task = nfs_async_rename(dir, dir, dentry, sdentry);
545 if (IS_ERR(task)) {
546 error = -EBUSY;
547 nfs_cancel_async_unlink(dentry);
548 goto out_dput;
549 }
550
551 /* wait for the RPC task to complete, unless a SIGKILL intervenes */
552 error = rpc_wait_for_completion_task(task);
553 if (error == 0)
554 error = task->tk_status;
555 rpc_put_task(task);
556out_dput:
557 dput(sdentry);
558out:
559 return error;
560}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 874972d9427c..4c14c17a5276 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -55,7 +55,6 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
55 if (p) { 55 if (p) {
56 memset(p, 0, sizeof(*p)); 56 memset(p, 0, sizeof(*p));
57 INIT_LIST_HEAD(&p->pages); 57 INIT_LIST_HEAD(&p->pages);
58 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
59 } 58 }
60 return p; 59 return p;
61} 60}
@@ -75,7 +74,6 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
75 memset(p, 0, sizeof(*p)); 74 memset(p, 0, sizeof(*p));
76 INIT_LIST_HEAD(&p->pages); 75 INIT_LIST_HEAD(&p->pages);
77 p->npages = pagecount; 76 p->npages = pagecount;
78 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
79 if (pagecount <= ARRAY_SIZE(p->page_array)) 77 if (pagecount <= ARRAY_SIZE(p->page_array))
80 p->pagevec = p->page_array; 78 p->pagevec = p->page_array;
81 else { 79 else {
@@ -292,9 +290,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
292 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 290 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
293 291
294 nfs_pageio_cond_complete(pgio, page->index); 292 nfs_pageio_cond_complete(pgio, page->index);
295 ret = nfs_page_async_flush(pgio, page, 293 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
296 wbc->sync_mode == WB_SYNC_NONE ||
297 wbc->nonblocking != 0);
298 if (ret == -EAGAIN) { 294 if (ret == -EAGAIN) {
299 redirty_page_for_writepage(wbc, page); 295 redirty_page_for_writepage(wbc, page);
300 ret = 0; 296 ret = 0;
@@ -1433,15 +1429,17 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1433 int flags = FLUSH_SYNC; 1429 int flags = FLUSH_SYNC;
1434 int ret = 0; 1430 int ret = 0;
1435 1431
1436 /* Don't commit yet if this is a non-blocking flush and there are 1432 if (wbc->sync_mode == WB_SYNC_NONE) {
1437 * lots of outstanding writes for this mapping. 1433 /* Don't commit yet if this is a non-blocking flush and there
1438 */ 1434 * are a lot of outstanding writes for this mapping.
1439 if (wbc->sync_mode == WB_SYNC_NONE && 1435 */
1440 nfsi->ncommit <= (nfsi->npages >> 1)) 1436 if (nfsi->ncommit <= (nfsi->npages >> 1))
1441 goto out_mark_dirty; 1437 goto out_mark_dirty;
1442 1438
1443 if (wbc->nonblocking || wbc->for_background) 1439 /* don't wait for the COMMIT response */
1444 flags = 0; 1440 flags = 0;
1441 }
1442
1445 ret = nfs_commit_inode(inode, flags); 1443 ret = nfs_commit_inode(inode, flags);
1446 if (ret >= 0) { 1444 if (ret >= 0) {
1447 if (wbc->sync_mode == WB_SYNC_NONE) { 1445 if (wbc->sync_mode == WB_SYNC_NONE) {