about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-03-25 13:03:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-03-25 13:03:28 -0400
commit 40471856f2e38e9bfa8d605295e8234421110dd6 (patch)
tree 9757e42e40bdbfcff7c52ab133e32b5c2203153b /fs
parent ae005cbed12d0b340b04b59d6f5c56e710b3895d (diff)
parent 0acd2201920d0968919f4f5797d63f7b6f2b19d4 (diff)
Merge branch 'nfs-for-2.6.39' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6
* 'nfs-for-2.6.39' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (28 commits)
  Cleanup XDR parsing for LAYOUTGET, GETDEVICEINFO
  NFSv4.1 convert layoutcommit sync to boolean
  NFSv4.1 pnfs_layoutcommit_inode fixes
  NFS: Determine initial mount security
  NFS: use secinfo when crossing mountpoints
  NFS: Add secinfo procedure
  NFS: lookup supports alternate client
  NFS: convert call_sync() to a function
  NFSv4.1 remove temp code that prevented ds commits
  NFSv4.1: layoutcommit
  NFSv4.1: filelayout driver specific code for COMMIT
  NFSv4.1: remove GETATTR from ds commits
  NFSv4.1: add generic layer hooks for pnfs COMMIT
  NFSv4.1: alloc and free commit_buckets
  NFSv4.1: shift filelayout_free_lseg
  NFSv4.1: pull out code from nfs_commit_release
  NFSv4.1: pull error handling out of nfs_commit_list
  NFSv4.1: add callback to nfs4_commit_done
  NFSv4.1: rearrange nfs_commit_rpcsetup
  NFSv4.1: don't send COMMIT to ds for data sync writes
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/nfs/dir.c 89
-rw-r--r-- fs/nfs/file.c 3
-rw-r--r-- fs/nfs/getroot.c 4
-rw-r--r-- fs/nfs/inode.c 10
-rw-r--r-- fs/nfs/internal.h 27
-rw-r--r-- fs/nfs/namespace.c 115
-rw-r--r-- fs/nfs/nfs3proc.c 2
-rw-r--r-- fs/nfs/nfs4_fs.h 5
-rw-r--r-- fs/nfs/nfs4filelayout.c 352
-rw-r--r-- fs/nfs/nfs4filelayout.h 2
-rw-r--r-- fs/nfs/nfs4filelayoutdev.c 178
-rw-r--r-- fs/nfs/nfs4proc.c 302
-rw-r--r-- fs/nfs/nfs4xdr.c 313
-rw-r--r-- fs/nfs/pagelist.c 8
-rw-r--r-- fs/nfs/pnfs.c 142
-rw-r--r-- fs/nfs/pnfs.h 83
-rw-r--r-- fs/nfs/proc.c 2
-rw-r--r-- fs/nfs/write.c 214
-rw-r--r-- fs/nfs_common/nfsacl.c 1
19 files changed, 1600 insertions, 252 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index abdf38d5971d..7237672216c8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -44,6 +44,7 @@
44/* #define NFS_DEBUG_VERBOSE 1 */ 44/* #define NFS_DEBUG_VERBOSE 1 */
45 45
46static int nfs_opendir(struct inode *, struct file *); 46static int nfs_opendir(struct inode *, struct file *);
47static int nfs_closedir(struct inode *, struct file *);
47static int nfs_readdir(struct file *, void *, filldir_t); 48static int nfs_readdir(struct file *, void *, filldir_t);
48static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); 49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
49static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); 50static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
@@ -64,7 +65,7 @@ const struct file_operations nfs_dir_operations = {
64 .read = generic_read_dir, 65 .read = generic_read_dir,
65 .readdir = nfs_readdir, 66 .readdir = nfs_readdir,
66 .open = nfs_opendir, 67 .open = nfs_opendir,
67 .release = nfs_release, 68 .release = nfs_closedir,
68 .fsync = nfs_fsync_dir, 69 .fsync = nfs_fsync_dir,
69}; 70};
70 71
@@ -133,13 +134,35 @@ const struct inode_operations nfs4_dir_inode_operations = {
133 134
134#endif /* CONFIG_NFS_V4 */ 135#endif /* CONFIG_NFS_V4 */
135 136
137static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred)
138{
139 struct nfs_open_dir_context *ctx;
140 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
141 if (ctx != NULL) {
142 ctx->duped = 0;
143 ctx->dir_cookie = 0;
144 ctx->dup_cookie = 0;
145 ctx->cred = get_rpccred(cred);
146 } else
147 ctx = ERR_PTR(-ENOMEM);
148 return ctx;
149}
150
151static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
152{
153 put_rpccred(ctx->cred);
154 kfree(ctx);
155}
156
136/* 157/*
137 * Open file 158 * Open file
138 */ 159 */
139static int 160static int
140nfs_opendir(struct inode *inode, struct file *filp) 161nfs_opendir(struct inode *inode, struct file *filp)
141{ 162{
142 int res; 163 int res = 0;
164 struct nfs_open_dir_context *ctx;
165 struct rpc_cred *cred;
143 166
144 dfprintk(FILE, "NFS: open dir(%s/%s)\n", 167 dfprintk(FILE, "NFS: open dir(%s/%s)\n",
145 filp->f_path.dentry->d_parent->d_name.name, 168 filp->f_path.dentry->d_parent->d_name.name,
@@ -147,8 +170,15 @@ nfs_opendir(struct inode *inode, struct file *filp)
147 170
148 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 171 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
149 172
150 /* Call generic open code in order to cache credentials */ 173 cred = rpc_lookup_cred();
151 res = nfs_open(inode, filp); 174 if (IS_ERR(cred))
175 return PTR_ERR(cred);
176 ctx = alloc_nfs_open_dir_context(cred);
177 if (IS_ERR(ctx)) {
178 res = PTR_ERR(ctx);
179 goto out;
180 }
181 filp->private_data = ctx;
152 if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { 182 if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
153 /* This is a mountpoint, so d_revalidate will never 183 /* This is a mountpoint, so d_revalidate will never
154 * have been called, so we need to refresh the 184 * have been called, so we need to refresh the
@@ -156,9 +186,18 @@ nfs_opendir(struct inode *inode, struct file *filp)
156 */ 186 */
157 __nfs_revalidate_inode(NFS_SERVER(inode), inode); 187 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
158 } 188 }
189out:
190 put_rpccred(cred);
159 return res; 191 return res;
160} 192}
161 193
194static int
195nfs_closedir(struct inode *inode, struct file *filp)
196{
197 put_nfs_open_dir_context(filp->private_data);
198 return 0;
199}
200
162struct nfs_cache_array_entry { 201struct nfs_cache_array_entry {
163 u64 cookie; 202 u64 cookie;
164 u64 ino; 203 u64 ino;
@@ -284,19 +323,20 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
284{ 323{
285 loff_t diff = desc->file->f_pos - desc->current_index; 324 loff_t diff = desc->file->f_pos - desc->current_index;
286 unsigned int index; 325 unsigned int index;
326 struct nfs_open_dir_context *ctx = desc->file->private_data;
287 327
288 if (diff < 0) 328 if (diff < 0)
289 goto out_eof; 329 goto out_eof;
290 if (diff >= array->size) { 330 if (diff >= array->size) {
291 if (array->eof_index >= 0) 331 if (array->eof_index >= 0)
292 goto out_eof; 332 goto out_eof;
293 desc->current_index += array->size;
294 return -EAGAIN; 333 return -EAGAIN;
295 } 334 }
296 335
297 index = (unsigned int)diff; 336 index = (unsigned int)diff;
298 *desc->dir_cookie = array->array[index].cookie; 337 *desc->dir_cookie = array->array[index].cookie;
299 desc->cache_entry_index = index; 338 desc->cache_entry_index = index;
339 ctx->duped = 0;
300 return 0; 340 return 0;
301out_eof: 341out_eof:
302 desc->eof = 1; 342 desc->eof = 1;
@@ -307,10 +347,18 @@ static
307int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) 347int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
308{ 348{
309 int i; 349 int i;
350 loff_t new_pos;
310 int status = -EAGAIN; 351 int status = -EAGAIN;
352 struct nfs_open_dir_context *ctx = desc->file->private_data;
311 353
312 for (i = 0; i < array->size; i++) { 354 for (i = 0; i < array->size; i++) {
313 if (array->array[i].cookie == *desc->dir_cookie) { 355 if (array->array[i].cookie == *desc->dir_cookie) {
356 new_pos = desc->current_index + i;
357 if (new_pos < desc->file->f_pos) {
358 ctx->dup_cookie = *desc->dir_cookie;
359 ctx->duped = 1;
360 }
361 desc->file->f_pos = new_pos;
314 desc->cache_entry_index = i; 362 desc->cache_entry_index = i;
315 return 0; 363 return 0;
316 } 364 }
@@ -342,6 +390,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
342 390
343 if (status == -EAGAIN) { 391 if (status == -EAGAIN) {
344 desc->last_cookie = array->last_cookie; 392 desc->last_cookie = array->last_cookie;
393 desc->current_index += array->size;
345 desc->page_index++; 394 desc->page_index++;
346 } 395 }
347 nfs_readdir_release_array(desc->page); 396 nfs_readdir_release_array(desc->page);
@@ -354,7 +403,8 @@ static
354int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, 403int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
355 struct nfs_entry *entry, struct file *file, struct inode *inode) 404 struct nfs_entry *entry, struct file *file, struct inode *inode)
356{ 405{
357 struct rpc_cred *cred = nfs_file_cred(file); 406 struct nfs_open_dir_context *ctx = file->private_data;
407 struct rpc_cred *cred = ctx->cred;
358 unsigned long timestamp, gencount; 408 unsigned long timestamp, gencount;
359 int error; 409 int error;
360 410
@@ -693,6 +743,20 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
693 int i = 0; 743 int i = 0;
694 int res = 0; 744 int res = 0;
695 struct nfs_cache_array *array = NULL; 745 struct nfs_cache_array *array = NULL;
746 struct nfs_open_dir_context *ctx = file->private_data;
747
748 if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) {
749 if (printk_ratelimit()) {
750 pr_notice("NFS: directory %s/%s contains a readdir loop. "
751 "Please contact your server vendor. "
752 "Offending cookie: %llu\n",
753 file->f_dentry->d_parent->d_name.name,
754 file->f_dentry->d_name.name,
755 *desc->dir_cookie);
756 }
757 res = -ELOOP;
758 goto out;
759 }
696 760
697 array = nfs_readdir_get_array(desc->page); 761 array = nfs_readdir_get_array(desc->page);
698 if (IS_ERR(array)) { 762 if (IS_ERR(array)) {
@@ -785,6 +849,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
785 struct inode *inode = dentry->d_inode; 849 struct inode *inode = dentry->d_inode;
786 nfs_readdir_descriptor_t my_desc, 850 nfs_readdir_descriptor_t my_desc,
787 *desc = &my_desc; 851 *desc = &my_desc;
852 struct nfs_open_dir_context *dir_ctx = filp->private_data;
788 int res; 853 int res;
789 854
790 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 855 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -801,7 +866,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
801 memset(desc, 0, sizeof(*desc)); 866 memset(desc, 0, sizeof(*desc));
802 867
803 desc->file = filp; 868 desc->file = filp;
804 desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; 869 desc->dir_cookie = &dir_ctx->dir_cookie;
805 desc->decode = NFS_PROTO(inode)->decode_dirent; 870 desc->decode = NFS_PROTO(inode)->decode_dirent;
806 desc->plus = NFS_USE_READDIRPLUS(inode); 871 desc->plus = NFS_USE_READDIRPLUS(inode);
807 872
@@ -853,6 +918,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
853{ 918{
854 struct dentry *dentry = filp->f_path.dentry; 919 struct dentry *dentry = filp->f_path.dentry;
855 struct inode *inode = dentry->d_inode; 920 struct inode *inode = dentry->d_inode;
921 struct nfs_open_dir_context *dir_ctx = filp->private_data;
856 922
857 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", 923 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
858 dentry->d_parent->d_name.name, 924 dentry->d_parent->d_name.name,
@@ -872,7 +938,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
872 } 938 }
873 if (offset != filp->f_pos) { 939 if (offset != filp->f_pos) {
874 filp->f_pos = offset; 940 filp->f_pos = offset;
875 nfs_file_open_context(filp)->dir_cookie = 0; 941 dir_ctx->dir_cookie = 0;
942 dir_ctx->duped = 0;
876 } 943 }
877out: 944out:
878 mutex_unlock(&inode->i_mutex); 945 mutex_unlock(&inode->i_mutex);
@@ -1068,7 +1135,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1068 if (fhandle == NULL || fattr == NULL) 1135 if (fhandle == NULL || fattr == NULL)
1069 goto out_error; 1136 goto out_error;
1070 1137
1071 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1138 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1072 if (error) 1139 if (error)
1073 goto out_bad; 1140 goto out_bad;
1074 if (nfs_compare_fh(NFS_FH(inode), fhandle)) 1141 if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1224,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1224 parent = dentry->d_parent; 1291 parent = dentry->d_parent;
1225 /* Protect against concurrent sillydeletes */ 1292 /* Protect against concurrent sillydeletes */
1226 nfs_block_sillyrename(parent); 1293 nfs_block_sillyrename(parent);
1227 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1294 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1228 if (error == -ENOENT) 1295 if (error == -ENOENT)
1229 goto no_entry; 1296 goto no_entry;
1230 if (error < 0) { 1297 if (error < 0) {
@@ -1562,7 +1629,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1562 if (dentry->d_inode) 1629 if (dentry->d_inode)
1563 goto out; 1630 goto out;
1564 if (fhandle->size == 0) { 1631 if (fhandle->size == 0) {
1565 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1632 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1566 if (error) 1633 if (error)
1567 goto out_error; 1634 goto out_error;
1568 } 1635 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d85a534b15cd..3ac5bd695e5e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync)
326 ret = xchg(&ctx->error, 0); 326 ret = xchg(&ctx->error, 0);
327 if (!ret && status < 0) 327 if (!ret && status < 0)
328 ret = status; 328 ret = status;
329 if (!ret && !datasync)
330 /* application has asked for meta-data sync */
331 ret = pnfs_layoutcommit_inode(inode, true);
329 return ret; 332 return ret;
330} 333}
331 334
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 1084792bc0fe..dcb61548887f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -222,6 +222,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
222 goto out; 222 goto out;
223 } 223 }
224 224
225 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
226 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
227 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
228
225 inode = nfs_fhget(sb, mntfh, fattr); 229 inode = nfs_fhget(sb, mntfh, fattr);
226 if (IS_ERR(inode)) { 230 if (IS_ERR(inode)) {
227 dprintk("nfs_get_root: get root inode failed\n"); 231 dprintk("nfs_get_root: get root inode failed\n");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 01768e5e2c9b..57bb31ad7a5e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -254,7 +254,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
254 struct inode *inode = ERR_PTR(-ENOENT); 254 struct inode *inode = ERR_PTR(-ENOENT);
255 unsigned long hash; 255 unsigned long hash;
256 256
257 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) 257 nfs_attr_check_mountpoint(sb, fattr);
258
259 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0)
258 goto out_no_inode; 260 goto out_no_inode;
259 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) 261 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
260 goto out_no_inode; 262 goto out_no_inode;
@@ -298,8 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
298 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) 300 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
299 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 301 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
300 /* Deal with crossing mountpoints */ 302 /* Deal with crossing mountpoints */
301 if ((fattr->valid & NFS_ATTR_FATTR_FSID) 303 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
302 && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { 304 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
303 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 305 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
304 inode->i_op = &nfs_referral_inode_operations; 306 inode->i_op = &nfs_referral_inode_operations;
305 else 307 else
@@ -639,7 +641,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cr
639 ctx->mode = f_mode; 641 ctx->mode = f_mode;
640 ctx->flags = 0; 642 ctx->flags = 0;
641 ctx->error = 0; 643 ctx->error = 0;
642 ctx->dir_cookie = 0;
643 nfs_init_lock_context(&ctx->lock_context); 644 nfs_init_lock_context(&ctx->lock_context);
644 ctx->lock_context.open_context = ctx; 645 ctx->lock_context.open_context = ctx;
645 INIT_LIST_HEAD(&ctx->list); 646 INIT_LIST_HEAD(&ctx->list);
@@ -1471,6 +1472,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1471 nfsi->delegation_state = 0; 1472 nfsi->delegation_state = 0;
1472 init_rwsem(&nfsi->rwsem); 1473 init_rwsem(&nfsi->rwsem);
1473 nfsi->layout = NULL; 1474 nfsi->layout = NULL;
1475 atomic_set(&nfsi->commits_outstanding, 0);
1474#endif 1476#endif
1475} 1477}
1476 1478
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 72e0bddf7a2f..ce118ce885dd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -39,6 +39,12 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
39 return 0; 39 return 0;
40} 40}
41 41
42static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
43{
44 if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
45 fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
46}
47
42struct nfs_clone_mount { 48struct nfs_clone_mount {
43 const struct super_block *sb; 49 const struct super_block *sb;
44 const struct dentry *dentry; 50 const struct dentry *dentry;
@@ -214,6 +220,7 @@ extern const u32 nfs41_maxwrite_overhead;
214/* nfs4proc.c */ 220/* nfs4proc.c */
215#ifdef CONFIG_NFS_V4 221#ifdef CONFIG_NFS_V4
216extern struct rpc_procinfo nfs4_procedures[]; 222extern struct rpc_procinfo nfs4_procedures[];
223void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *);
217#endif 224#endif
218 225
219extern int nfs4_init_ds_session(struct nfs_client *clp); 226extern int nfs4_init_ds_session(struct nfs_client *clp);
@@ -276,11 +283,25 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
276extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 283extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
277 284
278/* write.c */ 285/* write.c */
286extern void nfs_commit_free(struct nfs_write_data *p);
279extern int nfs_initiate_write(struct nfs_write_data *data, 287extern int nfs_initiate_write(struct nfs_write_data *data,
280 struct rpc_clnt *clnt, 288 struct rpc_clnt *clnt,
281 const struct rpc_call_ops *call_ops, 289 const struct rpc_call_ops *call_ops,
282 int how); 290 int how);
283extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 291extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
292extern int nfs_initiate_commit(struct nfs_write_data *data,
293 struct rpc_clnt *clnt,
294 const struct rpc_call_ops *call_ops,
295 int how);
296extern void nfs_init_commit(struct nfs_write_data *data,
297 struct list_head *head,
298 struct pnfs_layout_segment *lseg);
299void nfs_retry_commit(struct list_head *page_list,
300 struct pnfs_layout_segment *lseg);
301void nfs_commit_clear_lock(struct nfs_inode *nfsi);
302void nfs_commitdata_release(void *data);
303void nfs_commit_release_pages(struct nfs_write_data *data);
304
284#ifdef CONFIG_MIGRATION 305#ifdef CONFIG_MIGRATION
285extern int nfs_migrate_page(struct address_space *, 306extern int nfs_migrate_page(struct address_space *,
286 struct page *, struct page *); 307 struct page *, struct page *);
@@ -296,12 +317,14 @@ extern int nfs4_init_client(struct nfs_client *clp,
296 rpc_authflavor_t authflavour, 317 rpc_authflavor_t authflavour,
297 int noresvport); 318 int noresvport);
298extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); 319extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
299extern int _nfs4_call_sync(struct nfs_server *server, 320extern int _nfs4_call_sync(struct rpc_clnt *clnt,
321 struct nfs_server *server,
300 struct rpc_message *msg, 322 struct rpc_message *msg,
301 struct nfs4_sequence_args *args, 323 struct nfs4_sequence_args *args,
302 struct nfs4_sequence_res *res, 324 struct nfs4_sequence_res *res,
303 int cache_reply); 325 int cache_reply);
304extern int _nfs4_call_sync_session(struct nfs_server *server, 326extern int _nfs4_call_sync_session(struct rpc_clnt *clnt,
327 struct nfs_server *server,
305 struct rpc_message *msg, 328 struct rpc_message *msg,
306 struct nfs4_sequence_args *args, 329 struct nfs4_sequence_args *args,
307 struct nfs4_sequence_res *res, 330 struct nfs4_sequence_res *res,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index bf1c68009ffd..ad92bf731ff5 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -15,6 +15,7 @@
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
17#include <linux/vfs.h> 17#include <linux/vfs.h>
18#include <linux/sunrpc/gss_api.h>
18#include "internal.h" 19#include "internal.h"
19 20
20#define NFSDBG_FACILITY NFSDBG_VFS 21#define NFSDBG_FACILITY NFSDBG_VFS
@@ -27,7 +28,8 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
27 28
28static struct vfsmount *nfs_do_submount(struct dentry *dentry, 29static struct vfsmount *nfs_do_submount(struct dentry *dentry,
29 struct nfs_fh *fh, 30 struct nfs_fh *fh,
30 struct nfs_fattr *fattr); 31 struct nfs_fattr *fattr,
32 rpc_authflavor_t authflavor);
31 33
32/* 34/*
33 * nfs_path - reconstruct the path given an arbitrary dentry 35 * nfs_path - reconstruct the path given an arbitrary dentry
@@ -116,6 +118,100 @@ Elong:
116 return ERR_PTR(-ENAMETOOLONG); 118 return ERR_PTR(-ENAMETOOLONG);
117} 119}
118 120
121#ifdef CONFIG_NFS_V4
122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode)
123{
124 struct gss_api_mech *mech;
125 struct xdr_netobj oid;
126 int i;
127 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
128
129 for (i = 0; i < flavors->num_flavors; i++) {
130 struct nfs4_secinfo_flavor *flavor;
131 flavor = &flavors->flavors[i];
132
133 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
134 pseudoflavor = flavor->flavor;
135 break;
136 } else if (flavor->flavor == RPC_AUTH_GSS) {
137 oid.len = flavor->gss.sec_oid4.len;
138 oid.data = flavor->gss.sec_oid4.data;
139 mech = gss_mech_get_by_OID(&oid);
140 if (!mech)
141 continue;
142 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
143 gss_mech_put(mech);
144 break;
145 }
146 }
147
148 return pseudoflavor;
149}
150
151static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry)
152{
153 int status = 0;
154 struct page *page;
155 struct nfs4_secinfo_flavors *flavors;
156 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
157 rpc_authflavor_t flavor = RPC_AUTH_UNIX;
158
159 secinfo = NFS_PROTO(parent->d_inode)->secinfo;
160 if (secinfo != NULL) {
161 page = alloc_page(GFP_KERNEL);
162 if (!page) {
163 status = -ENOMEM;
164 goto out;
165 }
166 flavors = page_address(page);
167 status = secinfo(parent->d_inode, &dentry->d_name, flavors);
168 flavor = nfs_find_best_sec(flavors, dentry->d_inode);
169 put_page(page);
170 }
171
172 return flavor;
173
174out:
175 status = -ENOMEM;
176 return status;
177}
178
179static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
180 struct dentry *dentry, struct path *path,
181 struct nfs_fh *fh, struct nfs_fattr *fattr)
182{
183 rpc_authflavor_t flavor;
184 struct rpc_clnt *clone;
185 struct rpc_auth *auth;
186 int err;
187
188 flavor = nfs_negotiate_security(parent, path->dentry);
189 if (flavor < 0)
190 goto out;
191 clone = rpc_clone_client(server->client);
192 auth = rpcauth_create(flavor, clone);
193 if (!auth) {
194 flavor = -EIO;
195 goto out;
196 }
197 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
198 &path->dentry->d_name,
199 fh, fattr);
200 if (err < 0)
201 flavor = err;
202out:
203 return flavor;
204}
205#else /* CONFIG_NFS_V4 */
206static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server,
207 struct dentry *parent, struct dentry *dentry,
208 struct path *path, struct nfs_fh *fh,
209 struct nfs_fattr *fattr)
210{
211 return -EPERM;
212}
213#endif /* CONFIG_NFS_V4 */
214
119/* 215/*
120 * nfs_d_automount - Handle crossing a mountpoint on the server 216 * nfs_d_automount - Handle crossing a mountpoint on the server
121 * @path - The mountpoint 217 * @path - The mountpoint
@@ -136,6 +232,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
136 struct nfs_fh *fh = NULL; 232 struct nfs_fh *fh = NULL;
137 struct nfs_fattr *fattr = NULL; 233 struct nfs_fattr *fattr = NULL;
138 int err; 234 int err;
235 rpc_authflavor_t flavor = 1;
139 236
140 dprintk("--> nfs_d_automount()\n"); 237 dprintk("--> nfs_d_automount()\n");
141 238
@@ -153,9 +250,16 @@ struct vfsmount *nfs_d_automount(struct path *path)
153 250
154 /* Look it up again to get its attributes */ 251 /* Look it up again to get its attributes */
155 parent = dget_parent(path->dentry); 252 parent = dget_parent(path->dentry);
156 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, 253 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
157 &path->dentry->d_name, 254 &path->dentry->d_name,
158 fh, fattr); 255 fh, fattr);
256 if (err == -EPERM) {
257 flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr);
258 if (flavor < 0)
259 err = flavor;
260 else
261 err = 0;
262 }
159 dput(parent); 263 dput(parent);
160 if (err != 0) { 264 if (err != 0) {
161 mnt = ERR_PTR(err); 265 mnt = ERR_PTR(err);
@@ -165,7 +269,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
165 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 269 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
166 mnt = nfs_do_refmount(path->dentry); 270 mnt = nfs_do_refmount(path->dentry);
167 else 271 else
168 mnt = nfs_do_submount(path->dentry, fh, fattr); 272 mnt = nfs_do_submount(path->dentry, fh, fattr, flavor);
169 if (IS_ERR(mnt)) 273 if (IS_ERR(mnt))
170 goto out; 274 goto out;
171 275
@@ -232,17 +336,20 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
232 * @dentry - parent directory 336 * @dentry - parent directory
233 * @fh - filehandle for new root dentry 337 * @fh - filehandle for new root dentry
234 * @fattr - attributes for new root inode 338 * @fattr - attributes for new root inode
339 * @authflavor - security flavor to use when performing the mount
235 * 340 *
236 */ 341 */
237static struct vfsmount *nfs_do_submount(struct dentry *dentry, 342static struct vfsmount *nfs_do_submount(struct dentry *dentry,
238 struct nfs_fh *fh, 343 struct nfs_fh *fh,
239 struct nfs_fattr *fattr) 344 struct nfs_fattr *fattr,
345 rpc_authflavor_t authflavor)
240{ 346{
241 struct nfs_clone_mount mountdata = { 347 struct nfs_clone_mount mountdata = {
242 .sb = dentry->d_sb, 348 .sb = dentry->d_sb,
243 .dentry = dentry, 349 .dentry = dentry,
244 .fh = fh, 350 .fh = fh,
245 .fattr = fattr, 351 .fattr = fattr,
352 .authflavor = authflavor,
246 }; 353 };
247 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 354 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
248 char *page = (char *) __get_free_page(GFP_USER); 355 char *page = (char *) __get_free_page(GFP_USER);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d0c80d8b3f96..38053d823eb0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -141,7 +141,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
141} 141}
142 142
143static int 143static int
144nfs3_proc_lookup(struct inode *dir, struct qstr *name, 144nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
145 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 145 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
146{ 146{
147 struct nfs3_diropargs arg = { 147 struct nfs3_diropargs arg = {
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c64be1cff080..e1c261ddd65d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -57,7 +57,8 @@ enum nfs4_session_state {
57struct nfs4_minor_version_ops { 57struct nfs4_minor_version_ops {
58 u32 minor_version; 58 u32 minor_version;
59 59
60 int (*call_sync)(struct nfs_server *server, 60 int (*call_sync)(struct rpc_clnt *clnt,
61 struct nfs_server *server,
61 struct rpc_message *msg, 62 struct rpc_message *msg,
62 struct nfs4_sequence_args *args, 63 struct nfs4_sequence_args *args,
63 struct nfs4_sequence_res *res, 64 struct nfs4_sequence_res *res,
@@ -262,6 +263,8 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
262extern int nfs4_init_session(struct nfs_server *server); 263extern int nfs4_init_session(struct nfs_server *server);
263extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 264extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
264 struct nfs_fsinfo *fsinfo); 265 struct nfs_fsinfo *fsinfo);
266extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
267 bool sync);
265 268
266static inline bool 269static inline bool
267is_ds_only_client(struct nfs_client *clp) 270is_ds_only_client(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 428558464817..6f8192f4cfc7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -154,6 +154,23 @@ static int filelayout_read_done_cb(struct rpc_task *task,
154} 154}
155 155
156/* 156/*
157 * We reference the rpc_cred of the first WRITE that triggers the need for
158 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
159 * rfc5661 is not clear about which credential should be used.
160 */
161static void
162filelayout_set_layoutcommit(struct nfs_write_data *wdata)
163{
164 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
165 wdata->res.verf->committed == NFS_FILE_SYNC)
166 return;
167
168 pnfs_set_layoutcommit(wdata);
169 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
170 (unsigned long) wdata->lseg->pls_end_pos);
171}
172
173/*
157 * Call ops for the async read/write cases 174 * Call ops for the async read/write cases
158 * In the case of dense layouts, the offset needs to be reset to its 175 * In the case of dense layouts, the offset needs to be reset to its
159 * original value. 176 * original value.
@@ -210,6 +227,38 @@ static int filelayout_write_done_cb(struct rpc_task *task,
210 return -EAGAIN; 227 return -EAGAIN;
211 } 228 }
212 229
230 filelayout_set_layoutcommit(data);
231 return 0;
232}
233
234/* Fake up some data that will cause nfs_commit_release to retry the writes. */
235static void prepare_to_resend_writes(struct nfs_write_data *data)
236{
237 struct nfs_page *first = nfs_list_entry(data->pages.next);
238
239 data->task.tk_status = 0;
240 memcpy(data->verf.verifier, first->wb_verf.verifier,
241 sizeof(first->wb_verf.verifier));
242 data->verf.verifier[0]++; /* ensure verifier mismatch */
243}
244
245static int filelayout_commit_done_cb(struct rpc_task *task,
246 struct nfs_write_data *data)
247{
248 int reset = 0;
249
250 if (filelayout_async_handle_error(task, data->args.context->state,
251 data->ds_clp, &reset) == -EAGAIN) {
252 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
253 __func__, data->ds_clp, data->ds_clp->cl_session);
254 if (reset) {
255 prepare_to_resend_writes(data);
256 filelayout_set_lo_fail(data->lseg);
257 } else
258 nfs_restart_rpc(task, data->ds_clp);
259 return -EAGAIN;
260 }
261
213 return 0; 262 return 0;
214} 263}
215 264
@@ -240,6 +289,16 @@ static void filelayout_write_release(void *data)
240 wdata->mds_ops->rpc_release(data); 289 wdata->mds_ops->rpc_release(data);
241} 290}
242 291
292static void filelayout_commit_release(void *data)
293{
294 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
295
296 nfs_commit_release_pages(wdata);
297 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
298 nfs_commit_clear_lock(NFS_I(wdata->inode));
299 nfs_commitdata_release(wdata);
300}
301
243struct rpc_call_ops filelayout_read_call_ops = { 302struct rpc_call_ops filelayout_read_call_ops = {
244 .rpc_call_prepare = filelayout_read_prepare, 303 .rpc_call_prepare = filelayout_read_prepare,
245 .rpc_call_done = filelayout_read_call_done, 304 .rpc_call_done = filelayout_read_call_done,
@@ -252,6 +311,12 @@ struct rpc_call_ops filelayout_write_call_ops = {
252 .rpc_release = filelayout_write_release, 311 .rpc_release = filelayout_write_release,
253}; 312};
254 313
314struct rpc_call_ops filelayout_commit_call_ops = {
315 .rpc_call_prepare = filelayout_write_prepare,
316 .rpc_call_done = filelayout_write_call_done,
317 .rpc_release = filelayout_commit_release,
318};
319
255static enum pnfs_try_status 320static enum pnfs_try_status
256filelayout_read_pagelist(struct nfs_read_data *data) 321filelayout_read_pagelist(struct nfs_read_data *data)
257{ 322{
@@ -320,10 +385,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
320 data->inode->i_ino, sync, (size_t) data->args.count, offset, 385 data->inode->i_ino, sync, (size_t) data->args.count, offset,
321 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 386 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
322 387
323 /* We can't handle commit to ds yet */
324 if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
325 data->args.stable = NFS_FILE_SYNC;
326
327 data->write_done_cb = filelayout_write_done_cb; 388 data->write_done_cb = filelayout_write_done_cb;
328 data->ds_clp = ds->ds_clp; 389 data->ds_clp = ds->ds_clp;
329 fh = nfs4_fl_select_ds_fh(lseg, j); 390 fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -441,12 +502,33 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
441 struct nfs4_layoutget_res *lgr, 502 struct nfs4_layoutget_res *lgr,
442 struct nfs4_deviceid *id) 503 struct nfs4_deviceid *id)
443{ 504{
444 uint32_t *p = (uint32_t *)lgr->layout.buf; 505 struct xdr_stream stream;
506 struct xdr_buf buf = {
507 .pages = lgr->layoutp->pages,
508 .page_len = lgr->layoutp->len,
509 .buflen = lgr->layoutp->len,
510 .len = lgr->layoutp->len,
511 };
512 struct page *scratch;
513 __be32 *p;
445 uint32_t nfl_util; 514 uint32_t nfl_util;
446 int i; 515 int i;
447 516
448 dprintk("%s: set_layout_map Begin\n", __func__); 517 dprintk("%s: set_layout_map Begin\n", __func__);
449 518
519 scratch = alloc_page(GFP_KERNEL);
520 if (!scratch)
521 return -ENOMEM;
522
523 xdr_init_decode(&stream, &buf, NULL);
524 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
525
526 /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
527 * num_fh (4) */
528 p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
529 if (unlikely(!p))
530 goto out_err;
531
450 memcpy(id, p, sizeof(*id)); 532 memcpy(id, p, sizeof(*id));
451 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); 533 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
452 print_deviceid(id); 534 print_deviceid(id);
@@ -468,32 +550,57 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
468 __func__, nfl_util, fl->num_fh, fl->first_stripe_index, 550 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
469 fl->pattern_offset); 551 fl->pattern_offset);
470 552
553 if (!fl->num_fh)
554 goto out_err;
555
471 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 556 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
472 GFP_KERNEL); 557 GFP_KERNEL);
473 if (!fl->fh_array) 558 if (!fl->fh_array)
474 return -ENOMEM; 559 goto out_err;
475 560
476 for (i = 0; i < fl->num_fh; i++) { 561 for (i = 0; i < fl->num_fh; i++) {
477 /* Do we want to use a mempool here? */ 562 /* Do we want to use a mempool here? */
478 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); 563 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
479 if (!fl->fh_array[i]) { 564 if (!fl->fh_array[i])
480 filelayout_free_fh_array(fl); 565 goto out_err_free;
481 return -ENOMEM; 566
482 } 567 p = xdr_inline_decode(&stream, 4);
568 if (unlikely(!p))
569 goto out_err_free;
483 fl->fh_array[i]->size = be32_to_cpup(p++); 570 fl->fh_array[i]->size = be32_to_cpup(p++);
484 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { 571 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
485 printk(KERN_ERR "Too big fh %d received %d\n", 572 printk(KERN_ERR "Too big fh %d received %d\n",
486 i, fl->fh_array[i]->size); 573 i, fl->fh_array[i]->size);
487 filelayout_free_fh_array(fl); 574 goto out_err_free;
488 return -EIO;
489 } 575 }
576
577 p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
578 if (unlikely(!p))
579 goto out_err_free;
490 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); 580 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
491 p += XDR_QUADLEN(fl->fh_array[i]->size);
492 dprintk("DEBUG: %s: fh len %d\n", __func__, 581 dprintk("DEBUG: %s: fh len %d\n", __func__,
493 fl->fh_array[i]->size); 582 fl->fh_array[i]->size);
494 } 583 }
495 584
585 __free_page(scratch);
496 return 0; 586 return 0;
587
588out_err_free:
589 filelayout_free_fh_array(fl);
590out_err:
591 __free_page(scratch);
592 return -EIO;
593}
594
595static void
596filelayout_free_lseg(struct pnfs_layout_segment *lseg)
597{
598 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
599
600 dprintk("--> %s\n", __func__);
601 nfs4_fl_put_deviceid(fl->dsaddr);
602 kfree(fl->commit_buckets);
603 _filelayout_free_lseg(fl);
497} 604}
498 605
499static struct pnfs_layout_segment * 606static struct pnfs_layout_segment *
@@ -514,17 +621,28 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
514 _filelayout_free_lseg(fl); 621 _filelayout_free_lseg(fl);
515 return NULL; 622 return NULL;
516 } 623 }
517 return &fl->generic_hdr;
518}
519 624
520static void 625 /* This assumes there is only one IOMODE_RW lseg. What
521filelayout_free_lseg(struct pnfs_layout_segment *lseg) 626 * we really want to do is have a layout_hdr level
522{ 627 * dictionary of <multipath_list4, fh> keys, each
523 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 628 * associated with a struct list_head, populated by calls
524 629 * to filelayout_write_pagelist().
525 dprintk("--> %s\n", __func__); 630 * */
526 nfs4_fl_put_deviceid(fl->dsaddr); 631 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
527 _filelayout_free_lseg(fl); 632 int i;
633 int size = (fl->stripe_type == STRIPE_SPARSE) ?
634 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
635
636 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
637 if (!fl->commit_buckets) {
638 filelayout_free_lseg(&fl->generic_hdr);
639 return NULL;
640 }
641 fl->number_of_buckets = size;
642 for (i = 0; i < size; i++)
643 INIT_LIST_HEAD(&fl->commit_buckets[i]);
644 }
645 return &fl->generic_hdr;
528} 646}
529 647
530/* 648/*
@@ -552,6 +670,191 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
552 return (p_stripe == r_stripe); 670 return (p_stripe == r_stripe);
553} 671}
554 672
673static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
674{
675 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
676}
677
678static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
679{
680 if (fl->stripe_type == STRIPE_SPARSE)
681 return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
682 else
683 return j;
684}
685
686struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
687{
688 struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
689 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
690 u32 i, j;
691 struct list_head *list;
692
693 /* Note that we are calling nfs4_fl_calc_j_index on each page
694 * that ends up being committed to a data server. An attractive
695 * alternative is to add a field to nfs_write_data and nfs_page
696 * to store the value calculated in filelayout_write_pagelist
697 * and just use that here.
698 */
699 j = nfs4_fl_calc_j_index(lseg,
700 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
701 i = select_bucket_index(fl, j);
702 list = &fl->commit_buckets[i];
703 if (list_empty(list)) {
704 /* Non-empty buckets hold a reference on the lseg */
705 get_lseg(lseg);
706 }
707 return list;
708}
709
710static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
711{
712 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
713
714 if (flseg->stripe_type == STRIPE_SPARSE)
715 return i;
716 else
717 return nfs4_fl_calc_ds_index(lseg, i);
718}
719
720static struct nfs_fh *
721select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
722{
723 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
724
725 if (flseg->stripe_type == STRIPE_SPARSE) {
726 if (flseg->num_fh == 1)
727 i = 0;
728 else if (flseg->num_fh == 0)
729 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
730 return NULL;
731 }
732 return flseg->fh_array[i];
733}
734
735static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
736{
737 struct pnfs_layout_segment *lseg = data->lseg;
738 struct nfs4_pnfs_ds *ds;
739 u32 idx;
740 struct nfs_fh *fh;
741
742 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
743 ds = nfs4_fl_prepare_ds(lseg, idx);
744 if (!ds) {
745 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
746 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
747 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
748 prepare_to_resend_writes(data);
749 data->mds_ops->rpc_release(data);
750 return -EAGAIN;
751 }
752 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
753 data->write_done_cb = filelayout_commit_done_cb;
754 data->ds_clp = ds->ds_clp;
755 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
756 if (fh)
757 data->args.fh = fh;
758 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
759 &filelayout_commit_call_ops, how);
760}
761
762/*
763 * This is only useful while we are using whole file layouts.
764 */
765static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
766{
767 struct pnfs_layout_segment *lseg, *rv = NULL;
768
769 spin_lock(&inode->i_lock);
770 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
771 if (lseg->pls_range.iomode == IOMODE_RW)
772 rv = get_lseg(lseg);
773 spin_unlock(&inode->i_lock);
774 return rv;
775}
776
777static int alloc_ds_commits(struct inode *inode, struct list_head *list)
778{
779 struct pnfs_layout_segment *lseg;
780 struct nfs4_filelayout_segment *fl;
781 struct nfs_write_data *data;
782 int i, j;
783
784 /* Won't need this when non-whole file layout segments are supported
785 * instead we will use a pnfs_layout_hdr structure */
786 lseg = find_only_write_lseg(inode);
787 if (!lseg)
788 return 0;
789 fl = FILELAYOUT_LSEG(lseg);
790 for (i = 0; i < fl->number_of_buckets; i++) {
791 if (list_empty(&fl->commit_buckets[i]))
792 continue;
793 data = nfs_commitdata_alloc();
794 if (!data)
795 goto out_bad;
796 data->ds_commit_index = i;
797 data->lseg = lseg;
798 list_add(&data->pages, list);
799 }
800 put_lseg(lseg);
801 return 0;
802
803out_bad:
804 for (j = i; j < fl->number_of_buckets; j++) {
805 if (list_empty(&fl->commit_buckets[i]))
806 continue;
807 nfs_retry_commit(&fl->commit_buckets[i], lseg);
808 put_lseg(lseg); /* associated with emptying bucket */
809 }
810 put_lseg(lseg);
811 /* Caller will clean up entries put on list */
812 return -ENOMEM;
813}
814
815/* This follows nfs_commit_list pretty closely */
816static int
817filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
818 int how)
819{
820 struct nfs_write_data *data, *tmp;
821 LIST_HEAD(list);
822
823 if (!list_empty(mds_pages)) {
824 data = nfs_commitdata_alloc();
825 if (!data)
826 goto out_bad;
827 data->lseg = NULL;
828 list_add(&data->pages, &list);
829 }
830
831 if (alloc_ds_commits(inode, &list))
832 goto out_bad;
833
834 list_for_each_entry_safe(data, tmp, &list, pages) {
835 list_del_init(&data->pages);
836 atomic_inc(&NFS_I(inode)->commits_outstanding);
837 if (!data->lseg) {
838 nfs_init_commit(data, mds_pages, NULL);
839 nfs_initiate_commit(data, NFS_CLIENT(inode),
840 data->mds_ops, how);
841 } else {
842 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
843 filelayout_initiate_commit(data, how);
844 }
845 }
846 return 0;
847 out_bad:
848 list_for_each_entry_safe(data, tmp, &list, pages) {
849 nfs_retry_commit(&data->pages, data->lseg);
850 list_del_init(&data->pages);
851 nfs_commit_free(data);
852 }
853 nfs_retry_commit(mds_pages, NULL);
854 nfs_commit_clear_lock(NFS_I(inode));
855 return -ENOMEM;
856}
857
555static struct pnfs_layoutdriver_type filelayout_type = { 858static struct pnfs_layoutdriver_type filelayout_type = {
556 .id = LAYOUT_NFSV4_1_FILES, 859 .id = LAYOUT_NFSV4_1_FILES,
557 .name = "LAYOUT_NFSV4_1_FILES", 860 .name = "LAYOUT_NFSV4_1_FILES",
@@ -559,6 +862,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
559 .alloc_lseg = filelayout_alloc_lseg, 862 .alloc_lseg = filelayout_alloc_lseg,
560 .free_lseg = filelayout_free_lseg, 863 .free_lseg = filelayout_free_lseg,
561 .pg_test = filelayout_pg_test, 864 .pg_test = filelayout_pg_test,
865 .mark_pnfs_commit = filelayout_mark_pnfs_commit,
866 .choose_commit_list = filelayout_choose_commit_list,
867 .commit_pagelist = filelayout_commit_pagelist,
562 .read_pagelist = filelayout_read_pagelist, 868 .read_pagelist = filelayout_read_pagelist,
563 .write_pagelist = filelayout_write_pagelist, 869 .write_pagelist = filelayout_write_pagelist,
564}; 870};
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index ee0c907742b5..085a354e0f08 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -79,6 +79,8 @@ struct nfs4_filelayout_segment {
79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
80 unsigned int num_fh; 80 unsigned int num_fh;
81 struct nfs_fh **fh_array; 81 struct nfs_fh **fh_array;
82 struct list_head *commit_buckets; /* Sort commits to ds */
83 int number_of_buckets;
82}; 84};
83 85
84static inline struct nfs4_filelayout_segment * 86static inline struct nfs4_filelayout_segment *
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 68143c162e3b..de5350f2b249 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -261,7 +261,7 @@ out:
261 * Currently only support ipv4, and one multi-path address. 261 * Currently only support ipv4, and one multi-path address.
262 */ 262 */
263static struct nfs4_pnfs_ds * 263static struct nfs4_pnfs_ds *
264decode_and_add_ds(__be32 **pp, struct inode *inode) 264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
265{ 265{
266 struct nfs4_pnfs_ds *ds = NULL; 266 struct nfs4_pnfs_ds *ds = NULL;
267 char *buf; 267 char *buf;
@@ -269,25 +269,34 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
269 u32 ip_addr, port; 269 u32 ip_addr, port;
270 int nlen, rlen, i; 270 int nlen, rlen, i;
271 int tmp[2]; 271 int tmp[2];
272 __be32 *r_netid, *r_addr, *p = *pp; 272 __be32 *p;
273 273
274 /* r_netid */ 274 /* r_netid */
275 p = xdr_inline_decode(streamp, 4);
276 if (unlikely(!p))
277 goto out_err;
275 nlen = be32_to_cpup(p++); 278 nlen = be32_to_cpup(p++);
276 r_netid = p;
277 p += XDR_QUADLEN(nlen);
278 279
279 /* r_addr */ 280 p = xdr_inline_decode(streamp, nlen);
280 rlen = be32_to_cpup(p++); 281 if (unlikely(!p))
281 r_addr = p; 282 goto out_err;
282 p += XDR_QUADLEN(rlen);
283 *pp = p;
284 283
285 /* Check that netid is "tcp" */ 284 /* Check that netid is "tcp" */
286 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { 285 if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
287 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); 286 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
288 goto out_err; 287 goto out_err;
289 } 288 }
290 289
290 /* r_addr */
291 p = xdr_inline_decode(streamp, 4);
292 if (unlikely(!p))
293 goto out_err;
294 rlen = be32_to_cpup(p);
295
296 p = xdr_inline_decode(streamp, rlen);
297 if (unlikely(!p))
298 goto out_err;
299
291 /* ipv6 length plus port is legal */ 300 /* ipv6 length plus port is legal */
292 if (rlen > INET6_ADDRSTRLEN + 8) { 301 if (rlen > INET6_ADDRSTRLEN + 8) {
293 dprintk("%s: Invalid address, length %d\n", __func__, 302 dprintk("%s: Invalid address, length %d\n", __func__,
@@ -300,7 +309,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
300 goto out_err; 309 goto out_err;
301 } 310 }
302 buf[rlen] = '\0'; 311 buf[rlen] = '\0';
303 memcpy(buf, r_addr, rlen); 312 memcpy(buf, p, rlen);
304 313
305 /* replace the port dots with dashes for the in4_pton() delimiter*/ 314 /* replace the port dots with dashes for the in4_pton() delimiter*/
306 for (i = 0; i < 2; i++) { 315 for (i = 0; i < 2; i++) {
@@ -336,90 +345,154 @@ out_err:
336static struct nfs4_file_layout_dsaddr* 345static struct nfs4_file_layout_dsaddr*
337decode_device(struct inode *ino, struct pnfs_device *pdev) 346decode_device(struct inode *ino, struct pnfs_device *pdev)
338{ 347{
339 int i, dummy; 348 int i;
340 u32 cnt, num; 349 u32 cnt, num;
341 u8 *indexp; 350 u8 *indexp;
342 __be32 *p = (__be32 *)pdev->area, *indicesp; 351 __be32 *p;
343 struct nfs4_file_layout_dsaddr *dsaddr; 352 u8 *stripe_indices;
353 u8 max_stripe_index;
354 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
355 struct xdr_stream stream;
356 struct xdr_buf buf = {
357 .pages = pdev->pages,
358 .page_len = pdev->pglen,
359 .buflen = pdev->pglen,
360 .len = pdev->pglen,
361 };
362 struct page *scratch;
363
364 /* set up xdr stream */
365 scratch = alloc_page(GFP_KERNEL);
366 if (!scratch)
367 goto out_err;
368
369 xdr_init_decode(&stream, &buf, NULL);
370 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
344 371
345 /* Get the stripe count (number of stripe index) */ 372 /* Get the stripe count (number of stripe index) */
346 cnt = be32_to_cpup(p++); 373 p = xdr_inline_decode(&stream, 4);
374 if (unlikely(!p))
375 goto out_err_free_scratch;
376
377 cnt = be32_to_cpup(p);
347 dprintk("%s stripe count %d\n", __func__, cnt); 378 dprintk("%s stripe count %d\n", __func__, cnt);
348 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { 379 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
349 printk(KERN_WARNING "%s: stripe count %d greater than " 380 printk(KERN_WARNING "%s: stripe count %d greater than "
350 "supported maximum %d\n", __func__, 381 "supported maximum %d\n", __func__,
351 cnt, NFS4_PNFS_MAX_STRIPE_CNT); 382 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
352 goto out_err; 383 goto out_err_free_scratch;
384 }
385
386 /* read stripe indices */
387 stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
388 if (!stripe_indices)
389 goto out_err_free_scratch;
390
391 p = xdr_inline_decode(&stream, cnt << 2);
392 if (unlikely(!p))
393 goto out_err_free_stripe_indices;
394
395 indexp = &stripe_indices[0];
396 max_stripe_index = 0;
397 for (i = 0; i < cnt; i++) {
398 *indexp = be32_to_cpup(p++);
399 max_stripe_index = max(max_stripe_index, *indexp);
400 indexp++;
353 } 401 }
354 402
355 /* Check the multipath list count */ 403 /* Check the multipath list count */
356 indicesp = p; 404 p = xdr_inline_decode(&stream, 4);
357 p += XDR_QUADLEN(cnt << 2); 405 if (unlikely(!p))
358 num = be32_to_cpup(p++); 406 goto out_err_free_stripe_indices;
407
408 num = be32_to_cpup(p);
359 dprintk("%s ds_num %u\n", __func__, num); 409 dprintk("%s ds_num %u\n", __func__, num);
360 if (num > NFS4_PNFS_MAX_MULTI_CNT) { 410 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
361 printk(KERN_WARNING "%s: multipath count %d greater than " 411 printk(KERN_WARNING "%s: multipath count %d greater than "
362 "supported maximum %d\n", __func__, 412 "supported maximum %d\n", __func__,
363 num, NFS4_PNFS_MAX_MULTI_CNT); 413 num, NFS4_PNFS_MAX_MULTI_CNT);
364 goto out_err; 414 goto out_err_free_stripe_indices;
365 } 415 }
416
417 /* validate stripe indices are all < num */
418 if (max_stripe_index >= num) {
419 printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
420 __func__, max_stripe_index, num);
421 goto out_err_free_stripe_indices;
422 }
423
366 dsaddr = kzalloc(sizeof(*dsaddr) + 424 dsaddr = kzalloc(sizeof(*dsaddr) +
367 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
368 GFP_KERNEL); 426 GFP_KERNEL);
369 if (!dsaddr) 427 if (!dsaddr)
370 goto out_err; 428 goto out_err_free_stripe_indices;
371
372 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
373 if (!dsaddr->stripe_indices)
374 goto out_err_free;
375 429
376 dsaddr->stripe_count = cnt; 430 dsaddr->stripe_count = cnt;
431 dsaddr->stripe_indices = stripe_indices;
432 stripe_indices = NULL;
377 dsaddr->ds_num = num; 433 dsaddr->ds_num = num;
378 434
379 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); 435 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
380 436
381 /* Go back an read stripe indices */
382 p = indicesp;
383 indexp = &dsaddr->stripe_indices[0];
384 for (i = 0; i < dsaddr->stripe_count; i++) {
385 *indexp = be32_to_cpup(p++);
386 if (*indexp >= num)
387 goto out_err_free;
388 indexp++;
389 }
390 /* Skip already read multipath list count */
391 p++;
392
393 for (i = 0; i < dsaddr->ds_num; i++) { 437 for (i = 0; i < dsaddr->ds_num; i++) {
394 int j; 438 int j;
439 u32 mp_count;
440
441 p = xdr_inline_decode(&stream, 4);
442 if (unlikely(!p))
443 goto out_err_free_deviceid;
395 444
396 dummy = be32_to_cpup(p++); /* multipath count */ 445 mp_count = be32_to_cpup(p); /* multipath count */
397 if (dummy > 1) { 446 if (mp_count > 1) {
398 printk(KERN_WARNING 447 printk(KERN_WARNING
399 "%s: Multipath count %d not supported, " 448 "%s: Multipath count %d not supported, "
400 "skipping all greater than 1\n", __func__, 449 "skipping all greater than 1\n", __func__,
401 dummy); 450 mp_count);
402 } 451 }
403 for (j = 0; j < dummy; j++) { 452 for (j = 0; j < mp_count; j++) {
404 if (j == 0) { 453 if (j == 0) {
405 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); 454 dsaddr->ds_list[i] = decode_and_add_ds(&stream,
455 ino);
406 if (dsaddr->ds_list[i] == NULL) 456 if (dsaddr->ds_list[i] == NULL)
407 goto out_err_free; 457 goto out_err_free_deviceid;
408 } else { 458 } else {
409 u32 len; 459 u32 len;
410 /* skip extra multipath */ 460 /* skip extra multipath */
411 len = be32_to_cpup(p++); 461
412 p += XDR_QUADLEN(len); 462 /* read len, skip */
413 len = be32_to_cpup(p++); 463 p = xdr_inline_decode(&stream, 4);
414 p += XDR_QUADLEN(len); 464 if (unlikely(!p))
415 continue; 465 goto out_err_free_deviceid;
466 len = be32_to_cpup(p);
467
468 p = xdr_inline_decode(&stream, len);
469 if (unlikely(!p))
470 goto out_err_free_deviceid;
471
472 /* read len, skip */
473 p = xdr_inline_decode(&stream, 4);
474 if (unlikely(!p))
475 goto out_err_free_deviceid;
476 len = be32_to_cpup(p);
477
478 p = xdr_inline_decode(&stream, len);
479 if (unlikely(!p))
480 goto out_err_free_deviceid;
416 } 481 }
417 } 482 }
418 } 483 }
484
485 __free_page(scratch);
419 return dsaddr; 486 return dsaddr;
420 487
421out_err_free: 488out_err_free_deviceid:
422 nfs4_fl_free_deviceid(dsaddr); 489 nfs4_fl_free_deviceid(dsaddr);
490 /* stripe_indicies was part of dsaddr */
491 goto out_err_free_scratch;
492out_err_free_stripe_indices:
493 kfree(stripe_indices);
494out_err_free_scratch:
495 __free_page(scratch);
423out_err: 496out_err:
424 dprintk("%s ERROR: returning NULL\n", __func__); 497 dprintk("%s ERROR: returning NULL\n", __func__);
425 return NULL; 498 return NULL;
@@ -498,11 +571,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
498 goto out_free; 571 goto out_free;
499 } 572 }
500 573
501 /* set pdev->area */
502 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
503 if (!pdev->area)
504 goto out_free;
505
506 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); 574 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
507 pdev->layout_type = LAYOUT_NFSV4_1_FILES; 575 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
508 pdev->pages = pages; 576 pdev->pages = pages;
@@ -521,8 +589,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
521 */ 589 */
522 dsaddr = decode_and_add_device(inode, pdev); 590 dsaddr = decode_and_add_device(inode, pdev);
523out_free: 591out_free:
524 if (pdev->area != NULL)
525 vunmap(pdev->area);
526 for (i = 0; i < max_pages; i++) 592 for (i = 0; i < max_pages; i++)
527 __free_page(pages[i]); 593 __free_page(pages[i]);
528 kfree(pages); 594 kfree(pages);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1d84e7088af9..dfd1e6d7e6c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -41,6 +41,7 @@
41#include <linux/string.h> 41#include <linux/string.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/sunrpc/clnt.h> 43#include <linux/sunrpc/clnt.h>
44#include <linux/sunrpc/gss_api.h>
44#include <linux/nfs.h> 45#include <linux/nfs.h>
45#include <linux/nfs4.h> 46#include <linux/nfs4.h>
46#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
@@ -71,7 +72,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data);
71static int _nfs4_recover_proc_open(struct nfs4_opendata *data); 72static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
72static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 73static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
73static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 74static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
74static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 75static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir,
76 const struct qstr *name, struct nfs_fh *fhandle,
77 struct nfs_fattr *fattr);
75static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 78static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
76static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 79static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
77 struct nfs_fattr *fattr, struct iattr *sattr, 80 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -85,6 +88,8 @@ static int nfs4_map_errors(int err)
85 switch (err) { 88 switch (err) {
86 case -NFS4ERR_RESOURCE: 89 case -NFS4ERR_RESOURCE:
87 return -EREMOTEIO; 90 return -EREMOTEIO;
91 case -NFS4ERR_WRONGSEC:
92 return -EPERM;
88 case -NFS4ERR_BADOWNER: 93 case -NFS4ERR_BADOWNER:
89 case -NFS4ERR_BADNAME: 94 case -NFS4ERR_BADNAME:
90 return -EINVAL; 95 return -EINVAL;
@@ -657,7 +662,8 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = {
657 .rpc_call_done = nfs41_call_sync_done, 662 .rpc_call_done = nfs41_call_sync_done,
658}; 663};
659 664
660static int nfs4_call_sync_sequence(struct nfs_server *server, 665static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
666 struct nfs_server *server,
661 struct rpc_message *msg, 667 struct rpc_message *msg,
662 struct nfs4_sequence_args *args, 668 struct nfs4_sequence_args *args,
663 struct nfs4_sequence_res *res, 669 struct nfs4_sequence_res *res,
@@ -673,7 +679,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
673 .cache_reply = cache_reply, 679 .cache_reply = cache_reply,
674 }; 680 };
675 struct rpc_task_setup task_setup = { 681 struct rpc_task_setup task_setup = {
676 .rpc_client = server->client, 682 .rpc_client = clnt,
677 .rpc_message = msg, 683 .rpc_message = msg,
678 .callback_ops = &nfs41_call_sync_ops, 684 .callback_ops = &nfs41_call_sync_ops,
679 .callback_data = &data 685 .callback_data = &data
@@ -692,13 +698,14 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
692 return ret; 698 return ret;
693} 699}
694 700
/*
 * call_sync variant built on nfs4_call_sync_sequence(); the RPC is issued
 * on @clnt and the final argument of the helper is passed as 0.
 */
int _nfs4_call_sync_session(struct rpc_clnt *clnt,
			    struct nfs_server *server,
			    struct rpc_message *msg,
			    struct nfs4_sequence_args *args,
			    struct nfs4_sequence_res *res,
			    int cache_reply)
{
	int status;

	status = nfs4_call_sync_sequence(clnt, server, msg, args, res,
					 cache_reply, 0);
	return status;
}
703 710
704#else 711#else
@@ -709,19 +716,28 @@ static int nfs4_sequence_done(struct rpc_task *task,
709} 716}
710#endif /* CONFIG_NFS_V4_1 */ 717#endif /* CONFIG_NFS_V4_1 */
711 718
712int _nfs4_call_sync(struct nfs_server *server, 719int _nfs4_call_sync(struct rpc_clnt *clnt,
720 struct nfs_server *server,
713 struct rpc_message *msg, 721 struct rpc_message *msg,
714 struct nfs4_sequence_args *args, 722 struct nfs4_sequence_args *args,
715 struct nfs4_sequence_res *res, 723 struct nfs4_sequence_res *res,
716 int cache_reply) 724 int cache_reply)
717{ 725{
718 args->sa_session = res->sr_session = NULL; 726 args->sa_session = res->sr_session = NULL;
719 return rpc_call_sync(server->client, msg, 0); 727 return rpc_call_sync(clnt, msg, 0);
720} 728}
721 729
722#define nfs4_call_sync(server, msg, args, res, cache_reply) \ 730static inline
723 (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \ 731int nfs4_call_sync(struct rpc_clnt *clnt,
724 &(res)->seq_res, (cache_reply)) 732 struct nfs_server *server,
733 struct rpc_message *msg,
734 struct nfs4_sequence_args *args,
735 struct nfs4_sequence_res *res,
736 int cache_reply)
737{
738 return server->nfs_client->cl_mvops->call_sync(clnt, server, msg,
739 args, res, cache_reply);
740}
725 741
726static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) 742static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
727{ 743{
@@ -1831,7 +1847,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1831 } else 1847 } else
1832 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1848 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
1833 1849
1834 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 1850 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
1835 if (status == 0 && state != NULL) 1851 if (status == 0 && state != NULL)
1836 renew_lease(server, timestamp); 1852 renew_lease(server, timestamp);
1837 return status; 1853 return status;
@@ -2090,7 +2106,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
2090 }; 2106 };
2091 int status; 2107 int status;
2092 2108
2093 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2109 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2094 if (status == 0) { 2110 if (status == 0) {
2095 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); 2111 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
2096 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| 2112 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
@@ -2160,7 +2176,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2160 }; 2176 };
2161 2177
2162 nfs_fattr_init(info->fattr); 2178 nfs_fattr_init(info->fattr);
2163 return nfs4_call_sync(server, &msg, &args, &res, 0); 2179 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2164} 2180}
2165 2181
2166static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, 2182static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -2176,15 +2192,43 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2176 return err; 2192 return err;
2177} 2193}
2178 2194
2195static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2196 struct nfs_fsinfo *info, rpc_authflavor_t flavor)
2197{
2198 struct rpc_auth *auth;
2199 int ret;
2200
2201 auth = rpcauth_create(flavor, server->client);
2202 if (!auth) {
2203 ret = -EIO;
2204 goto out;
2205 }
2206 ret = nfs4_lookup_root(server, fhandle, info);
2207 if (ret < 0)
2208 ret = -EAGAIN;
2209out:
2210 return ret;
2211}
2212
2179/* 2213/*
2180 * get the file handle for the "/" directory on the server 2214 * get the file handle for the "/" directory on the server
2181 */ 2215 */
2182static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2216static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2183 struct nfs_fsinfo *info) 2217 struct nfs_fsinfo *info)
2184{ 2218{
2185 int status; 2219 int i, len, status = 0;
2220 rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2];
2221
2222 flav_array[0] = RPC_AUTH_UNIX;
2223 len = gss_mech_list_pseudoflavors(&flav_array[1]);
2224 flav_array[1+len] = RPC_AUTH_NULL;
2225 len += 2;
2186 2226
2187 status = nfs4_lookup_root(server, fhandle, info); 2227 for (i = 0; i < len; i++) {
2228 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
2229 if (status == 0)
2230 break;
2231 }
2188 if (status == 0) 2232 if (status == 0)
2189 status = nfs4_server_capabilities(server, fhandle); 2233 status = nfs4_server_capabilities(server, fhandle);
2190 if (status == 0) 2234 if (status == 0)
@@ -2249,7 +2293,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
2249 }; 2293 };
2250 2294
2251 nfs_fattr_init(fattr); 2295 nfs_fattr_init(fattr);
2252 return nfs4_call_sync(server, &msg, &args, &res, 0); 2296 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2253} 2297}
2254 2298
2255static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2299static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -2309,9 +2353,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2309 return status; 2353 return status;
2310} 2354}
2311 2355
2312static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *dirfh, 2356static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server,
2313 const struct qstr *name, struct nfs_fh *fhandle, 2357 const struct nfs_fh *dirfh, const struct qstr *name,
2314 struct nfs_fattr *fattr) 2358 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2315{ 2359{
2316 int status; 2360 int status;
2317 struct nfs4_lookup_arg args = { 2361 struct nfs4_lookup_arg args = {
@@ -2333,7 +2377,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d
2333 nfs_fattr_init(fattr); 2377 nfs_fattr_init(fattr);
2334 2378
2335 dprintk("NFS call lookupfh %s\n", name->name); 2379 dprintk("NFS call lookupfh %s\n", name->name);
2336 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2380 status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
2337 dprintk("NFS reply lookupfh: %d\n", status); 2381 dprintk("NFS reply lookupfh: %d\n", status);
2338 return status; 2382 return status;
2339} 2383}
@@ -2345,7 +2389,7 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2345 struct nfs4_exception exception = { }; 2389 struct nfs4_exception exception = { };
2346 int err; 2390 int err;
2347 do { 2391 do {
2348 err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); 2392 err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr);
2349 /* FIXME: !!!! */ 2393 /* FIXME: !!!! */
2350 if (err == -NFS4ERR_MOVED) { 2394 if (err == -NFS4ERR_MOVED) {
2351 err = -EREMOTE; 2395 err = -EREMOTE;
@@ -2356,27 +2400,41 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2356 return err; 2400 return err;
2357} 2401}
2358 2402
2359static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, 2403static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
2360 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2404 const struct qstr *name, struct nfs_fh *fhandle,
2405 struct nfs_fattr *fattr)
2361{ 2406{
2362 int status; 2407 int status;
2363 2408
2364 dprintk("NFS call lookup %s\n", name->name); 2409 dprintk("NFS call lookup %s\n", name->name);
2365 status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); 2410 status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
2366 if (status == -NFS4ERR_MOVED) 2411 if (status == -NFS4ERR_MOVED)
2367 status = nfs4_get_referral(dir, name, fattr, fhandle); 2412 status = nfs4_get_referral(dir, name, fattr, fhandle);
2368 dprintk("NFS reply lookup: %d\n", status); 2413 dprintk("NFS reply lookup: %d\n", status);
2369 return status; 2414 return status;
2370} 2415}
2371 2416
2372static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2417void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh)
2418{
2419 memset(fh, 0, sizeof(struct nfs_fh));
2420 fattr->fsid.major = 1;
2421 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
2422 NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT;
2423 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
2424 fattr->nlink = 2;
2425}
2426
2427static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
2428 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2373{ 2429{
2374 struct nfs4_exception exception = { }; 2430 struct nfs4_exception exception = { };
2375 int err; 2431 int err;
2376 do { 2432 do {
2377 err = nfs4_handle_exception(NFS_SERVER(dir), 2433 err = nfs4_handle_exception(NFS_SERVER(dir),
2378 _nfs4_proc_lookup(dir, name, fhandle, fattr), 2434 _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr),
2379 &exception); 2435 &exception);
2436 if (err == -EPERM)
2437 nfs_fixup_secinfo_attributes(fattr, fhandle);
2380 } while (exception.retry); 2438 } while (exception.retry);
2381 return err; 2439 return err;
2382} 2440}
@@ -2421,7 +2479,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2421 if (res.fattr == NULL) 2479 if (res.fattr == NULL)
2422 return -ENOMEM; 2480 return -ENOMEM;
2423 2481
2424 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2482 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2425 if (!status) { 2483 if (!status) {
2426 entry->mask = 0; 2484 entry->mask = 0;
2427 if (res.access & NFS4_ACCESS_READ) 2485 if (res.access & NFS4_ACCESS_READ)
@@ -2488,7 +2546,7 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
2488 .rpc_resp = &res, 2546 .rpc_resp = &res,
2489 }; 2547 };
2490 2548
2491 return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); 2549 return nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
2492} 2550}
2493 2551
2494static int nfs4_proc_readlink(struct inode *inode, struct page *page, 2552static int nfs4_proc_readlink(struct inode *inode, struct page *page,
@@ -2577,7 +2635,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2577 if (res.dir_attr == NULL) 2635 if (res.dir_attr == NULL)
2578 goto out; 2636 goto out;
2579 2637
2580 status = nfs4_call_sync(server, &msg, &args, &res, 1); 2638 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
2581 if (status == 0) { 2639 if (status == 0) {
2582 update_changeattr(dir, &res.cinfo); 2640 update_changeattr(dir, &res.cinfo);
2583 nfs_post_op_update_inode(dir, res.dir_attr); 2641 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2678,7 +2736,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2678 if (res.old_fattr == NULL || res.new_fattr == NULL) 2736 if (res.old_fattr == NULL || res.new_fattr == NULL)
2679 goto out; 2737 goto out;
2680 2738
2681 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2739 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2682 if (!status) { 2740 if (!status) {
2683 update_changeattr(old_dir, &res.old_cinfo); 2741 update_changeattr(old_dir, &res.old_cinfo);
2684 nfs_post_op_update_inode(old_dir, res.old_fattr); 2742 nfs_post_op_update_inode(old_dir, res.old_fattr);
@@ -2729,7 +2787,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2729 if (res.fattr == NULL || res.dir_attr == NULL) 2787 if (res.fattr == NULL || res.dir_attr == NULL)
2730 goto out; 2788 goto out;
2731 2789
2732 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2790 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2733 if (!status) { 2791 if (!status) {
2734 update_changeattr(dir, &res.cinfo); 2792 update_changeattr(dir, &res.cinfo);
2735 nfs_post_op_update_inode(dir, res.dir_attr); 2793 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2792,8 +2850,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
2792 2850
2793static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) 2851static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
2794{ 2852{
2795 int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, 2853 int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
2796 &data->arg, &data->res, 1); 2854 &data->arg.seq_args, &data->res.seq_res, 1);
2797 if (status == 0) { 2855 if (status == 0) {
2798 update_changeattr(dir, &data->res.dir_cinfo); 2856 update_changeattr(dir, &data->res.dir_cinfo);
2799 nfs_post_op_update_inode(dir, data->res.dir_fattr); 2857 nfs_post_op_update_inode(dir, data->res.dir_fattr);
@@ -2905,7 +2963,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2905 (unsigned long long)cookie); 2963 (unsigned long long)cookie);
2906 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 2964 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
2907 res.pgbase = args.pgbase; 2965 res.pgbase = args.pgbase;
2908 status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); 2966 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
2909 if (status >= 0) { 2967 if (status >= 0) {
2910 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 2968 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
2911 status += args.pgbase; 2969 status += args.pgbase;
@@ -2997,7 +3055,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
2997 }; 3055 };
2998 3056
2999 nfs_fattr_init(fsstat->fattr); 3057 nfs_fattr_init(fsstat->fattr);
3000 return nfs4_call_sync(server, &msg, &args, &res, 0); 3058 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3001} 3059}
3002 3060
3003static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) 3061static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
@@ -3028,7 +3086,7 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
3028 .rpc_resp = &res, 3086 .rpc_resp = &res,
3029 }; 3087 };
3030 3088
3031 return nfs4_call_sync(server, &msg, &args, &res, 0); 3089 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3032} 3090}
3033 3091
3034static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) 3092static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
@@ -3073,7 +3131,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
3073 } 3131 }
3074 3132
3075 nfs_fattr_init(pathconf->fattr); 3133 nfs_fattr_init(pathconf->fattr);
3076 return nfs4_call_sync(server, &msg, &args, &res, 0); 3134 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3077} 3135}
3078 3136
3079static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, 3137static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -3195,12 +3253,9 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3195 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 3253 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
3196} 3254}
3197 3255
3198static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3256static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3199{ 3257{
3200 struct inode *inode = data->inode; 3258 struct inode *inode = data->inode;
3201
3202 if (!nfs4_sequence_done(task, &data->res.seq_res))
3203 return -EAGAIN;
3204 3259
3205 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3260 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3206 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3261 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
@@ -3210,11 +3265,24 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3210 return 0; 3265 return 0;
3211} 3266}
3212 3267
3268static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3269{
3270 if (!nfs4_sequence_done(task, &data->res.seq_res))
3271 return -EAGAIN;
3272 return data->write_done_cb(task, data);
3273}
3274
3213static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3275static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
3214{ 3276{
3215 struct nfs_server *server = NFS_SERVER(data->inode); 3277 struct nfs_server *server = NFS_SERVER(data->inode);
3216 3278
3217 data->args.bitmask = server->cache_consistency_bitmask; 3279 if (data->lseg) {
3280 data->args.bitmask = NULL;
3281 data->res.fattr = NULL;
3282 } else
3283 data->args.bitmask = server->cache_consistency_bitmask;
3284 if (!data->write_done_cb)
3285 data->write_done_cb = nfs4_commit_done_cb;
3218 data->res.server = server; 3286 data->res.server = server;
3219 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3287 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3220} 3288}
@@ -3452,7 +3520,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3452 resp_buf = buf; 3520 resp_buf = buf;
3453 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); 3521 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
3454 } 3522 }
3455 ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); 3523 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
3456 if (ret) 3524 if (ret)
3457 goto out_free; 3525 goto out_free;
3458 if (res.acl_len > args.acl_len) 3526 if (res.acl_len > args.acl_len)
@@ -3527,7 +3595,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3527 if (i < 0) 3595 if (i < 0)
3528 return i; 3596 return i;
3529 nfs_inode_return_delegation(inode); 3597 nfs_inode_return_delegation(inode);
3530 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3598 ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3531 3599
3532 /* 3600 /*
3533 * Free each page after tx, so the only ref left is 3601 * Free each page after tx, so the only ref left is
@@ -3890,7 +3958,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3890 lsp = request->fl_u.nfs4_fl.owner; 3958 lsp = request->fl_u.nfs4_fl.owner;
3891 arg.lock_owner.id = lsp->ls_id.id; 3959 arg.lock_owner.id = lsp->ls_id.id;
3892 arg.lock_owner.s_dev = server->s_dev; 3960 arg.lock_owner.s_dev = server->s_dev;
3893 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 3961 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3894 switch (status) { 3962 switch (status) {
3895 case 0: 3963 case 0:
3896 request->fl_type = F_UNLCK; 3964 request->fl_type = F_UNLCK;
@@ -4618,12 +4686,46 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4618 nfs_fattr_init(&fs_locations->fattr); 4686 nfs_fattr_init(&fs_locations->fattr);
4619 fs_locations->server = server; 4687 fs_locations->server = server;
4620 fs_locations->nlocations = 0; 4688 fs_locations->nlocations = 0;
4621 status = nfs4_call_sync(server, &msg, &args, &res, 0); 4689 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
4622 nfs_fixup_referral_attributes(&fs_locations->fattr); 4690 nfs_fixup_referral_attributes(&fs_locations->fattr);
4623 dprintk("%s: returned status = %d\n", __func__, status); 4691 dprintk("%s: returned status = %d\n", __func__, status);
4624 return status; 4692 return status;
4625} 4693}
4626 4694
4695static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4696{
4697 int status;
4698 struct nfs4_secinfo_arg args = {
4699 .dir_fh = NFS_FH(dir),
4700 .name = name,
4701 };
4702 struct nfs4_secinfo_res res = {
4703 .flavors = flavors,
4704 };
4705 struct rpc_message msg = {
4706 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO],
4707 .rpc_argp = &args,
4708 .rpc_resp = &res,
4709 };
4710
4711 dprintk("NFS call secinfo %s\n", name->name);
4712 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
4713 dprintk("NFS reply secinfo: %d\n", status);
4714 return status;
4715}
4716
4717int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4718{
4719 struct nfs4_exception exception = { };
4720 int err;
4721 do {
4722 err = nfs4_handle_exception(NFS_SERVER(dir),
4723 _nfs4_proc_secinfo(dir, name, flavors),
4724 &exception);
4725 } while (exception.retry);
4726 return err;
4727}
4728
4627#ifdef CONFIG_NFS_V4_1 4729#ifdef CONFIG_NFS_V4_1
4628/* 4730/*
4629 * Check the exchange flags returned by the server for invalid flags, having 4731 * Check the exchange flags returned by the server for invalid flags, having
@@ -5516,8 +5618,6 @@ static void nfs4_layoutget_release(void *calldata)
5516 struct nfs4_layoutget *lgp = calldata; 5618 struct nfs4_layoutget *lgp = calldata;
5517 5619
5518 dprintk("--> %s\n", __func__); 5620 dprintk("--> %s\n", __func__);
5519 if (lgp->res.layout.buf != NULL)
5520 free_page((unsigned long) lgp->res.layout.buf);
5521 put_nfs_open_context(lgp->args.ctx); 5621 put_nfs_open_context(lgp->args.ctx);
5522 kfree(calldata); 5622 kfree(calldata);
5523 dprintk("<-- %s\n", __func__); 5623 dprintk("<-- %s\n", __func__);
@@ -5549,12 +5649,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5549 5649
5550 dprintk("--> %s\n", __func__); 5650 dprintk("--> %s\n", __func__);
5551 5651
5552 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); 5652 lgp->res.layoutp = &lgp->args.layout;
5553 if (lgp->res.layout.buf == NULL) {
5554 nfs4_layoutget_release(lgp);
5555 return -ENOMEM;
5556 }
5557
5558 lgp->res.seq_res.sr_slot = NULL; 5653 lgp->res.seq_res.sr_slot = NULL;
5559 task = rpc_run_task(&task_setup_data); 5654 task = rpc_run_task(&task_setup_data);
5560 if (IS_ERR(task)) 5655 if (IS_ERR(task))
@@ -5586,7 +5681,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5586 int status; 5681 int status;
5587 5682
5588 dprintk("--> %s\n", __func__); 5683 dprintk("--> %s\n", __func__);
5589 status = nfs4_call_sync(server, &msg, &args, &res, 0); 5684 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
5590 dprintk("<-- %s status=%d\n", __func__, status); 5685 dprintk("<-- %s status=%d\n", __func__, status);
5591 5686
5592 return status; 5687 return status;
@@ -5606,6 +5701,100 @@ int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5606} 5701}
5607EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); 5702EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5608 5703
5704static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
5705{
5706 struct nfs4_layoutcommit_data *data = calldata;
5707 struct nfs_server *server = NFS_SERVER(data->args.inode);
5708
5709 if (nfs4_setup_sequence(server, &data->args.seq_args,
5710 &data->res.seq_res, 1, task))
5711 return;
5712 rpc_call_start(task);
5713}
5714
5715static void
5716nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
5717{
5718 struct nfs4_layoutcommit_data *data = calldata;
5719 struct nfs_server *server = NFS_SERVER(data->args.inode);
5720
5721 if (!nfs4_sequence_done(task, &data->res.seq_res))
5722 return;
5723
5724 switch (task->tk_status) { /* Just ignore these failures */
5725 case NFS4ERR_DELEG_REVOKED: /* layout was recalled */
5726 case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
5727 case NFS4ERR_BADLAYOUT: /* no layout */
5728 case NFS4ERR_GRACE: /* loca_recalim always false */
5729 task->tk_status = 0;
5730 }
5731
5732 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5733 nfs_restart_rpc(task, server->nfs_client);
5734 return;
5735 }
5736
5737 if (task->tk_status == 0)
5738 nfs_post_op_update_inode_force_wcc(data->args.inode,
5739 data->res.fattr);
5740}
5741
5742static void nfs4_layoutcommit_release(void *calldata)
5743{
5744 struct nfs4_layoutcommit_data *data = calldata;
5745
5746 /* Matched by references in pnfs_set_layoutcommit */
5747 put_lseg(data->lseg);
5748 put_rpccred(data->cred);
5749 kfree(data);
5750}
5751
5752static const struct rpc_call_ops nfs4_layoutcommit_ops = {
5753 .rpc_call_prepare = nfs4_layoutcommit_prepare,
5754 .rpc_call_done = nfs4_layoutcommit_done,
5755 .rpc_release = nfs4_layoutcommit_release,
5756};
5757
5758int
5759nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
5760{
5761 struct rpc_message msg = {
5762 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
5763 .rpc_argp = &data->args,
5764 .rpc_resp = &data->res,
5765 .rpc_cred = data->cred,
5766 };
5767 struct rpc_task_setup task_setup_data = {
5768 .task = &data->task,
5769 .rpc_client = NFS_CLIENT(data->args.inode),
5770 .rpc_message = &msg,
5771 .callback_ops = &nfs4_layoutcommit_ops,
5772 .callback_data = data,
5773 .flags = RPC_TASK_ASYNC,
5774 };
5775 struct rpc_task *task;
5776 int status = 0;
5777
5778 dprintk("NFS: %4d initiating layoutcommit call. sync %d "
5779 "lbw: %llu inode %lu\n",
5780 data->task.tk_pid, sync,
5781 data->args.lastbytewritten,
5782 data->args.inode->i_ino);
5783
5784 task = rpc_run_task(&task_setup_data);
5785 if (IS_ERR(task))
5786 return PTR_ERR(task);
5787 if (sync == false)
5788 goto out;
5789 status = nfs4_wait_for_completion_rpc_task(task);
5790 if (status != 0)
5791 goto out;
5792 status = task->tk_status;
5793out:
5794 dprintk("%s: status %d\n", __func__, status);
5795 rpc_put_task(task);
5796 return status;
5797}
5609#endif /* CONFIG_NFS_V4_1 */ 5798#endif /* CONFIG_NFS_V4_1 */
5610 5799
5611struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5800struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5741,6 +5930,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5741 .close_context = nfs4_close_context, 5930 .close_context = nfs4_close_context,
5742 .open_context = nfs4_atomic_open, 5931 .open_context = nfs4_atomic_open,
5743 .init_client = nfs4_init_client, 5932 .init_client = nfs4_init_client,
5933 .secinfo = nfs4_proc_secinfo,
5744}; 5934};
5745 5935
5746static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 5936static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0cf560f77884..dddfb5795d7b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -46,6 +46,7 @@
46#include <linux/kdev_t.h> 46#include <linux/kdev_t.h>
47#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
48#include <linux/sunrpc/msg_prot.h> 48#include <linux/sunrpc/msg_prot.h>
49#include <linux/sunrpc/gss_api.h>
49#include <linux/nfs.h> 50#include <linux/nfs.h>
50#include <linux/nfs4.h> 51#include <linux/nfs4.h>
51#include <linux/nfs_fs.h> 52#include <linux/nfs_fs.h>
@@ -112,7 +113,7 @@ static int nfs4_stat_to_errno(int);
112#define encode_restorefh_maxsz (op_encode_hdr_maxsz) 113#define encode_restorefh_maxsz (op_encode_hdr_maxsz)
113#define decode_restorefh_maxsz (op_decode_hdr_maxsz) 114#define decode_restorefh_maxsz (op_decode_hdr_maxsz)
114#define encode_fsinfo_maxsz (encode_getattr_maxsz) 115#define encode_fsinfo_maxsz (encode_getattr_maxsz)
115#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) 116#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15)
116#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) 117#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
117#define decode_renew_maxsz (op_decode_hdr_maxsz) 118#define decode_renew_maxsz (op_decode_hdr_maxsz)
118#define encode_setclientid_maxsz \ 119#define encode_setclientid_maxsz \
@@ -253,6 +254,8 @@ static int nfs4_stat_to_errno(int);
253 (encode_getattr_maxsz) 254 (encode_getattr_maxsz)
254#define decode_fs_locations_maxsz \ 255#define decode_fs_locations_maxsz \
255 (0) 256 (0)
257#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
258#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
256 259
257#if defined(CONFIG_NFS_V4_1) 260#if defined(CONFIG_NFS_V4_1)
258#define NFS4_MAX_MACHINE_NAME_LEN (64) 261#define NFS4_MAX_MACHINE_NAME_LEN (64)
@@ -324,6 +327,18 @@ static int nfs4_stat_to_errno(int);
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ 327#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \ 328 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) 329 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
330#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
331 2 /* offset */ + \
332 2 /* length */ + \
333 1 /* reclaim */ + \
334 encode_stateid_maxsz + \
335 1 /* new offset (true) */ + \
336 2 /* last byte written */ + \
337 1 /* nt_timechanged (false) */ + \
338 1 /* layoutupdate4 layout type */ + \
339 1 /* NULL filelayout layoutupdate4 payload */)
340#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
341
327#else /* CONFIG_NFS_V4_1 */ 342#else /* CONFIG_NFS_V4_1 */
328#define encode_sequence_maxsz 0 343#define encode_sequence_maxsz 0
329#define decode_sequence_maxsz 0 344#define decode_sequence_maxsz 0
@@ -676,6 +691,14 @@ static int nfs4_stat_to_errno(int);
676 decode_putfh_maxsz + \ 691 decode_putfh_maxsz + \
677 decode_lookup_maxsz + \ 692 decode_lookup_maxsz + \
678 decode_fs_locations_maxsz) 693 decode_fs_locations_maxsz)
694#define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \
695 encode_sequence_maxsz + \
696 encode_putfh_maxsz + \
697 encode_secinfo_maxsz)
698#define NFS4_dec_secinfo_sz (compound_decode_hdr_maxsz + \
699 decode_sequence_maxsz + \
700 decode_putfh_maxsz + \
701 decode_secinfo_maxsz)
679#if defined(CONFIG_NFS_V4_1) 702#if defined(CONFIG_NFS_V4_1)
680#define NFS4_enc_exchange_id_sz \ 703#define NFS4_enc_exchange_id_sz \
681 (compound_encode_hdr_maxsz + \ 704 (compound_encode_hdr_maxsz + \
@@ -727,6 +750,17 @@ static int nfs4_stat_to_errno(int);
727 decode_sequence_maxsz + \ 750 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \ 751 decode_putfh_maxsz + \
729 decode_layoutget_maxsz) 752 decode_layoutget_maxsz)
753#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
754 encode_sequence_maxsz +\
755 encode_putfh_maxsz + \
756 encode_layoutcommit_maxsz + \
757 encode_getattr_maxsz)
758#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
759 decode_sequence_maxsz + \
760 decode_putfh_maxsz + \
761 decode_layoutcommit_maxsz + \
762 decode_getattr_maxsz)
763
730 764
731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 765const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
732 compound_encode_hdr_maxsz + 766 compound_encode_hdr_maxsz +
@@ -1620,6 +1654,18 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1620 hdr->replen += decode_delegreturn_maxsz; 1654 hdr->replen += decode_delegreturn_maxsz;
1621} 1655}
1622 1656
1657static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1658{
1659 int len = name->len;
1660 __be32 *p;
1661
1662 p = reserve_space(xdr, 8 + len);
1663 *p++ = cpu_to_be32(OP_SECINFO);
1664 xdr_encode_opaque(p, name->name, len);
1665 hdr->nops++;
1666 hdr->replen += decode_secinfo_maxsz;
1667}
1668
1623#if defined(CONFIG_NFS_V4_1) 1669#if defined(CONFIG_NFS_V4_1)
1624/* NFSv4.1 operations */ 1670/* NFSv4.1 operations */
1625static void encode_exchange_id(struct xdr_stream *xdr, 1671static void encode_exchange_id(struct xdr_stream *xdr,
@@ -1816,6 +1862,34 @@ encode_layoutget(struct xdr_stream *xdr,
1816 hdr->nops++; 1862 hdr->nops++;
1817 hdr->replen += decode_layoutget_maxsz; 1863 hdr->replen += decode_layoutget_maxsz;
1818} 1864}
1865
1866static int
1867encode_layoutcommit(struct xdr_stream *xdr,
1868 const struct nfs4_layoutcommit_args *args,
1869 struct compound_hdr *hdr)
1870{
1871 __be32 *p;
1872
1873 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
1874 NFS_SERVER(args->inode)->pnfs_curr_ld->id);
1875
1876 p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
1877 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
1878 /* Only whole file layouts */
1879 p = xdr_encode_hyper(p, 0); /* offset */
1880 p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */
1881 *p++ = cpu_to_be32(0); /* reclaim */
1882 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
1883 *p++ = cpu_to_be32(1); /* newoffset = TRUE */
1884 p = xdr_encode_hyper(p, args->lastbytewritten);
1885 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
1886 *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
1887 *p++ = cpu_to_be32(0); /* no file layout payload */
1888
1889 hdr->nops++;
1890 hdr->replen += decode_layoutcommit_maxsz;
1891 return 0;
1892}
1819#endif /* CONFIG_NFS_V4_1 */ 1893#endif /* CONFIG_NFS_V4_1 */
1820 1894
1821/* 1895/*
@@ -2294,7 +2368,8 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2294 encode_sequence(xdr, &args->seq_args, &hdr); 2368 encode_sequence(xdr, &args->seq_args, &hdr);
2295 encode_putfh(xdr, args->fh, &hdr); 2369 encode_putfh(xdr, args->fh, &hdr);
2296 encode_commit(xdr, args, &hdr); 2370 encode_commit(xdr, args, &hdr);
2297 encode_getfattr(xdr, args->bitmask, &hdr); 2371 if (args->bitmask)
2372 encode_getfattr(xdr, args->bitmask, &hdr);
2298 encode_nops(&hdr); 2373 encode_nops(&hdr);
2299} 2374}
2300 2375
@@ -2465,6 +2540,24 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
2465 encode_nops(&hdr); 2540 encode_nops(&hdr);
2466} 2541}
2467 2542
2543/*
2544 * Encode SECINFO request
2545 */
2546static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
2547 struct xdr_stream *xdr,
2548 struct nfs4_secinfo_arg *args)
2549{
2550 struct compound_hdr hdr = {
2551 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2552 };
2553
2554 encode_compound_hdr(xdr, req, &hdr);
2555 encode_sequence(xdr, &args->seq_args, &hdr);
2556 encode_putfh(xdr, args->dir_fh, &hdr);
2557 encode_secinfo(xdr, args->name, &hdr);
2558 encode_nops(&hdr);
2559}
2560
2468#if defined(CONFIG_NFS_V4_1) 2561#if defined(CONFIG_NFS_V4_1)
2469/* 2562/*
2470 * EXCHANGE_ID request 2563 * EXCHANGE_ID request
@@ -2604,8 +2697,32 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
2604 encode_sequence(xdr, &args->seq_args, &hdr); 2697 encode_sequence(xdr, &args->seq_args, &hdr);
2605 encode_putfh(xdr, NFS_FH(args->inode), &hdr); 2698 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2606 encode_layoutget(xdr, args, &hdr); 2699 encode_layoutget(xdr, args, &hdr);
2700
2701 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
2702 args->layout.pages, 0, args->layout.pglen);
2703
2607 encode_nops(&hdr); 2704 encode_nops(&hdr);
2608} 2705}
2706
2707/*
2708 * Encode LAYOUTCOMMIT request
2709 */
2710static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
2711 struct xdr_stream *xdr,
2712 struct nfs4_layoutcommit_args *args)
2713{
2714 struct compound_hdr hdr = {
2715 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2716 };
2717
2718 encode_compound_hdr(xdr, req, &hdr);
2719 encode_sequence(xdr, &args->seq_args, &hdr);
2720 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2721 encode_layoutcommit(xdr, args, &hdr);
2722 encode_getfattr(xdr, args->bitmask, &hdr);
2723 encode_nops(&hdr);
2724 return 0;
2725}
2609#endif /* CONFIG_NFS_V4_1 */ 2726#endif /* CONFIG_NFS_V4_1 */
2610 2727
2611static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2728static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -2925,6 +3042,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
2925 if (unlikely(!p)) 3042 if (unlikely(!p))
2926 goto out_overflow; 3043 goto out_overflow;
2927 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 3044 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3045 return -be32_to_cpup(p);
2928 } 3046 }
2929 return 0; 3047 return 0;
2930out_overflow: 3048out_overflow:
@@ -3912,6 +4030,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3912 fattr->valid |= status; 4030 fattr->valid |= status;
3913 4031
3914 status = decode_attr_error(xdr, bitmap); 4032 status = decode_attr_error(xdr, bitmap);
4033 if (status == -NFS4ERR_WRONGSEC) {
4034 nfs_fixup_secinfo_attributes(fattr, fh);
4035 status = 0;
4036 }
3915 if (status < 0) 4037 if (status < 0)
3916 goto xdr_error; 4038 goto xdr_error;
3917 4039
@@ -4680,6 +4802,73 @@ static int decode_delegreturn(struct xdr_stream *xdr)
4680 return decode_op_hdr(xdr, OP_DELEGRETURN); 4802 return decode_op_hdr(xdr, OP_DELEGRETURN);
4681} 4803}
4682 4804
4805static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor)
4806{
4807 __be32 *p;
4808
4809 p = xdr_inline_decode(xdr, 4);
4810 if (unlikely(!p))
4811 goto out_overflow;
4812 flavor->gss.sec_oid4.len = be32_to_cpup(p);
4813 if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN)
4814 goto out_err;
4815
4816 p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len);
4817 if (unlikely(!p))
4818 goto out_overflow;
4819 memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len);
4820
4821 p = xdr_inline_decode(xdr, 8);
4822 if (unlikely(!p))
4823 goto out_overflow;
4824 flavor->gss.qop4 = be32_to_cpup(p++);
4825 flavor->gss.service = be32_to_cpup(p);
4826
4827 return 0;
4828
4829out_overflow:
4830 print_overflow_msg(__func__, xdr);
4831 return -EIO;
4832out_err:
4833 return -EINVAL;
4834}
4835
4836static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
4837{
4838 struct nfs4_secinfo_flavor *sec_flavor;
4839 int status;
4840 __be32 *p;
4841 int i;
4842
4843 status = decode_op_hdr(xdr, OP_SECINFO);
4844 p = xdr_inline_decode(xdr, 4);
4845 if (unlikely(!p))
4846 goto out_overflow;
4847 res->flavors->num_flavors = be32_to_cpup(p);
4848
4849 for (i = 0; i < res->flavors->num_flavors; i++) {
4850 sec_flavor = &res->flavors->flavors[i];
4851 if ((char *)&sec_flavor[1] - (char *)res > PAGE_SIZE)
4852 break;
4853
4854 p = xdr_inline_decode(xdr, 4);
4855 if (unlikely(!p))
4856 goto out_overflow;
4857 sec_flavor->flavor = be32_to_cpup(p);
4858
4859 if (sec_flavor->flavor == RPC_AUTH_GSS) {
4860 if (decode_secinfo_gss(xdr, sec_flavor))
4861 break;
4862 }
4863 }
4864
4865 return 0;
4866
4867out_overflow:
4868 print_overflow_msg(__func__, xdr);
4869 return -EIO;
4870}
4871
4683#if defined(CONFIG_NFS_V4_1) 4872#if defined(CONFIG_NFS_V4_1)
4684static int decode_exchange_id(struct xdr_stream *xdr, 4873static int decode_exchange_id(struct xdr_stream *xdr,
4685 struct nfs41_exchange_id_res *res) 4874 struct nfs41_exchange_id_res *res)
@@ -4950,6 +5139,9 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4950 __be32 *p; 5139 __be32 *p;
4951 int status; 5140 int status;
4952 u32 layout_count; 5141 u32 layout_count;
5142 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
5143 struct kvec *iov = rcvbuf->head;
5144 u32 hdrlen, recvd;
4953 5145
4954 status = decode_op_hdr(xdr, OP_LAYOUTGET); 5146 status = decode_op_hdr(xdr, OP_LAYOUTGET);
4955 if (status) 5147 if (status)
@@ -4966,17 +5158,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4966 return -EINVAL; 5158 return -EINVAL;
4967 } 5159 }
4968 5160
4969 p = xdr_inline_decode(xdr, 24); 5161 p = xdr_inline_decode(xdr, 28);
4970 if (unlikely(!p)) 5162 if (unlikely(!p))
4971 goto out_overflow; 5163 goto out_overflow;
4972 p = xdr_decode_hyper(p, &res->range.offset); 5164 p = xdr_decode_hyper(p, &res->range.offset);
4973 p = xdr_decode_hyper(p, &res->range.length); 5165 p = xdr_decode_hyper(p, &res->range.length);
4974 res->range.iomode = be32_to_cpup(p++); 5166 res->range.iomode = be32_to_cpup(p++);
4975 res->type = be32_to_cpup(p++); 5167 res->type = be32_to_cpup(p++);
4976 5168 res->layoutp->len = be32_to_cpup(p);
4977 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
4978 if (unlikely(status))
4979 return status;
4980 5169
4981 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", 5170 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
4982 __func__, 5171 __func__,
@@ -4984,12 +5173,18 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4984 (unsigned long)res->range.length, 5173 (unsigned long)res->range.length,
4985 res->range.iomode, 5174 res->range.iomode,
4986 res->type, 5175 res->type,
4987 res->layout.len); 5176 res->layoutp->len);
4988 5177
4989 /* nfs4_proc_layoutget allocated a single page */ 5178 hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
4990 if (res->layout.len > PAGE_SIZE) 5179 recvd = req->rq_rcv_buf.len - hdrlen;
4991 return -ENOMEM; 5180 if (res->layoutp->len > recvd) {
4992 memcpy(res->layout.buf, p, res->layout.len); 5181 dprintk("NFS: server cheating in layoutget reply: "
5182 "layout len %u > recvd %u\n",
5183 res->layoutp->len, recvd);
5184 return -EINVAL;
5185 }
5186
5187 xdr_read_pages(xdr, res->layoutp->len);
4993 5188
4994 if (layout_count > 1) { 5189 if (layout_count > 1) {
4995 /* We only handle a length one array at the moment. Any 5190 /* We only handle a length one array at the moment. Any
@@ -5006,6 +5201,35 @@ out_overflow:
5006 print_overflow_msg(__func__, xdr); 5201 print_overflow_msg(__func__, xdr);
5007 return -EIO; 5202 return -EIO;
5008} 5203}
5204
5205static int decode_layoutcommit(struct xdr_stream *xdr,
5206 struct rpc_rqst *req,
5207 struct nfs4_layoutcommit_res *res)
5208{
5209 __be32 *p;
5210 __u32 sizechanged;
5211 int status;
5212
5213 status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
5214 if (status)
5215 return status;
5216
5217 p = xdr_inline_decode(xdr, 4);
5218 if (unlikely(!p))
5219 goto out_overflow;
5220 sizechanged = be32_to_cpup(p);
5221
5222 if (sizechanged) {
5223 /* throw away new size */
5224 p = xdr_inline_decode(xdr, 8);
5225 if (unlikely(!p))
5226 goto out_overflow;
5227 }
5228 return 0;
5229out_overflow:
5230 print_overflow_msg(__func__, xdr);
5231 return -EIO;
5232}
5009#endif /* CONFIG_NFS_V4_1 */ 5233#endif /* CONFIG_NFS_V4_1 */
5010 5234
5011/* 5235/*
@@ -5723,8 +5947,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5723 status = decode_commit(xdr, res); 5947 status = decode_commit(xdr, res);
5724 if (status) 5948 if (status)
5725 goto out; 5949 goto out;
5726 decode_getfattr(xdr, res->fattr, res->server, 5950 if (res->fattr)
5727 !RPC_IS_ASYNC(rqstp->rq_task)); 5951 decode_getfattr(xdr, res->fattr, res->server,
5952 !RPC_IS_ASYNC(rqstp->rq_task));
5728out: 5953out:
5729 return status; 5954 return status;
5730} 5955}
@@ -5919,6 +6144,32 @@ out:
5919 return status; 6144 return status;
5920} 6145}
5921 6146
6147/*
6148 * Decode SECINFO response
6149 */
6150static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
6151 struct xdr_stream *xdr,
6152 struct nfs4_secinfo_res *res)
6153{
6154 struct compound_hdr hdr;
6155 int status;
6156
6157 status = decode_compound_hdr(xdr, &hdr);
6158 if (status)
6159 goto out;
6160 status = decode_sequence(xdr, &res->seq_res, rqstp);
6161 if (status)
6162 goto out;
6163 status = decode_putfh(xdr);
6164 if (status)
6165 goto out;
6166 status = decode_secinfo(xdr, res);
6167 if (status)
6168 goto out;
6169out:
6170 return status;
6171}
6172
5922#if defined(CONFIG_NFS_V4_1) 6173#if defined(CONFIG_NFS_V4_1)
5923/* 6174/*
5924 * Decode EXCHANGE_ID response 6175 * Decode EXCHANGE_ID response
@@ -6066,6 +6317,34 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
6066out: 6317out:
6067 return status; 6318 return status;
6068} 6319}
6320
6321/*
6322 * Decode LAYOUTCOMMIT response
6323 */
6324static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
6325 struct xdr_stream *xdr,
6326 struct nfs4_layoutcommit_res *res)
6327{
6328 struct compound_hdr hdr;
6329 int status;
6330
6331 status = decode_compound_hdr(xdr, &hdr);
6332 if (status)
6333 goto out;
6334 status = decode_sequence(xdr, &res->seq_res, rqstp);
6335 if (status)
6336 goto out;
6337 status = decode_putfh(xdr);
6338 if (status)
6339 goto out;
6340 status = decode_layoutcommit(xdr, rqstp, res);
6341 if (status)
6342 goto out;
6343 decode_getfattr(xdr, res->fattr, res->server,
6344 !RPC_IS_ASYNC(rqstp->rq_task));
6345out:
6346 return status;
6347}
6069#endif /* CONFIG_NFS_V4_1 */ 6348#endif /* CONFIG_NFS_V4_1 */
6070 6349
6071/** 6350/**
@@ -6180,10 +6459,6 @@ static struct {
6180 { NFS4ERR_SYMLINK, -ELOOP }, 6459 { NFS4ERR_SYMLINK, -ELOOP },
6181 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, 6460 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
6182 { NFS4ERR_DEADLOCK, -EDEADLK }, 6461 { NFS4ERR_DEADLOCK, -EDEADLK },
6183 { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs
6184 * to be handled by a
6185 * middle-layer.
6186 */
6187 { -1, -EIO } 6462 { -1, -EIO }
6188}; 6463};
6189 6464
@@ -6258,6 +6533,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6258 PROC(SETACL, enc_setacl, dec_setacl), 6533 PROC(SETACL, enc_setacl, dec_setacl),
6259 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 6534 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
6260 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), 6535 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
6536 PROC(SECINFO, enc_secinfo, dec_secinfo),
6261#if defined(CONFIG_NFS_V4_1) 6537#if defined(CONFIG_NFS_V4_1)
6262 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), 6538 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
6263 PROC(CREATE_SESSION, enc_create_session, dec_create_session), 6539 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
@@ -6267,6 +6543,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6267 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6543 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6268 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), 6544 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6269 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6545 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6546 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
6270#endif /* CONFIG_NFS_V4_1 */ 6547#endif /* CONFIG_NFS_V4_1 */
6271}; 6548};
6272 6549
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 23e794410669..87a593c2b055 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -223,6 +223,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
223 desc->pg_count = 0; 223 desc->pg_count = 0;
224 desc->pg_bsize = bsize; 224 desc->pg_bsize = bsize;
225 desc->pg_base = 0; 225 desc->pg_base = 0;
226 desc->pg_moreio = 0;
226 desc->pg_inode = inode; 227 desc->pg_inode = inode;
227 desc->pg_doio = doio; 228 desc->pg_doio = doio;
228 desc->pg_ioflags = io_flags; 229 desc->pg_ioflags = io_flags;
@@ -335,9 +336,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
335 struct nfs_page *req) 336 struct nfs_page *req)
336{ 337{
337 while (!nfs_pageio_do_add_request(desc, req)) { 338 while (!nfs_pageio_do_add_request(desc, req)) {
339 desc->pg_moreio = 1;
338 nfs_pageio_doio(desc); 340 nfs_pageio_doio(desc);
339 if (desc->pg_error < 0) 341 if (desc->pg_error < 0)
340 return 0; 342 return 0;
343 desc->pg_moreio = 0;
341 } 344 }
342 return 1; 345 return 1;
343} 346}
@@ -395,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi,
395 pgoff_t idx_end; 398 pgoff_t idx_end;
396 int found, i; 399 int found, i;
397 int res; 400 int res;
401 struct list_head *list;
398 402
399 res = 0; 403 res = 0;
400 if (npages == 0) 404 if (npages == 0)
@@ -415,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi,
415 idx_start = req->wb_index + 1; 419 idx_start = req->wb_index + 1;
416 if (nfs_set_page_tag_locked(req)) { 420 if (nfs_set_page_tag_locked(req)) {
417 kref_get(&req->wb_kref); 421 kref_get(&req->wb_kref);
418 nfs_list_remove_request(req);
419 radix_tree_tag_clear(&nfsi->nfs_page_tree, 422 radix_tree_tag_clear(&nfsi->nfs_page_tree,
420 req->wb_index, tag); 423 req->wb_index, tag);
421 nfs_list_add_request(req, dst); 424 list = pnfs_choose_commit_list(req, dst);
425 nfs_list_add_request(req, list);
422 res++; 426 res++;
423 if (res == INT_MAX) 427 if (res == INT_MAX)
424 goto out; 428 goto out;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f38813a0a295..d9ab97269ce6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -259,6 +259,7 @@ put_lseg(struct pnfs_layout_segment *lseg)
259 pnfs_free_lseg_list(&free_me); 259 pnfs_free_lseg_list(&free_me);
260 } 260 }
261} 261}
262EXPORT_SYMBOL_GPL(put_lseg);
262 263
263static bool 264static bool
264should_free_lseg(u32 lseg_iomode, u32 recall_iomode) 265should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
@@ -471,6 +472,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
471 struct nfs_server *server = NFS_SERVER(ino); 472 struct nfs_server *server = NFS_SERVER(ino);
472 struct nfs4_layoutget *lgp; 473 struct nfs4_layoutget *lgp;
473 struct pnfs_layout_segment *lseg = NULL; 474 struct pnfs_layout_segment *lseg = NULL;
475 struct page **pages = NULL;
476 int i;
477 u32 max_resp_sz, max_pages;
474 478
475 dprintk("--> %s\n", __func__); 479 dprintk("--> %s\n", __func__);
476 480
@@ -478,6 +482,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
478 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 482 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
479 if (lgp == NULL) 483 if (lgp == NULL)
480 return NULL; 484 return NULL;
485
486 /* allocate pages for xdr post processing */
487 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
488 max_pages = max_resp_sz >> PAGE_SHIFT;
489
490 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
491 if (!pages)
492 goto out_err_free;
493
494 for (i = 0; i < max_pages; i++) {
495 pages[i] = alloc_page(GFP_KERNEL);
496 if (!pages[i])
497 goto out_err_free;
498 }
499
481 lgp->args.minlength = NFS4_MAX_UINT64; 500 lgp->args.minlength = NFS4_MAX_UINT64;
482 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 501 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
483 lgp->args.range.iomode = iomode; 502 lgp->args.range.iomode = iomode;
@@ -486,6 +505,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
486 lgp->args.type = server->pnfs_curr_ld->id; 505 lgp->args.type = server->pnfs_curr_ld->id;
487 lgp->args.inode = ino; 506 lgp->args.inode = ino;
488 lgp->args.ctx = get_nfs_open_context(ctx); 507 lgp->args.ctx = get_nfs_open_context(ctx);
508 lgp->args.layout.pages = pages;
509 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
489 lgp->lsegpp = &lseg; 510 lgp->lsegpp = &lseg;
490 511
491 /* Synchronously retrieve layout information from server and 512 /* Synchronously retrieve layout information from server and
@@ -496,7 +517,26 @@ send_layoutget(struct pnfs_layout_hdr *lo,
496 /* remember that LAYOUTGET failed and suspend trying */ 517 /* remember that LAYOUTGET failed and suspend trying */
497 set_bit(lo_fail_bit(iomode), &lo->plh_flags); 518 set_bit(lo_fail_bit(iomode), &lo->plh_flags);
498 } 519 }
520
521 /* free xdr pages */
522 for (i = 0; i < max_pages; i++)
523 __free_page(pages[i]);
524 kfree(pages);
525
499 return lseg; 526 return lseg;
527
528out_err_free:
529 /* free any allocated xdr pages, lgp as it's not used */
530 if (pages) {
531 for (i = 0; i < max_pages; i++) {
532 if (!pages[i])
533 break;
534 __free_page(pages[i]);
535 }
536 kfree(pages);
537 }
538 kfree(lgp);
539 return NULL;
500} 540}
501 541
502bool pnfs_roc(struct inode *ino) 542bool pnfs_roc(struct inode *ino)
@@ -945,3 +985,105 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
945 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 985 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
946 return trypnfs; 986 return trypnfs;
947} 987}
988
989/*
990 * Currently there is only one (whole file) write lseg.
991 */
992static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
993{
994 struct pnfs_layout_segment *lseg, *rv = NULL;
995
996 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
997 if (lseg->pls_range.iomode == IOMODE_RW)
998 rv = lseg;
999 return rv;
1000}
1001
1002void
1003pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1004{
1005 struct nfs_inode *nfsi = NFS_I(wdata->inode);
1006 loff_t end_pos = wdata->args.offset + wdata->res.count;
1007
1008 spin_lock(&nfsi->vfs_inode.i_lock);
1009 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1010 /* references matched in nfs4_layoutcommit_release */
1011 get_lseg(wdata->lseg);
1012 wdata->lseg->pls_lc_cred =
1013 get_rpccred(wdata->args.context->state->owner->so_cred);
1014 mark_inode_dirty_sync(wdata->inode);
1015 dprintk("%s: Set layoutcommit for inode %lu ",
1016 __func__, wdata->inode->i_ino);
1017 }
1018 if (end_pos > wdata->lseg->pls_end_pos)
1019 wdata->lseg->pls_end_pos = end_pos;
1020 spin_unlock(&nfsi->vfs_inode.i_lock);
1021}
1022EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1023
1024/*
1025 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
1026 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
1027 * data to disk to allow the server to recover the data if it crashes.
1028 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
1029 * is off, and a COMMIT is sent to a data server, or
1030 * if WRITEs to a data server return NFS_DATA_SYNC.
1031 */
1032int
1033pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1034{
1035 struct nfs4_layoutcommit_data *data;
1036 struct nfs_inode *nfsi = NFS_I(inode);
1037 struct pnfs_layout_segment *lseg;
1038 struct rpc_cred *cred;
1039 loff_t end_pos;
1040 int status = 0;
1041
1042 dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
1043
1044 if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
1045 return 0;
1046
1047 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
1048 data = kzalloc(sizeof(*data), GFP_NOFS);
1049 if (!data) {
1050 mark_inode_dirty_sync(inode);
1051 status = -ENOMEM;
1052 goto out;
1053 }
1054
1055 spin_lock(&inode->i_lock);
1056 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1057 spin_unlock(&inode->i_lock);
1058 kfree(data);
1059 goto out;
1060 }
1061 /*
1062 * Currently only one (whole file) write lseg which is referenced
1063 * in pnfs_set_layoutcommit and will be found.
1064 */
1065 lseg = pnfs_list_write_lseg(inode);
1066
1067 end_pos = lseg->pls_end_pos;
1068 cred = lseg->pls_lc_cred;
1069 lseg->pls_end_pos = 0;
1070 lseg->pls_lc_cred = NULL;
1071
1072 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
1073 sizeof(nfsi->layout->plh_stateid.data));
1074 spin_unlock(&inode->i_lock);
1075
1076 data->args.inode = inode;
1077 data->lseg = lseg;
1078 data->cred = cred;
1079 nfs_fattr_init(&data->fattr);
1080 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
1081 data->res.fattr = &data->fattr;
1082 data->args.lastbytewritten = end_pos - 1;
1083 data->res.server = NFS_SERVER(inode);
1084
1085 status = nfs4_proc_layoutcommit(data, sync);
1086out:
1087 dprintk("<-- %s status %d\n", __func__, status);
1088 return status;
1089}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 6380b9405bcd..bc4827202e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -43,6 +43,8 @@ struct pnfs_layout_segment {
43 atomic_t pls_refcount; 43 atomic_t pls_refcount;
44 unsigned long pls_flags; 44 unsigned long pls_flags;
45 struct pnfs_layout_hdr *pls_layout; 45 struct pnfs_layout_hdr *pls_layout;
46 struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */
47 loff_t pls_end_pos; /* LAYOUTCOMMIT write end */
46}; 48};
47 49
48enum pnfs_try_status { 50enum pnfs_try_status {
@@ -74,6 +76,13 @@ struct pnfs_layoutdriver_type {
74 /* test for nfs page cache coalescing */ 76 /* test for nfs page cache coalescing */
75 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 77 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
76 78
79 /* Returns true if layoutdriver wants to divert this request to
80 * driver's commit routine.
81 */
82 bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg);
83 struct list_head * (*choose_commit_list) (struct nfs_page *req);
84 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
85
77 /* 86 /*
78 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 87 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
79 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 88 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -100,7 +109,6 @@ struct pnfs_device {
100 unsigned int layout_type; 109 unsigned int layout_type;
101 unsigned int mincount; 110 unsigned int mincount;
102 struct page **pages; 111 struct page **pages;
103 void *area;
104 unsigned int pgbase; 112 unsigned int pgbase;
105 unsigned int pglen; 113 unsigned int pglen;
106}; 114};
@@ -145,7 +153,8 @@ bool pnfs_roc(struct inode *ino);
145void pnfs_roc_release(struct inode *ino); 153void pnfs_roc_release(struct inode *ino);
146void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 154void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
147bool pnfs_roc_drain(struct inode *ino, u32 *barrier); 155bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
148 156void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
157int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
149 158
150static inline int lo_fail_bit(u32 iomode) 159static inline int lo_fail_bit(u32 iomode)
151{ 160{
@@ -169,6 +178,51 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
169 return nfss->pnfs_curr_ld != NULL; 178 return nfss->pnfs_curr_ld != NULL;
170} 179}
171 180
181static inline void
182pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
183{
184 if (lseg) {
185 struct pnfs_layoutdriver_type *ld;
186
187 ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
188 if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
189 set_bit(PG_PNFS_COMMIT, &req->wb_flags);
190 req->wb_commit_lseg = get_lseg(lseg);
191 }
192 }
193}
194
195static inline int
196pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
197{
198 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
199 return PNFS_NOT_ATTEMPTED;
200 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
201}
202
203static inline struct list_head *
204pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
205{
206 struct list_head *rv;
207
208 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) {
209 struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode;
210
211 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
212 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
213 /* matched by ref taken when PG_PNFS_COMMIT is set */
214 put_lseg(req->wb_commit_lseg);
215 } else
216 rv = mds;
217 return rv;
218}
219
220static inline void pnfs_clear_request_commit(struct nfs_page *req)
221{
222 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags))
223 put_lseg(req->wb_commit_lseg);
224}
225
172#else /* CONFIG_NFS_V4_1 */ 226#else /* CONFIG_NFS_V4_1 */
173 227
174static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 228static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -252,6 +306,31 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
252 pgio->pg_test = NULL; 306 pgio->pg_test = NULL;
253} 307}
254 308
309static inline void
310pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
311{
312}
313
314static inline int
315pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
316{
317 return PNFS_NOT_ATTEMPTED;
318}
319
320static inline struct list_head *
321pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
322{
323 return mds;
324}
325
326static inline void pnfs_clear_request_commit(struct nfs_page *req)
327{
328}
329
330static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
331{
332 return 0;
333}
255#endif /* CONFIG_NFS_V4_1 */ 334#endif /* CONFIG_NFS_V4_1 */
256 335
257#endif /* FS_NFS_PNFS_H */ 336#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b8ec170f2a0f..ac40b8535d7e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -177,7 +177,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
177} 177}
178 178
179static int 179static int
180nfs_proc_lookup(struct inode *dir, struct qstr *name, 180nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
181 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 181 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
182{ 182{
183 struct nfs_diropargs arg = { 183 struct nfs_diropargs arg = {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 47a3ad63e0d5..85d75254328e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -59,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
59 } 59 }
60 return p; 60 return p;
61} 61}
62EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
62 63
63void nfs_commit_free(struct nfs_write_data *p) 64void nfs_commit_free(struct nfs_write_data *p)
64{ 65{
@@ -66,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p)
66 kfree(p->pagevec); 67 kfree(p->pagevec);
67 mempool_free(p, nfs_commit_mempool); 68 mempool_free(p, nfs_commit_mempool);
68} 69}
70EXPORT_SYMBOL_GPL(nfs_commit_free);
69 71
70struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 72struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
71{ 73{
@@ -179,8 +181,8 @@ static int wb_priority(struct writeback_control *wbc)
179 if (wbc->for_reclaim) 181 if (wbc->for_reclaim)
180 return FLUSH_HIGHPRI | FLUSH_STABLE; 182 return FLUSH_HIGHPRI | FLUSH_STABLE;
181 if (wbc->for_kupdate || wbc->for_background) 183 if (wbc->for_kupdate || wbc->for_background)
182 return FLUSH_LOWPRI; 184 return FLUSH_LOWPRI | FLUSH_COND_STABLE;
183 return 0; 185 return FLUSH_COND_STABLE;
184} 186}
185 187
186/* 188/*
@@ -441,7 +443,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
441 * Add a request to the inode's commit list. 443 * Add a request to the inode's commit list.
442 */ 444 */
443static void 445static void
444nfs_mark_request_commit(struct nfs_page *req) 446nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
445{ 447{
446 struct inode *inode = req->wb_context->path.dentry->d_inode; 448 struct inode *inode = req->wb_context->path.dentry->d_inode;
447 struct nfs_inode *nfsi = NFS_I(inode); 449 struct nfs_inode *nfsi = NFS_I(inode);
@@ -453,6 +455,7 @@ nfs_mark_request_commit(struct nfs_page *req)
453 NFS_PAGE_TAG_COMMIT); 455 NFS_PAGE_TAG_COMMIT);
454 nfsi->ncommit++; 456 nfsi->ncommit++;
455 spin_unlock(&inode->i_lock); 457 spin_unlock(&inode->i_lock);
458 pnfs_mark_request_commit(req, lseg);
456 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 459 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
457 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 460 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
458 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 461 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -474,14 +477,18 @@ nfs_clear_request_commit(struct nfs_page *req)
474static inline 477static inline
475int nfs_write_need_commit(struct nfs_write_data *data) 478int nfs_write_need_commit(struct nfs_write_data *data)
476{ 479{
477 return data->verf.committed != NFS_FILE_SYNC; 480 if (data->verf.committed == NFS_DATA_SYNC)
481 return data->lseg == NULL;
482 else
483 return data->verf.committed != NFS_FILE_SYNC;
478} 484}
479 485
480static inline 486static inline
481int nfs_reschedule_unstable_write(struct nfs_page *req) 487int nfs_reschedule_unstable_write(struct nfs_page *req,
488 struct nfs_write_data *data)
482{ 489{
483 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { 490 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
484 nfs_mark_request_commit(req); 491 nfs_mark_request_commit(req, data->lseg);
485 return 1; 492 return 1;
486 } 493 }
487 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { 494 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -492,7 +499,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
492} 499}
493#else 500#else
494static inline void 501static inline void
495nfs_mark_request_commit(struct nfs_page *req) 502nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
496{ 503{
497} 504}
498 505
@@ -509,7 +516,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
509} 516}
510 517
511static inline 518static inline
512int nfs_reschedule_unstable_write(struct nfs_page *req) 519int nfs_reschedule_unstable_write(struct nfs_page *req,
520 struct nfs_write_data *data)
513{ 521{
514 return 0; 522 return 0;
515} 523}
@@ -612,9 +620,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
612 } 620 }
613 621
614 if (nfs_clear_request_commit(req) && 622 if (nfs_clear_request_commit(req) &&
615 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, 623 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
616 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) 624 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
617 NFS_I(inode)->ncommit--; 625 NFS_I(inode)->ncommit--;
626 pnfs_clear_request_commit(req);
627 }
618 628
619 /* Okay, the request matches. Update the region */ 629 /* Okay, the request matches. Update the region */
620 if (offset < req->wb_offset) { 630 if (offset < req->wb_offset) {
@@ -762,11 +772,12 @@ int nfs_updatepage(struct file *file, struct page *page,
762 return status; 772 return status;
763} 773}
764 774
765static void nfs_writepage_release(struct nfs_page *req) 775static void nfs_writepage_release(struct nfs_page *req,
776 struct nfs_write_data *data)
766{ 777{
767 struct page *page = req->wb_page; 778 struct page *page = req->wb_page;
768 779
769 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) 780 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
770 nfs_inode_remove_request(req); 781 nfs_inode_remove_request(req);
771 nfs_clear_page_tag_locked(req); 782 nfs_clear_page_tag_locked(req);
772 nfs_end_page_writeback(page); 783 nfs_end_page_writeback(page);
@@ -863,7 +874,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
863 data->args.context = get_nfs_open_context(req->wb_context); 874 data->args.context = get_nfs_open_context(req->wb_context);
864 data->args.lock_context = req->wb_lock_context; 875 data->args.lock_context = req->wb_lock_context;
865 data->args.stable = NFS_UNSTABLE; 876 data->args.stable = NFS_UNSTABLE;
866 if (how & FLUSH_STABLE) { 877 if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
867 data->args.stable = NFS_DATA_SYNC; 878 data->args.stable = NFS_DATA_SYNC;
868 if (!nfs_need_commit(NFS_I(inode))) 879 if (!nfs_need_commit(NFS_I(inode)))
869 data->args.stable = NFS_FILE_SYNC; 880 data->args.stable = NFS_FILE_SYNC;
@@ -912,6 +923,12 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
912 923
913 nfs_list_remove_request(req); 924 nfs_list_remove_request(req);
914 925
926 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
927 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
928 desc->pg_count > wsize))
929 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
930
931
915 nbytes = desc->pg_count; 932 nbytes = desc->pg_count;
916 do { 933 do {
917 size_t len = min(nbytes, wsize); 934 size_t len = min(nbytes, wsize);
@@ -1002,6 +1019,10 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1002 if ((!lseg) && list_is_singular(&data->pages)) 1019 if ((!lseg) && list_is_singular(&data->pages))
1003 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); 1020 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
1004 1021
1022 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1023 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1024 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1025
1005 /* Set up the argument struct */ 1026 /* Set up the argument struct */
1006 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); 1027 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
1007out: 1028out:
@@ -1074,7 +1095,7 @@ static void nfs_writeback_release_partial(void *calldata)
1074 1095
1075out: 1096out:
1076 if (atomic_dec_and_test(&req->wb_complete)) 1097 if (atomic_dec_and_test(&req->wb_complete))
1077 nfs_writepage_release(req); 1098 nfs_writepage_release(req, data);
1078 nfs_writedata_release(calldata); 1099 nfs_writedata_release(calldata);
1079} 1100}
1080 1101
@@ -1141,7 +1162,7 @@ static void nfs_writeback_release_full(void *calldata)
1141 1162
1142 if (nfs_write_need_commit(data)) { 1163 if (nfs_write_need_commit(data)) {
1143 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1164 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1144 nfs_mark_request_commit(req); 1165 nfs_mark_request_commit(req, data->lseg);
1145 dprintk(" marked for commit\n"); 1166 dprintk(" marked for commit\n");
1146 goto next; 1167 goto next;
1147 } 1168 }
@@ -1251,57 +1272,82 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1251#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1272#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1252static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) 1273static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1253{ 1274{
1275 int ret;
1276
1254 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1277 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1255 return 1; 1278 return 1;
1256 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, 1279 if (!may_wait)
1257 NFS_INO_COMMIT, nfs_wait_bit_killable, 1280 return 0;
1258 TASK_KILLABLE)) 1281 ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1259 return 1; 1282 NFS_INO_COMMIT,
1260 return 0; 1283 nfs_wait_bit_killable,
1284 TASK_KILLABLE);
1285 return (ret < 0) ? ret : 1;
1261} 1286}
1262 1287
1263static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1288void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1264{ 1289{
1265 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1290 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1266 smp_mb__after_clear_bit(); 1291 smp_mb__after_clear_bit();
1267 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1292 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1268} 1293}
1294EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1269 1295
1270 1296void nfs_commitdata_release(void *data)
1271static void nfs_commitdata_release(void *data)
1272{ 1297{
1273 struct nfs_write_data *wdata = data; 1298 struct nfs_write_data *wdata = data;
1274 1299
1300 put_lseg(wdata->lseg);
1275 put_nfs_open_context(wdata->args.context); 1301 put_nfs_open_context(wdata->args.context);
1276 nfs_commit_free(wdata); 1302 nfs_commit_free(wdata);
1277} 1303}
1304EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1278 1305
1279/* 1306int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1280 * Set up the argument/result storage required for the RPC call. 1307 const struct rpc_call_ops *call_ops,
1281 */ 1308 int how)
1282static int nfs_commit_rpcsetup(struct list_head *head,
1283 struct nfs_write_data *data,
1284 int how)
1285{ 1309{
1286 struct nfs_page *first = nfs_list_entry(head->next);
1287 struct inode *inode = first->wb_context->path.dentry->d_inode;
1288 int priority = flush_task_priority(how);
1289 struct rpc_task *task; 1310 struct rpc_task *task;
1311 int priority = flush_task_priority(how);
1290 struct rpc_message msg = { 1312 struct rpc_message msg = {
1291 .rpc_argp = &data->args, 1313 .rpc_argp = &data->args,
1292 .rpc_resp = &data->res, 1314 .rpc_resp = &data->res,
1293 .rpc_cred = first->wb_context->cred, 1315 .rpc_cred = data->cred,
1294 }; 1316 };
1295 struct rpc_task_setup task_setup_data = { 1317 struct rpc_task_setup task_setup_data = {
1296 .task = &data->task, 1318 .task = &data->task,
1297 .rpc_client = NFS_CLIENT(inode), 1319 .rpc_client = clnt,
1298 .rpc_message = &msg, 1320 .rpc_message = &msg,
1299 .callback_ops = &nfs_commit_ops, 1321 .callback_ops = call_ops,
1300 .callback_data = data, 1322 .callback_data = data,
1301 .workqueue = nfsiod_workqueue, 1323 .workqueue = nfsiod_workqueue,
1302 .flags = RPC_TASK_ASYNC, 1324 .flags = RPC_TASK_ASYNC,
1303 .priority = priority, 1325 .priority = priority,
1304 }; 1326 };
1327 /* Set up the initial task struct. */
1328 NFS_PROTO(data->inode)->commit_setup(data, &msg);
1329
1330 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1331
1332 task = rpc_run_task(&task_setup_data);
1333 if (IS_ERR(task))
1334 return PTR_ERR(task);
1335 if (how & FLUSH_SYNC)
1336 rpc_wait_for_completion_task(task);
1337 rpc_put_task(task);
1338 return 0;
1339}
1340EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1341
1342/*
1343 * Set up the argument/result storage required for the RPC call.
1344 */
1345void nfs_init_commit(struct nfs_write_data *data,
1346 struct list_head *head,
1347 struct pnfs_layout_segment *lseg)
1348{
1349 struct nfs_page *first = nfs_list_entry(head->next);
1350 struct inode *inode = first->wb_context->path.dentry->d_inode;
1305 1351
1306 /* Set up the RPC argument and reply structs 1352 /* Set up the RPC argument and reply structs
1307 * NB: take care not to mess about with data->commit et al. */ 1353 * NB: take care not to mess about with data->commit et al. */
@@ -1309,7 +1355,9 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1309 list_splice_init(head, &data->pages); 1355 list_splice_init(head, &data->pages);
1310 1356
1311 data->inode = inode; 1357 data->inode = inode;
1312 data->cred = msg.rpc_cred; 1358 data->cred = first->wb_context->cred;
1359 data->lseg = lseg; /* reference transferred */
1360 data->mds_ops = &nfs_commit_ops;
1313 1361
1314 data->args.fh = NFS_FH(data->inode); 1362 data->args.fh = NFS_FH(data->inode);
1315 /* Note: we always request a commit of the entire inode */ 1363 /* Note: we always request a commit of the entire inode */
@@ -1320,20 +1368,25 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1320 data->res.fattr = &data->fattr; 1368 data->res.fattr = &data->fattr;
1321 data->res.verf = &data->verf; 1369 data->res.verf = &data->verf;
1322 nfs_fattr_init(&data->fattr); 1370 nfs_fattr_init(&data->fattr);
1371}
1372EXPORT_SYMBOL_GPL(nfs_init_commit);
1323 1373
1324 /* Set up the initial task struct. */ 1374void nfs_retry_commit(struct list_head *page_list,
1325 NFS_PROTO(inode)->commit_setup(data, &msg); 1375 struct pnfs_layout_segment *lseg)
1326 1376{
1327 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); 1377 struct nfs_page *req;
1328 1378
1329 task = rpc_run_task(&task_setup_data); 1379 while (!list_empty(page_list)) {
1330 if (IS_ERR(task)) 1380 req = nfs_list_entry(page_list->next);
1331 return PTR_ERR(task); 1381 nfs_list_remove_request(req);
1332 if (how & FLUSH_SYNC) 1382 nfs_mark_request_commit(req, lseg);
1333 rpc_wait_for_completion_task(task); 1383 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1334 rpc_put_task(task); 1384 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1335 return 0; 1385 BDI_RECLAIMABLE);
1386 nfs_clear_page_tag_locked(req);
1387 }
1336} 1388}
1389EXPORT_SYMBOL_GPL(nfs_retry_commit);
1337 1390
1338/* 1391/*
1339 * Commit dirty pages 1392 * Commit dirty pages
@@ -1342,7 +1395,6 @@ static int
1342nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1395nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1343{ 1396{
1344 struct nfs_write_data *data; 1397 struct nfs_write_data *data;
1345 struct nfs_page *req;
1346 1398
1347 data = nfs_commitdata_alloc(); 1399 data = nfs_commitdata_alloc();
1348 1400
@@ -1350,17 +1402,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1350 goto out_bad; 1402 goto out_bad;
1351 1403
1352 /* Set up the argument struct */ 1404 /* Set up the argument struct */
1353 return nfs_commit_rpcsetup(head, data, how); 1405 nfs_init_commit(data, head, NULL);
1406 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
1354 out_bad: 1407 out_bad:
1355 while (!list_empty(head)) { 1408 nfs_retry_commit(head, NULL);
1356 req = nfs_list_entry(head->next);
1357 nfs_list_remove_request(req);
1358 nfs_mark_request_commit(req);
1359 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1360 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1361 BDI_RECLAIMABLE);
1362 nfs_clear_page_tag_locked(req);
1363 }
1364 nfs_commit_clear_lock(NFS_I(inode)); 1409 nfs_commit_clear_lock(NFS_I(inode));
1365 return -ENOMEM; 1410 return -ENOMEM;
1366} 1411}
@@ -1380,10 +1425,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1380 return; 1425 return;
1381} 1426}
1382 1427
1383static void nfs_commit_release(void *calldata) 1428void nfs_commit_release_pages(struct nfs_write_data *data)
1384{ 1429{
1385 struct nfs_write_data *data = calldata; 1430 struct nfs_page *req;
1386 struct nfs_page *req;
1387 int status = data->task.tk_status; 1431 int status = data->task.tk_status;
1388 1432
1389 while (!list_empty(&data->pages)) { 1433 while (!list_empty(&data->pages)) {
@@ -1417,6 +1461,14 @@ static void nfs_commit_release(void *calldata)
1417 next: 1461 next:
1418 nfs_clear_page_tag_locked(req); 1462 nfs_clear_page_tag_locked(req);
1419 } 1463 }
1464}
1465EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1466
1467static void nfs_commit_release(void *calldata)
1468{
1469 struct nfs_write_data *data = calldata;
1470
1471 nfs_commit_release_pages(data);
1420 nfs_commit_clear_lock(NFS_I(data->inode)); 1472 nfs_commit_clear_lock(NFS_I(data->inode));
1421 nfs_commitdata_release(calldata); 1473 nfs_commitdata_release(calldata);
1422} 1474}
@@ -1433,23 +1485,30 @@ int nfs_commit_inode(struct inode *inode, int how)
1433{ 1485{
1434 LIST_HEAD(head); 1486 LIST_HEAD(head);
1435 int may_wait = how & FLUSH_SYNC; 1487 int may_wait = how & FLUSH_SYNC;
1436 int res = 0; 1488 int res;
1437 1489
1438 if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) 1490 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1491 if (res <= 0)
1439 goto out_mark_dirty; 1492 goto out_mark_dirty;
1440 spin_lock(&inode->i_lock); 1493 spin_lock(&inode->i_lock);
1441 res = nfs_scan_commit(inode, &head, 0, 0); 1494 res = nfs_scan_commit(inode, &head, 0, 0);
1442 spin_unlock(&inode->i_lock); 1495 spin_unlock(&inode->i_lock);
1443 if (res) { 1496 if (res) {
1444 int error = nfs_commit_list(inode, &head, how); 1497 int error;
1498
1499 error = pnfs_commit_list(inode, &head, how);
1500 if (error == PNFS_NOT_ATTEMPTED)
1501 error = nfs_commit_list(inode, &head, how);
1445 if (error < 0) 1502 if (error < 0)
1446 return error; 1503 return error;
1447 if (may_wait) 1504 if (!may_wait)
1448 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1449 nfs_wait_bit_killable,
1450 TASK_KILLABLE);
1451 else
1452 goto out_mark_dirty; 1505 goto out_mark_dirty;
1506 error = wait_on_bit(&NFS_I(inode)->flags,
1507 NFS_INO_COMMIT,
1508 nfs_wait_bit_killable,
1509 TASK_KILLABLE);
1510 if (error < 0)
1511 return error;
1453 } else 1512 } else
1454 nfs_commit_clear_lock(NFS_I(inode)); 1513 nfs_commit_clear_lock(NFS_I(inode));
1455 return res; 1514 return res;
@@ -1503,7 +1562,22 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1503 1562
1504int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1563int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1505{ 1564{
1506 return nfs_commit_unstable_pages(inode, wbc); 1565 int ret;
1566
1567 ret = nfs_commit_unstable_pages(inode, wbc);
1568 if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
1569 int status;
1570 bool sync = true;
1571
1572 if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
1573 wbc->for_background)
1574 sync = false;
1575
1576 status = pnfs_layoutcommit_inode(inode, sync);
1577 if (status < 0)
1578 return status;
1579 }
1580 return ret;
1507} 1581}
1508 1582
1509/* 1583/*
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 84c27d69d421..ec0f277be7f5 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -117,7 +117,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
117 * invoked in contexts where a memory allocation failure is 117 * invoked in contexts where a memory allocation failure is
118 * fatal. Fortunately this fake ACL is small enough to 118 * fatal. Fortunately this fake ACL is small enough to
119 * construct on the stack. */ 119 * construct on the stack. */
120 memset(acl2, 0, sizeof(acl2));
121 posix_acl_init(acl2, 4); 120 posix_acl_init(acl2, 4);
122 121
123 /* Insert entries in canonical order: other orders seem 122 /* Insert entries in canonical order: other orders seem