aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/nfs/dir.c83
-rw-r--r--fs/nfs/file.c3
-rw-r--r--fs/nfs/getroot.c4
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h14
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4filelayout.c352
-rw-r--r--fs/nfs/nfs4filelayout.h2
-rw-r--r--fs/nfs/nfs4filelayoutdev.c178
-rw-r--r--fs/nfs/nfs4proc.c125
-rw-r--r--fs/nfs/nfs4xdr.c167
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/pnfs.c142
-rw-r--r--fs/nfs/pnfs.h83
-rw-r--r--fs/nfs/write.c214
-rw-r--r--fs/nfs_common/nfsacl.c1
-rw-r--r--include/linux/nfs4.h1
-rw-r--r--include/linux/nfs_fs.h10
-rw-r--r--include/linux/nfs_page.h7
-rw-r--r--include/linux/nfs_xdr.h30
-rw-r--r--net/sunrpc/xprtsock.c2
21 files changed, 1237 insertions, 193 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a83cd0d9dfab..7237672216c8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -44,6 +44,7 @@
44/* #define NFS_DEBUG_VERBOSE 1 */ 44/* #define NFS_DEBUG_VERBOSE 1 */
45 45
46static int nfs_opendir(struct inode *, struct file *); 46static int nfs_opendir(struct inode *, struct file *);
47static int nfs_closedir(struct inode *, struct file *);
47static int nfs_readdir(struct file *, void *, filldir_t); 48static int nfs_readdir(struct file *, void *, filldir_t);
48static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); 49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
49static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); 50static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
@@ -64,7 +65,7 @@ const struct file_operations nfs_dir_operations = {
64 .read = generic_read_dir, 65 .read = generic_read_dir,
65 .readdir = nfs_readdir, 66 .readdir = nfs_readdir,
66 .open = nfs_opendir, 67 .open = nfs_opendir,
67 .release = nfs_release, 68 .release = nfs_closedir,
68 .fsync = nfs_fsync_dir, 69 .fsync = nfs_fsync_dir,
69}; 70};
70 71
@@ -133,13 +134,35 @@ const struct inode_operations nfs4_dir_inode_operations = {
133 134
134#endif /* CONFIG_NFS_V4 */ 135#endif /* CONFIG_NFS_V4 */
135 136
137static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred)
138{
139 struct nfs_open_dir_context *ctx;
140 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
141 if (ctx != NULL) {
142 ctx->duped = 0;
143 ctx->dir_cookie = 0;
144 ctx->dup_cookie = 0;
145 ctx->cred = get_rpccred(cred);
146 } else
147 ctx = ERR_PTR(-ENOMEM);
148 return ctx;
149}
150
151static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
152{
153 put_rpccred(ctx->cred);
154 kfree(ctx);
155}
156
136/* 157/*
137 * Open file 158 * Open file
138 */ 159 */
139static int 160static int
140nfs_opendir(struct inode *inode, struct file *filp) 161nfs_opendir(struct inode *inode, struct file *filp)
141{ 162{
142 int res; 163 int res = 0;
164 struct nfs_open_dir_context *ctx;
165 struct rpc_cred *cred;
143 166
144 dfprintk(FILE, "NFS: open dir(%s/%s)\n", 167 dfprintk(FILE, "NFS: open dir(%s/%s)\n",
145 filp->f_path.dentry->d_parent->d_name.name, 168 filp->f_path.dentry->d_parent->d_name.name,
@@ -147,8 +170,15 @@ nfs_opendir(struct inode *inode, struct file *filp)
147 170
148 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 171 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
149 172
150 /* Call generic open code in order to cache credentials */ 173 cred = rpc_lookup_cred();
151 res = nfs_open(inode, filp); 174 if (IS_ERR(cred))
175 return PTR_ERR(cred);
176 ctx = alloc_nfs_open_dir_context(cred);
177 if (IS_ERR(ctx)) {
178 res = PTR_ERR(ctx);
179 goto out;
180 }
181 filp->private_data = ctx;
152 if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { 182 if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
153 /* This is a mountpoint, so d_revalidate will never 183 /* This is a mountpoint, so d_revalidate will never
154 * have been called, so we need to refresh the 184 * have been called, so we need to refresh the
@@ -156,9 +186,18 @@ nfs_opendir(struct inode *inode, struct file *filp)
156 */ 186 */
157 __nfs_revalidate_inode(NFS_SERVER(inode), inode); 187 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
158 } 188 }
189out:
190 put_rpccred(cred);
159 return res; 191 return res;
160} 192}
161 193
194static int
195nfs_closedir(struct inode *inode, struct file *filp)
196{
197 put_nfs_open_dir_context(filp->private_data);
198 return 0;
199}
200
162struct nfs_cache_array_entry { 201struct nfs_cache_array_entry {
163 u64 cookie; 202 u64 cookie;
164 u64 ino; 203 u64 ino;
@@ -284,19 +323,20 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
284{ 323{
285 loff_t diff = desc->file->f_pos - desc->current_index; 324 loff_t diff = desc->file->f_pos - desc->current_index;
286 unsigned int index; 325 unsigned int index;
326 struct nfs_open_dir_context *ctx = desc->file->private_data;
287 327
288 if (diff < 0) 328 if (diff < 0)
289 goto out_eof; 329 goto out_eof;
290 if (diff >= array->size) { 330 if (diff >= array->size) {
291 if (array->eof_index >= 0) 331 if (array->eof_index >= 0)
292 goto out_eof; 332 goto out_eof;
293 desc->current_index += array->size;
294 return -EAGAIN; 333 return -EAGAIN;
295 } 334 }
296 335
297 index = (unsigned int)diff; 336 index = (unsigned int)diff;
298 *desc->dir_cookie = array->array[index].cookie; 337 *desc->dir_cookie = array->array[index].cookie;
299 desc->cache_entry_index = index; 338 desc->cache_entry_index = index;
339 ctx->duped = 0;
300 return 0; 340 return 0;
301out_eof: 341out_eof:
302 desc->eof = 1; 342 desc->eof = 1;
@@ -307,10 +347,18 @@ static
307int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) 347int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
308{ 348{
309 int i; 349 int i;
350 loff_t new_pos;
310 int status = -EAGAIN; 351 int status = -EAGAIN;
352 struct nfs_open_dir_context *ctx = desc->file->private_data;
311 353
312 for (i = 0; i < array->size; i++) { 354 for (i = 0; i < array->size; i++) {
313 if (array->array[i].cookie == *desc->dir_cookie) { 355 if (array->array[i].cookie == *desc->dir_cookie) {
356 new_pos = desc->current_index + i;
357 if (new_pos < desc->file->f_pos) {
358 ctx->dup_cookie = *desc->dir_cookie;
359 ctx->duped = 1;
360 }
361 desc->file->f_pos = new_pos;
314 desc->cache_entry_index = i; 362 desc->cache_entry_index = i;
315 return 0; 363 return 0;
316 } 364 }
@@ -342,6 +390,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
342 390
343 if (status == -EAGAIN) { 391 if (status == -EAGAIN) {
344 desc->last_cookie = array->last_cookie; 392 desc->last_cookie = array->last_cookie;
393 desc->current_index += array->size;
345 desc->page_index++; 394 desc->page_index++;
346 } 395 }
347 nfs_readdir_release_array(desc->page); 396 nfs_readdir_release_array(desc->page);
@@ -354,7 +403,8 @@ static
354int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, 403int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
355 struct nfs_entry *entry, struct file *file, struct inode *inode) 404 struct nfs_entry *entry, struct file *file, struct inode *inode)
356{ 405{
357 struct rpc_cred *cred = nfs_file_cred(file); 406 struct nfs_open_dir_context *ctx = file->private_data;
407 struct rpc_cred *cred = ctx->cred;
358 unsigned long timestamp, gencount; 408 unsigned long timestamp, gencount;
359 int error; 409 int error;
360 410
@@ -693,6 +743,20 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
693 int i = 0; 743 int i = 0;
694 int res = 0; 744 int res = 0;
695 struct nfs_cache_array *array = NULL; 745 struct nfs_cache_array *array = NULL;
746 struct nfs_open_dir_context *ctx = file->private_data;
747
748 if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) {
749 if (printk_ratelimit()) {
750 pr_notice("NFS: directory %s/%s contains a readdir loop. "
751 "Please contact your server vendor. "
752 "Offending cookie: %llu\n",
753 file->f_dentry->d_parent->d_name.name,
754 file->f_dentry->d_name.name,
755 *desc->dir_cookie);
756 }
757 res = -ELOOP;
758 goto out;
759 }
696 760
697 array = nfs_readdir_get_array(desc->page); 761 array = nfs_readdir_get_array(desc->page);
698 if (IS_ERR(array)) { 762 if (IS_ERR(array)) {
@@ -785,6 +849,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
785 struct inode *inode = dentry->d_inode; 849 struct inode *inode = dentry->d_inode;
786 nfs_readdir_descriptor_t my_desc, 850 nfs_readdir_descriptor_t my_desc,
787 *desc = &my_desc; 851 *desc = &my_desc;
852 struct nfs_open_dir_context *dir_ctx = filp->private_data;
788 int res; 853 int res;
789 854
790 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 855 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -801,7 +866,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
801 memset(desc, 0, sizeof(*desc)); 866 memset(desc, 0, sizeof(*desc));
802 867
803 desc->file = filp; 868 desc->file = filp;
804 desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; 869 desc->dir_cookie = &dir_ctx->dir_cookie;
805 desc->decode = NFS_PROTO(inode)->decode_dirent; 870 desc->decode = NFS_PROTO(inode)->decode_dirent;
806 desc->plus = NFS_USE_READDIRPLUS(inode); 871 desc->plus = NFS_USE_READDIRPLUS(inode);
807 872
@@ -853,6 +918,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
853{ 918{
854 struct dentry *dentry = filp->f_path.dentry; 919 struct dentry *dentry = filp->f_path.dentry;
855 struct inode *inode = dentry->d_inode; 920 struct inode *inode = dentry->d_inode;
921 struct nfs_open_dir_context *dir_ctx = filp->private_data;
856 922
857 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", 923 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
858 dentry->d_parent->d_name.name, 924 dentry->d_parent->d_name.name,
@@ -872,7 +938,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
872 } 938 }
873 if (offset != filp->f_pos) { 939 if (offset != filp->f_pos) {
874 filp->f_pos = offset; 940 filp->f_pos = offset;
875 nfs_file_open_context(filp)->dir_cookie = 0; 941 dir_ctx->dir_cookie = 0;
942 dir_ctx->duped = 0;
876 } 943 }
877out: 944out:
878 mutex_unlock(&inode->i_mutex); 945 mutex_unlock(&inode->i_mutex);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d85a534b15cd..3ac5bd695e5e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync)
326 ret = xchg(&ctx->error, 0); 326 ret = xchg(&ctx->error, 0);
327 if (!ret && status < 0) 327 if (!ret && status < 0)
328 ret = status; 328 ret = status;
329 if (!ret && !datasync)
330 /* application has asked for meta-data sync */
331 ret = pnfs_layoutcommit_inode(inode, true);
329 return ret; 332 return ret;
330} 333}
331 334
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 1084792bc0fe..dcb61548887f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -222,6 +222,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
222 goto out; 222 goto out;
223 } 223 }
224 224
225 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
226 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
227 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
228
225 inode = nfs_fhget(sb, mntfh, fattr); 229 inode = nfs_fhget(sb, mntfh, fattr);
226 if (IS_ERR(inode)) { 230 if (IS_ERR(inode)) {
227 dprintk("nfs_get_root: get root inode failed\n"); 231 dprintk("nfs_get_root: get root inode failed\n");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 058d7d63e566..57bb31ad7a5e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -641,7 +641,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cr
641 ctx->mode = f_mode; 641 ctx->mode = f_mode;
642 ctx->flags = 0; 642 ctx->flags = 0;
643 ctx->error = 0; 643 ctx->error = 0;
644 ctx->dir_cookie = 0;
645 nfs_init_lock_context(&ctx->lock_context); 644 nfs_init_lock_context(&ctx->lock_context);
646 ctx->lock_context.open_context = ctx; 645 ctx->lock_context.open_context = ctx;
647 INIT_LIST_HEAD(&ctx->list); 646 INIT_LIST_HEAD(&ctx->list);
@@ -1473,6 +1472,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1473 nfsi->delegation_state = 0; 1472 nfsi->delegation_state = 0;
1474 init_rwsem(&nfsi->rwsem); 1473 init_rwsem(&nfsi->rwsem);
1475 nfsi->layout = NULL; 1474 nfsi->layout = NULL;
1475 atomic_set(&nfsi->commits_outstanding, 0);
1476#endif 1476#endif
1477} 1477}
1478 1478
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 345d86b90e26..ce118ce885dd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -283,11 +283,25 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
283extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 283extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
284 284
285/* write.c */ 285/* write.c */
286extern void nfs_commit_free(struct nfs_write_data *p);
286extern int nfs_initiate_write(struct nfs_write_data *data, 287extern int nfs_initiate_write(struct nfs_write_data *data,
287 struct rpc_clnt *clnt, 288 struct rpc_clnt *clnt,
288 const struct rpc_call_ops *call_ops, 289 const struct rpc_call_ops *call_ops,
289 int how); 290 int how);
290extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 291extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
292extern int nfs_initiate_commit(struct nfs_write_data *data,
293 struct rpc_clnt *clnt,
294 const struct rpc_call_ops *call_ops,
295 int how);
296extern void nfs_init_commit(struct nfs_write_data *data,
297 struct list_head *head,
298 struct pnfs_layout_segment *lseg);
299void nfs_retry_commit(struct list_head *page_list,
300 struct pnfs_layout_segment *lseg);
301void nfs_commit_clear_lock(struct nfs_inode *nfsi);
302void nfs_commitdata_release(void *data);
303void nfs_commit_release_pages(struct nfs_write_data *data);
304
291#ifdef CONFIG_MIGRATION 305#ifdef CONFIG_MIGRATION
292extern int nfs_migrate_page(struct address_space *, 306extern int nfs_migrate_page(struct address_space *,
293 struct page *, struct page *); 307 struct page *, struct page *);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 72e5f1a2883e..e1c261ddd65d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -263,6 +263,8 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
263extern int nfs4_init_session(struct nfs_server *server); 263extern int nfs4_init_session(struct nfs_server *server);
264extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 264extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
265 struct nfs_fsinfo *fsinfo); 265 struct nfs_fsinfo *fsinfo);
266extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
267 bool sync);
266 268
267static inline bool 269static inline bool
268is_ds_only_client(struct nfs_client *clp) 270is_ds_only_client(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 428558464817..6f8192f4cfc7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -154,6 +154,23 @@ static int filelayout_read_done_cb(struct rpc_task *task,
154} 154}
155 155
156/* 156/*
157 * We reference the rpc_cred of the first WRITE that triggers the need for
158 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
159 * rfc5661 is not clear about which credential should be used.
160 */
161static void
162filelayout_set_layoutcommit(struct nfs_write_data *wdata)
163{
164 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
165 wdata->res.verf->committed == NFS_FILE_SYNC)
166 return;
167
168 pnfs_set_layoutcommit(wdata);
169 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
170 (unsigned long) wdata->lseg->pls_end_pos);
171}
172
173/*
157 * Call ops for the async read/write cases 174 * Call ops for the async read/write cases
158 * In the case of dense layouts, the offset needs to be reset to its 175 * In the case of dense layouts, the offset needs to be reset to its
159 * original value. 176 * original value.
@@ -210,6 +227,38 @@ static int filelayout_write_done_cb(struct rpc_task *task,
210 return -EAGAIN; 227 return -EAGAIN;
211 } 228 }
212 229
230 filelayout_set_layoutcommit(data);
231 return 0;
232}
233
234/* Fake up some data that will cause nfs_commit_release to retry the writes. */
235static void prepare_to_resend_writes(struct nfs_write_data *data)
236{
237 struct nfs_page *first = nfs_list_entry(data->pages.next);
238
239 data->task.tk_status = 0;
240 memcpy(data->verf.verifier, first->wb_verf.verifier,
241 sizeof(first->wb_verf.verifier));
242 data->verf.verifier[0]++; /* ensure verifier mismatch */
243}
244
245static int filelayout_commit_done_cb(struct rpc_task *task,
246 struct nfs_write_data *data)
247{
248 int reset = 0;
249
250 if (filelayout_async_handle_error(task, data->args.context->state,
251 data->ds_clp, &reset) == -EAGAIN) {
252 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
253 __func__, data->ds_clp, data->ds_clp->cl_session);
254 if (reset) {
255 prepare_to_resend_writes(data);
256 filelayout_set_lo_fail(data->lseg);
257 } else
258 nfs_restart_rpc(task, data->ds_clp);
259 return -EAGAIN;
260 }
261
213 return 0; 262 return 0;
214} 263}
215 264
@@ -240,6 +289,16 @@ static void filelayout_write_release(void *data)
240 wdata->mds_ops->rpc_release(data); 289 wdata->mds_ops->rpc_release(data);
241} 290}
242 291
292static void filelayout_commit_release(void *data)
293{
294 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
295
296 nfs_commit_release_pages(wdata);
297 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
298 nfs_commit_clear_lock(NFS_I(wdata->inode));
299 nfs_commitdata_release(wdata);
300}
301
243struct rpc_call_ops filelayout_read_call_ops = { 302struct rpc_call_ops filelayout_read_call_ops = {
244 .rpc_call_prepare = filelayout_read_prepare, 303 .rpc_call_prepare = filelayout_read_prepare,
245 .rpc_call_done = filelayout_read_call_done, 304 .rpc_call_done = filelayout_read_call_done,
@@ -252,6 +311,12 @@ struct rpc_call_ops filelayout_write_call_ops = {
252 .rpc_release = filelayout_write_release, 311 .rpc_release = filelayout_write_release,
253}; 312};
254 313
314struct rpc_call_ops filelayout_commit_call_ops = {
315 .rpc_call_prepare = filelayout_write_prepare,
316 .rpc_call_done = filelayout_write_call_done,
317 .rpc_release = filelayout_commit_release,
318};
319
255static enum pnfs_try_status 320static enum pnfs_try_status
256filelayout_read_pagelist(struct nfs_read_data *data) 321filelayout_read_pagelist(struct nfs_read_data *data)
257{ 322{
@@ -320,10 +385,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
320 data->inode->i_ino, sync, (size_t) data->args.count, offset, 385 data->inode->i_ino, sync, (size_t) data->args.count, offset,
321 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 386 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
322 387
323 /* We can't handle commit to ds yet */
324 if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
325 data->args.stable = NFS_FILE_SYNC;
326
327 data->write_done_cb = filelayout_write_done_cb; 388 data->write_done_cb = filelayout_write_done_cb;
328 data->ds_clp = ds->ds_clp; 389 data->ds_clp = ds->ds_clp;
329 fh = nfs4_fl_select_ds_fh(lseg, j); 390 fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -441,12 +502,33 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
441 struct nfs4_layoutget_res *lgr, 502 struct nfs4_layoutget_res *lgr,
442 struct nfs4_deviceid *id) 503 struct nfs4_deviceid *id)
443{ 504{
444 uint32_t *p = (uint32_t *)lgr->layout.buf; 505 struct xdr_stream stream;
506 struct xdr_buf buf = {
507 .pages = lgr->layoutp->pages,
508 .page_len = lgr->layoutp->len,
509 .buflen = lgr->layoutp->len,
510 .len = lgr->layoutp->len,
511 };
512 struct page *scratch;
513 __be32 *p;
445 uint32_t nfl_util; 514 uint32_t nfl_util;
446 int i; 515 int i;
447 516
448 dprintk("%s: set_layout_map Begin\n", __func__); 517 dprintk("%s: set_layout_map Begin\n", __func__);
449 518
519 scratch = alloc_page(GFP_KERNEL);
520 if (!scratch)
521 return -ENOMEM;
522
523 xdr_init_decode(&stream, &buf, NULL);
524 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
525
526 /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
527 * num_fh (4) */
528 p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
529 if (unlikely(!p))
530 goto out_err;
531
450 memcpy(id, p, sizeof(*id)); 532 memcpy(id, p, sizeof(*id));
451 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); 533 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
452 print_deviceid(id); 534 print_deviceid(id);
@@ -468,32 +550,57 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
468 __func__, nfl_util, fl->num_fh, fl->first_stripe_index, 550 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
469 fl->pattern_offset); 551 fl->pattern_offset);
470 552
553 if (!fl->num_fh)
554 goto out_err;
555
471 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 556 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
472 GFP_KERNEL); 557 GFP_KERNEL);
473 if (!fl->fh_array) 558 if (!fl->fh_array)
474 return -ENOMEM; 559 goto out_err;
475 560
476 for (i = 0; i < fl->num_fh; i++) { 561 for (i = 0; i < fl->num_fh; i++) {
477 /* Do we want to use a mempool here? */ 562 /* Do we want to use a mempool here? */
478 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); 563 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
479 if (!fl->fh_array[i]) { 564 if (!fl->fh_array[i])
480 filelayout_free_fh_array(fl); 565 goto out_err_free;
481 return -ENOMEM; 566
482 } 567 p = xdr_inline_decode(&stream, 4);
568 if (unlikely(!p))
569 goto out_err_free;
483 fl->fh_array[i]->size = be32_to_cpup(p++); 570 fl->fh_array[i]->size = be32_to_cpup(p++);
484 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { 571 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
485 printk(KERN_ERR "Too big fh %d received %d\n", 572 printk(KERN_ERR "Too big fh %d received %d\n",
486 i, fl->fh_array[i]->size); 573 i, fl->fh_array[i]->size);
487 filelayout_free_fh_array(fl); 574 goto out_err_free;
488 return -EIO;
489 } 575 }
576
577 p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
578 if (unlikely(!p))
579 goto out_err_free;
490 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); 580 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
491 p += XDR_QUADLEN(fl->fh_array[i]->size);
492 dprintk("DEBUG: %s: fh len %d\n", __func__, 581 dprintk("DEBUG: %s: fh len %d\n", __func__,
493 fl->fh_array[i]->size); 582 fl->fh_array[i]->size);
494 } 583 }
495 584
585 __free_page(scratch);
496 return 0; 586 return 0;
587
588out_err_free:
589 filelayout_free_fh_array(fl);
590out_err:
591 __free_page(scratch);
592 return -EIO;
593}
594
595static void
596filelayout_free_lseg(struct pnfs_layout_segment *lseg)
597{
598 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
599
600 dprintk("--> %s\n", __func__);
601 nfs4_fl_put_deviceid(fl->dsaddr);
602 kfree(fl->commit_buckets);
603 _filelayout_free_lseg(fl);
497} 604}
498 605
499static struct pnfs_layout_segment * 606static struct pnfs_layout_segment *
@@ -514,17 +621,28 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
514 _filelayout_free_lseg(fl); 621 _filelayout_free_lseg(fl);
515 return NULL; 622 return NULL;
516 } 623 }
517 return &fl->generic_hdr;
518}
519 624
520static void 625 /* This assumes there is only one IOMODE_RW lseg. What
521filelayout_free_lseg(struct pnfs_layout_segment *lseg) 626 * we really want to do is have a layout_hdr level
522{ 627 * dictionary of <multipath_list4, fh> keys, each
523 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 628 * associated with a struct list_head, populated by calls
524 629 * to filelayout_write_pagelist().
525 dprintk("--> %s\n", __func__); 630 * */
526 nfs4_fl_put_deviceid(fl->dsaddr); 631 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
527 _filelayout_free_lseg(fl); 632 int i;
633 int size = (fl->stripe_type == STRIPE_SPARSE) ?
634 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
635
636 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
637 if (!fl->commit_buckets) {
638 filelayout_free_lseg(&fl->generic_hdr);
639 return NULL;
640 }
641 fl->number_of_buckets = size;
642 for (i = 0; i < size; i++)
643 INIT_LIST_HEAD(&fl->commit_buckets[i]);
644 }
645 return &fl->generic_hdr;
528} 646}
529 647
530/* 648/*
@@ -552,6 +670,191 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
552 return (p_stripe == r_stripe); 670 return (p_stripe == r_stripe);
553} 671}
554 672
673static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
674{
675 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
676}
677
678static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
679{
680 if (fl->stripe_type == STRIPE_SPARSE)
681 return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
682 else
683 return j;
684}
685
686struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
687{
688 struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
689 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
690 u32 i, j;
691 struct list_head *list;
692
693 /* Note that we are calling nfs4_fl_calc_j_index on each page
694 * that ends up being committed to a data server. An attractive
695 * alternative is to add a field to nfs_write_data and nfs_page
696 * to store the value calculated in filelayout_write_pagelist
697 * and just use that here.
698 */
699 j = nfs4_fl_calc_j_index(lseg,
700 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
701 i = select_bucket_index(fl, j);
702 list = &fl->commit_buckets[i];
703 if (list_empty(list)) {
704 /* Non-empty buckets hold a reference on the lseg */
705 get_lseg(lseg);
706 }
707 return list;
708}
709
710static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
711{
712 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
713
714 if (flseg->stripe_type == STRIPE_SPARSE)
715 return i;
716 else
717 return nfs4_fl_calc_ds_index(lseg, i);
718}
719
720static struct nfs_fh *
721select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
722{
723 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
724
725 if (flseg->stripe_type == STRIPE_SPARSE) {
726 if (flseg->num_fh == 1)
727 i = 0;
728 else if (flseg->num_fh == 0)
729 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
730 return NULL;
731 }
732 return flseg->fh_array[i];
733}
734
735static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
736{
737 struct pnfs_layout_segment *lseg = data->lseg;
738 struct nfs4_pnfs_ds *ds;
739 u32 idx;
740 struct nfs_fh *fh;
741
742 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
743 ds = nfs4_fl_prepare_ds(lseg, idx);
744 if (!ds) {
745 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
746 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
747 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
748 prepare_to_resend_writes(data);
749 data->mds_ops->rpc_release(data);
750 return -EAGAIN;
751 }
752 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
753 data->write_done_cb = filelayout_commit_done_cb;
754 data->ds_clp = ds->ds_clp;
755 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
756 if (fh)
757 data->args.fh = fh;
758 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
759 &filelayout_commit_call_ops, how);
760}
761
762/*
763 * This is only useful while we are using whole file layouts.
764 */
765static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
766{
767 struct pnfs_layout_segment *lseg, *rv = NULL;
768
769 spin_lock(&inode->i_lock);
770 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
771 if (lseg->pls_range.iomode == IOMODE_RW)
772 rv = get_lseg(lseg);
773 spin_unlock(&inode->i_lock);
774 return rv;
775}
776
777static int alloc_ds_commits(struct inode *inode, struct list_head *list)
778{
779 struct pnfs_layout_segment *lseg;
780 struct nfs4_filelayout_segment *fl;
781 struct nfs_write_data *data;
782 int i, j;
783
784 /* Won't need this when non-whole file layout segments are supported
785 * instead we will use a pnfs_layout_hdr structure */
786 lseg = find_only_write_lseg(inode);
787 if (!lseg)
788 return 0;
789 fl = FILELAYOUT_LSEG(lseg);
790 for (i = 0; i < fl->number_of_buckets; i++) {
791 if (list_empty(&fl->commit_buckets[i]))
792 continue;
793 data = nfs_commitdata_alloc();
794 if (!data)
795 goto out_bad;
796 data->ds_commit_index = i;
797 data->lseg = lseg;
798 list_add(&data->pages, list);
799 }
800 put_lseg(lseg);
801 return 0;
802
803out_bad:
804 for (j = i; j < fl->number_of_buckets; j++) {
805 if (list_empty(&fl->commit_buckets[i]))
806 continue;
807 nfs_retry_commit(&fl->commit_buckets[i], lseg);
808 put_lseg(lseg); /* associated with emptying bucket */
809 }
810 put_lseg(lseg);
811 /* Caller will clean up entries put on list */
812 return -ENOMEM;
813}
814
815/* This follows nfs_commit_list pretty closely */
816static int
817filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
818 int how)
819{
820 struct nfs_write_data *data, *tmp;
821 LIST_HEAD(list);
822
823 if (!list_empty(mds_pages)) {
824 data = nfs_commitdata_alloc();
825 if (!data)
826 goto out_bad;
827 data->lseg = NULL;
828 list_add(&data->pages, &list);
829 }
830
831 if (alloc_ds_commits(inode, &list))
832 goto out_bad;
833
834 list_for_each_entry_safe(data, tmp, &list, pages) {
835 list_del_init(&data->pages);
836 atomic_inc(&NFS_I(inode)->commits_outstanding);
837 if (!data->lseg) {
838 nfs_init_commit(data, mds_pages, NULL);
839 nfs_initiate_commit(data, NFS_CLIENT(inode),
840 data->mds_ops, how);
841 } else {
842 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
843 filelayout_initiate_commit(data, how);
844 }
845 }
846 return 0;
847 out_bad:
848 list_for_each_entry_safe(data, tmp, &list, pages) {
849 nfs_retry_commit(&data->pages, data->lseg);
850 list_del_init(&data->pages);
851 nfs_commit_free(data);
852 }
853 nfs_retry_commit(mds_pages, NULL);
854 nfs_commit_clear_lock(NFS_I(inode));
855 return -ENOMEM;
856}
857
555static struct pnfs_layoutdriver_type filelayout_type = { 858static struct pnfs_layoutdriver_type filelayout_type = {
556 .id = LAYOUT_NFSV4_1_FILES, 859 .id = LAYOUT_NFSV4_1_FILES,
557 .name = "LAYOUT_NFSV4_1_FILES", 860 .name = "LAYOUT_NFSV4_1_FILES",
@@ -559,6 +862,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
559 .alloc_lseg = filelayout_alloc_lseg, 862 .alloc_lseg = filelayout_alloc_lseg,
560 .free_lseg = filelayout_free_lseg, 863 .free_lseg = filelayout_free_lseg,
561 .pg_test = filelayout_pg_test, 864 .pg_test = filelayout_pg_test,
865 .mark_pnfs_commit = filelayout_mark_pnfs_commit,
866 .choose_commit_list = filelayout_choose_commit_list,
867 .commit_pagelist = filelayout_commit_pagelist,
562 .read_pagelist = filelayout_read_pagelist, 868 .read_pagelist = filelayout_read_pagelist,
563 .write_pagelist = filelayout_write_pagelist, 869 .write_pagelist = filelayout_write_pagelist,
564}; 870};
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index ee0c907742b5..085a354e0f08 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -79,6 +79,8 @@ struct nfs4_filelayout_segment {
79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
80 unsigned int num_fh; 80 unsigned int num_fh;
81 struct nfs_fh **fh_array; 81 struct nfs_fh **fh_array;
82 struct list_head *commit_buckets; /* Sort commits to ds */
83 int number_of_buckets;
82}; 84};
83 85
84static inline struct nfs4_filelayout_segment * 86static inline struct nfs4_filelayout_segment *
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 68143c162e3b..de5350f2b249 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -261,7 +261,7 @@ out:
261 * Currently only support ipv4, and one multi-path address. 261 * Currently only support ipv4, and one multi-path address.
262 */ 262 */
263static struct nfs4_pnfs_ds * 263static struct nfs4_pnfs_ds *
264decode_and_add_ds(__be32 **pp, struct inode *inode) 264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
265{ 265{
266 struct nfs4_pnfs_ds *ds = NULL; 266 struct nfs4_pnfs_ds *ds = NULL;
267 char *buf; 267 char *buf;
@@ -269,25 +269,34 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
269 u32 ip_addr, port; 269 u32 ip_addr, port;
270 int nlen, rlen, i; 270 int nlen, rlen, i;
271 int tmp[2]; 271 int tmp[2];
272 __be32 *r_netid, *r_addr, *p = *pp; 272 __be32 *p;
273 273
274 /* r_netid */ 274 /* r_netid */
275 p = xdr_inline_decode(streamp, 4);
276 if (unlikely(!p))
277 goto out_err;
275 nlen = be32_to_cpup(p++); 278 nlen = be32_to_cpup(p++);
276 r_netid = p;
277 p += XDR_QUADLEN(nlen);
278 279
279 /* r_addr */ 280 p = xdr_inline_decode(streamp, nlen);
280 rlen = be32_to_cpup(p++); 281 if (unlikely(!p))
281 r_addr = p; 282 goto out_err;
282 p += XDR_QUADLEN(rlen);
283 *pp = p;
284 283
285 /* Check that netid is "tcp" */ 284 /* Check that netid is "tcp" */
286 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { 285 if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
287 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); 286 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
288 goto out_err; 287 goto out_err;
289 } 288 }
290 289
290 /* r_addr */
291 p = xdr_inline_decode(streamp, 4);
292 if (unlikely(!p))
293 goto out_err;
294 rlen = be32_to_cpup(p);
295
296 p = xdr_inline_decode(streamp, rlen);
297 if (unlikely(!p))
298 goto out_err;
299
291 /* ipv6 length plus port is legal */ 300 /* ipv6 length plus port is legal */
292 if (rlen > INET6_ADDRSTRLEN + 8) { 301 if (rlen > INET6_ADDRSTRLEN + 8) {
293 dprintk("%s: Invalid address, length %d\n", __func__, 302 dprintk("%s: Invalid address, length %d\n", __func__,
@@ -300,7 +309,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
300 goto out_err; 309 goto out_err;
301 } 310 }
302 buf[rlen] = '\0'; 311 buf[rlen] = '\0';
303 memcpy(buf, r_addr, rlen); 312 memcpy(buf, p, rlen);
304 313
305 /* replace the port dots with dashes for the in4_pton() delimiter*/ 314 /* replace the port dots with dashes for the in4_pton() delimiter*/
306 for (i = 0; i < 2; i++) { 315 for (i = 0; i < 2; i++) {
@@ -336,90 +345,154 @@ out_err:
336static struct nfs4_file_layout_dsaddr* 345static struct nfs4_file_layout_dsaddr*
337decode_device(struct inode *ino, struct pnfs_device *pdev) 346decode_device(struct inode *ino, struct pnfs_device *pdev)
338{ 347{
339 int i, dummy; 348 int i;
340 u32 cnt, num; 349 u32 cnt, num;
341 u8 *indexp; 350 u8 *indexp;
342 __be32 *p = (__be32 *)pdev->area, *indicesp; 351 __be32 *p;
343 struct nfs4_file_layout_dsaddr *dsaddr; 352 u8 *stripe_indices;
353 u8 max_stripe_index;
354 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
355 struct xdr_stream stream;
356 struct xdr_buf buf = {
357 .pages = pdev->pages,
358 .page_len = pdev->pglen,
359 .buflen = pdev->pglen,
360 .len = pdev->pglen,
361 };
362 struct page *scratch;
363
364 /* set up xdr stream */
365 scratch = alloc_page(GFP_KERNEL);
366 if (!scratch)
367 goto out_err;
368
369 xdr_init_decode(&stream, &buf, NULL);
370 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
344 371
345 /* Get the stripe count (number of stripe index) */ 372 /* Get the stripe count (number of stripe index) */
346 cnt = be32_to_cpup(p++); 373 p = xdr_inline_decode(&stream, 4);
374 if (unlikely(!p))
375 goto out_err_free_scratch;
376
377 cnt = be32_to_cpup(p);
347 dprintk("%s stripe count %d\n", __func__, cnt); 378 dprintk("%s stripe count %d\n", __func__, cnt);
348 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { 379 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
349 printk(KERN_WARNING "%s: stripe count %d greater than " 380 printk(KERN_WARNING "%s: stripe count %d greater than "
350 "supported maximum %d\n", __func__, 381 "supported maximum %d\n", __func__,
351 cnt, NFS4_PNFS_MAX_STRIPE_CNT); 382 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
352 goto out_err; 383 goto out_err_free_scratch;
384 }
385
386 /* read stripe indices */
387 stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
388 if (!stripe_indices)
389 goto out_err_free_scratch;
390
391 p = xdr_inline_decode(&stream, cnt << 2);
392 if (unlikely(!p))
393 goto out_err_free_stripe_indices;
394
395 indexp = &stripe_indices[0];
396 max_stripe_index = 0;
397 for (i = 0; i < cnt; i++) {
398 *indexp = be32_to_cpup(p++);
399 max_stripe_index = max(max_stripe_index, *indexp);
400 indexp++;
353 } 401 }
354 402
355 /* Check the multipath list count */ 403 /* Check the multipath list count */
356 indicesp = p; 404 p = xdr_inline_decode(&stream, 4);
357 p += XDR_QUADLEN(cnt << 2); 405 if (unlikely(!p))
358 num = be32_to_cpup(p++); 406 goto out_err_free_stripe_indices;
407
408 num = be32_to_cpup(p);
359 dprintk("%s ds_num %u\n", __func__, num); 409 dprintk("%s ds_num %u\n", __func__, num);
360 if (num > NFS4_PNFS_MAX_MULTI_CNT) { 410 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
361 printk(KERN_WARNING "%s: multipath count %d greater than " 411 printk(KERN_WARNING "%s: multipath count %d greater than "
362 "supported maximum %d\n", __func__, 412 "supported maximum %d\n", __func__,
363 num, NFS4_PNFS_MAX_MULTI_CNT); 413 num, NFS4_PNFS_MAX_MULTI_CNT);
364 goto out_err; 414 goto out_err_free_stripe_indices;
365 } 415 }
416
417 /* validate stripe indices are all < num */
418 if (max_stripe_index >= num) {
419 printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
420 __func__, max_stripe_index, num);
421 goto out_err_free_stripe_indices;
422 }
423
366 dsaddr = kzalloc(sizeof(*dsaddr) + 424 dsaddr = kzalloc(sizeof(*dsaddr) +
367 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
368 GFP_KERNEL); 426 GFP_KERNEL);
369 if (!dsaddr) 427 if (!dsaddr)
370 goto out_err; 428 goto out_err_free_stripe_indices;
371
372 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
373 if (!dsaddr->stripe_indices)
374 goto out_err_free;
375 429
376 dsaddr->stripe_count = cnt; 430 dsaddr->stripe_count = cnt;
431 dsaddr->stripe_indices = stripe_indices;
432 stripe_indices = NULL;
377 dsaddr->ds_num = num; 433 dsaddr->ds_num = num;
378 434
379 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); 435 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
380 436
381 /* Go back an read stripe indices */
382 p = indicesp;
383 indexp = &dsaddr->stripe_indices[0];
384 for (i = 0; i < dsaddr->stripe_count; i++) {
385 *indexp = be32_to_cpup(p++);
386 if (*indexp >= num)
387 goto out_err_free;
388 indexp++;
389 }
390 /* Skip already read multipath list count */
391 p++;
392
393 for (i = 0; i < dsaddr->ds_num; i++) { 437 for (i = 0; i < dsaddr->ds_num; i++) {
394 int j; 438 int j;
439 u32 mp_count;
440
441 p = xdr_inline_decode(&stream, 4);
442 if (unlikely(!p))
443 goto out_err_free_deviceid;
395 444
396 dummy = be32_to_cpup(p++); /* multipath count */ 445 mp_count = be32_to_cpup(p); /* multipath count */
397 if (dummy > 1) { 446 if (mp_count > 1) {
398 printk(KERN_WARNING 447 printk(KERN_WARNING
399 "%s: Multipath count %d not supported, " 448 "%s: Multipath count %d not supported, "
400 "skipping all greater than 1\n", __func__, 449 "skipping all greater than 1\n", __func__,
401 dummy); 450 mp_count);
402 } 451 }
403 for (j = 0; j < dummy; j++) { 452 for (j = 0; j < mp_count; j++) {
404 if (j == 0) { 453 if (j == 0) {
405 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); 454 dsaddr->ds_list[i] = decode_and_add_ds(&stream,
455 ino);
406 if (dsaddr->ds_list[i] == NULL) 456 if (dsaddr->ds_list[i] == NULL)
407 goto out_err_free; 457 goto out_err_free_deviceid;
408 } else { 458 } else {
409 u32 len; 459 u32 len;
410 /* skip extra multipath */ 460 /* skip extra multipath */
411 len = be32_to_cpup(p++); 461
412 p += XDR_QUADLEN(len); 462 /* read len, skip */
413 len = be32_to_cpup(p++); 463 p = xdr_inline_decode(&stream, 4);
414 p += XDR_QUADLEN(len); 464 if (unlikely(!p))
415 continue; 465 goto out_err_free_deviceid;
466 len = be32_to_cpup(p);
467
468 p = xdr_inline_decode(&stream, len);
469 if (unlikely(!p))
470 goto out_err_free_deviceid;
471
472 /* read len, skip */
473 p = xdr_inline_decode(&stream, 4);
474 if (unlikely(!p))
475 goto out_err_free_deviceid;
476 len = be32_to_cpup(p);
477
478 p = xdr_inline_decode(&stream, len);
479 if (unlikely(!p))
480 goto out_err_free_deviceid;
416 } 481 }
417 } 482 }
418 } 483 }
484
485 __free_page(scratch);
419 return dsaddr; 486 return dsaddr;
420 487
421out_err_free: 488out_err_free_deviceid:
422 nfs4_fl_free_deviceid(dsaddr); 489 nfs4_fl_free_deviceid(dsaddr);
490 /* stripe_indicies was part of dsaddr */
491 goto out_err_free_scratch;
492out_err_free_stripe_indices:
493 kfree(stripe_indices);
494out_err_free_scratch:
495 __free_page(scratch);
423out_err: 496out_err:
424 dprintk("%s ERROR: returning NULL\n", __func__); 497 dprintk("%s ERROR: returning NULL\n", __func__);
425 return NULL; 498 return NULL;
@@ -498,11 +571,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
498 goto out_free; 571 goto out_free;
499 } 572 }
500 573
501 /* set pdev->area */
502 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
503 if (!pdev->area)
504 goto out_free;
505
506 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); 574 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
507 pdev->layout_type = LAYOUT_NFSV4_1_FILES; 575 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
508 pdev->pages = pages; 576 pdev->pages = pages;
@@ -521,8 +589,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
521 */ 589 */
522 dsaddr = decode_and_add_device(inode, pdev); 590 dsaddr = decode_and_add_device(inode, pdev);
523out_free: 591out_free:
524 if (pdev->area != NULL)
525 vunmap(pdev->area);
526 for (i = 0; i < max_pages; i++) 592 for (i = 0; i < max_pages; i++)
527 __free_page(pages[i]); 593 __free_page(pages[i]);
528 kfree(pages); 594 kfree(pages);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f9150f03d640..dfd1e6d7e6c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3253,12 +3253,9 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3253 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 3253 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
3254} 3254}
3255 3255
3256static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3256static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3257{ 3257{
3258 struct inode *inode = data->inode; 3258 struct inode *inode = data->inode;
3259
3260 if (!nfs4_sequence_done(task, &data->res.seq_res))
3261 return -EAGAIN;
3262 3259
3263 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3260 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3264 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3261 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
@@ -3268,11 +3265,24 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3268 return 0; 3265 return 0;
3269} 3266}
3270 3267
3268static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3269{
3270 if (!nfs4_sequence_done(task, &data->res.seq_res))
3271 return -EAGAIN;
3272 return data->write_done_cb(task, data);
3273}
3274
3271static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3275static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
3272{ 3276{
3273 struct nfs_server *server = NFS_SERVER(data->inode); 3277 struct nfs_server *server = NFS_SERVER(data->inode);
3274 3278
3275 data->args.bitmask = server->cache_consistency_bitmask; 3279 if (data->lseg) {
3280 data->args.bitmask = NULL;
3281 data->res.fattr = NULL;
3282 } else
3283 data->args.bitmask = server->cache_consistency_bitmask;
3284 if (!data->write_done_cb)
3285 data->write_done_cb = nfs4_commit_done_cb;
3276 data->res.server = server; 3286 data->res.server = server;
3277 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3287 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3278} 3288}
@@ -5608,8 +5618,6 @@ static void nfs4_layoutget_release(void *calldata)
5608 struct nfs4_layoutget *lgp = calldata; 5618 struct nfs4_layoutget *lgp = calldata;
5609 5619
5610 dprintk("--> %s\n", __func__); 5620 dprintk("--> %s\n", __func__);
5611 if (lgp->res.layout.buf != NULL)
5612 free_page((unsigned long) lgp->res.layout.buf);
5613 put_nfs_open_context(lgp->args.ctx); 5621 put_nfs_open_context(lgp->args.ctx);
5614 kfree(calldata); 5622 kfree(calldata);
5615 dprintk("<-- %s\n", __func__); 5623 dprintk("<-- %s\n", __func__);
@@ -5641,12 +5649,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5641 5649
5642 dprintk("--> %s\n", __func__); 5650 dprintk("--> %s\n", __func__);
5643 5651
5644 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); 5652 lgp->res.layoutp = &lgp->args.layout;
5645 if (lgp->res.layout.buf == NULL) {
5646 nfs4_layoutget_release(lgp);
5647 return -ENOMEM;
5648 }
5649
5650 lgp->res.seq_res.sr_slot = NULL; 5653 lgp->res.seq_res.sr_slot = NULL;
5651 task = rpc_run_task(&task_setup_data); 5654 task = rpc_run_task(&task_setup_data);
5652 if (IS_ERR(task)) 5655 if (IS_ERR(task))
@@ -5698,6 +5701,100 @@ int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5698} 5701}
5699EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); 5702EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5700 5703
5704static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
5705{
5706 struct nfs4_layoutcommit_data *data = calldata;
5707 struct nfs_server *server = NFS_SERVER(data->args.inode);
5708
5709 if (nfs4_setup_sequence(server, &data->args.seq_args,
5710 &data->res.seq_res, 1, task))
5711 return;
5712 rpc_call_start(task);
5713}
5714
5715static void
5716nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
5717{
5718 struct nfs4_layoutcommit_data *data = calldata;
5719 struct nfs_server *server = NFS_SERVER(data->args.inode);
5720
5721 if (!nfs4_sequence_done(task, &data->res.seq_res))
5722 return;
5723
5724 switch (task->tk_status) { /* Just ignore these failures */
5725 case NFS4ERR_DELEG_REVOKED: /* layout was recalled */
5726 case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
5727 case NFS4ERR_BADLAYOUT: /* no layout */
5728 case NFS4ERR_GRACE: /* loca_recalim always false */
5729 task->tk_status = 0;
5730 }
5731
5732 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5733 nfs_restart_rpc(task, server->nfs_client);
5734 return;
5735 }
5736
5737 if (task->tk_status == 0)
5738 nfs_post_op_update_inode_force_wcc(data->args.inode,
5739 data->res.fattr);
5740}
5741
5742static void nfs4_layoutcommit_release(void *calldata)
5743{
5744 struct nfs4_layoutcommit_data *data = calldata;
5745
5746 /* Matched by references in pnfs_set_layoutcommit */
5747 put_lseg(data->lseg);
5748 put_rpccred(data->cred);
5749 kfree(data);
5750}
5751
5752static const struct rpc_call_ops nfs4_layoutcommit_ops = {
5753 .rpc_call_prepare = nfs4_layoutcommit_prepare,
5754 .rpc_call_done = nfs4_layoutcommit_done,
5755 .rpc_release = nfs4_layoutcommit_release,
5756};
5757
5758int
5759nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
5760{
5761 struct rpc_message msg = {
5762 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
5763 .rpc_argp = &data->args,
5764 .rpc_resp = &data->res,
5765 .rpc_cred = data->cred,
5766 };
5767 struct rpc_task_setup task_setup_data = {
5768 .task = &data->task,
5769 .rpc_client = NFS_CLIENT(data->args.inode),
5770 .rpc_message = &msg,
5771 .callback_ops = &nfs4_layoutcommit_ops,
5772 .callback_data = data,
5773 .flags = RPC_TASK_ASYNC,
5774 };
5775 struct rpc_task *task;
5776 int status = 0;
5777
5778 dprintk("NFS: %4d initiating layoutcommit call. sync %d "
5779 "lbw: %llu inode %lu\n",
5780 data->task.tk_pid, sync,
5781 data->args.lastbytewritten,
5782 data->args.inode->i_ino);
5783
5784 task = rpc_run_task(&task_setup_data);
5785 if (IS_ERR(task))
5786 return PTR_ERR(task);
5787 if (sync == false)
5788 goto out;
5789 status = nfs4_wait_for_completion_rpc_task(task);
5790 if (status != 0)
5791 goto out;
5792 status = task->tk_status;
5793out:
5794 dprintk("%s: status %d\n", __func__, status);
5795 rpc_put_task(task);
5796 return status;
5797}
5701#endif /* CONFIG_NFS_V4_1 */ 5798#endif /* CONFIG_NFS_V4_1 */
5702 5799
5703struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5800struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 21c3004b72d5..dddfb5795d7b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -327,6 +327,18 @@ static int nfs4_stat_to_errno(int);
327#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ 327#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
328 decode_stateid_maxsz + \ 328 decode_stateid_maxsz + \
329 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) 329 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
330#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
331 2 /* offset */ + \
332 2 /* length */ + \
333 1 /* reclaim */ + \
334 encode_stateid_maxsz + \
335 1 /* new offset (true) */ + \
336 2 /* last byte written */ + \
337 1 /* nt_timechanged (false) */ + \
338 1 /* layoutupdate4 layout type */ + \
339 1 /* NULL filelayout layoutupdate4 payload */)
340#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
341
330#else /* CONFIG_NFS_V4_1 */ 342#else /* CONFIG_NFS_V4_1 */
331#define encode_sequence_maxsz 0 343#define encode_sequence_maxsz 0
332#define decode_sequence_maxsz 0 344#define decode_sequence_maxsz 0
@@ -738,6 +750,17 @@ static int nfs4_stat_to_errno(int);
738 decode_sequence_maxsz + \ 750 decode_sequence_maxsz + \
739 decode_putfh_maxsz + \ 751 decode_putfh_maxsz + \
740 decode_layoutget_maxsz) 752 decode_layoutget_maxsz)
753#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
754 encode_sequence_maxsz +\
755 encode_putfh_maxsz + \
756 encode_layoutcommit_maxsz + \
757 encode_getattr_maxsz)
758#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
759 decode_sequence_maxsz + \
760 decode_putfh_maxsz + \
761 decode_layoutcommit_maxsz + \
762 decode_getattr_maxsz)
763
741 764
742const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 765const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
743 compound_encode_hdr_maxsz + 766 compound_encode_hdr_maxsz +
@@ -1839,6 +1862,34 @@ encode_layoutget(struct xdr_stream *xdr,
1839 hdr->nops++; 1862 hdr->nops++;
1840 hdr->replen += decode_layoutget_maxsz; 1863 hdr->replen += decode_layoutget_maxsz;
1841} 1864}
1865
1866static int
1867encode_layoutcommit(struct xdr_stream *xdr,
1868 const struct nfs4_layoutcommit_args *args,
1869 struct compound_hdr *hdr)
1870{
1871 __be32 *p;
1872
1873 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
1874 NFS_SERVER(args->inode)->pnfs_curr_ld->id);
1875
1876 p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
1877 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
1878 /* Only whole file layouts */
1879 p = xdr_encode_hyper(p, 0); /* offset */
1880 p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */
1881 *p++ = cpu_to_be32(0); /* reclaim */
1882 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
1883 *p++ = cpu_to_be32(1); /* newoffset = TRUE */
1884 p = xdr_encode_hyper(p, args->lastbytewritten);
1885 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
1886 *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
1887 *p++ = cpu_to_be32(0); /* no file layout payload */
1888
1889 hdr->nops++;
1890 hdr->replen += decode_layoutcommit_maxsz;
1891 return 0;
1892}
1842#endif /* CONFIG_NFS_V4_1 */ 1893#endif /* CONFIG_NFS_V4_1 */
1843 1894
1844/* 1895/*
@@ -2317,7 +2368,8 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2317 encode_sequence(xdr, &args->seq_args, &hdr); 2368 encode_sequence(xdr, &args->seq_args, &hdr);
2318 encode_putfh(xdr, args->fh, &hdr); 2369 encode_putfh(xdr, args->fh, &hdr);
2319 encode_commit(xdr, args, &hdr); 2370 encode_commit(xdr, args, &hdr);
2320 encode_getfattr(xdr, args->bitmask, &hdr); 2371 if (args->bitmask)
2372 encode_getfattr(xdr, args->bitmask, &hdr);
2321 encode_nops(&hdr); 2373 encode_nops(&hdr);
2322} 2374}
2323 2375
@@ -2645,7 +2697,31 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
2645 encode_sequence(xdr, &args->seq_args, &hdr); 2697 encode_sequence(xdr, &args->seq_args, &hdr);
2646 encode_putfh(xdr, NFS_FH(args->inode), &hdr); 2698 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2647 encode_layoutget(xdr, args, &hdr); 2699 encode_layoutget(xdr, args, &hdr);
2700
2701 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
2702 args->layout.pages, 0, args->layout.pglen);
2703
2704 encode_nops(&hdr);
2705}
2706
2707/*
2708 * Encode LAYOUTCOMMIT request
2709 */
2710static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
2711 struct xdr_stream *xdr,
2712 struct nfs4_layoutcommit_args *args)
2713{
2714 struct compound_hdr hdr = {
2715 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2716 };
2717
2718 encode_compound_hdr(xdr, req, &hdr);
2719 encode_sequence(xdr, &args->seq_args, &hdr);
2720 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2721 encode_layoutcommit(xdr, args, &hdr);
2722 encode_getfattr(xdr, args->bitmask, &hdr);
2648 encode_nops(&hdr); 2723 encode_nops(&hdr);
2724 return 0;
2649} 2725}
2650#endif /* CONFIG_NFS_V4_1 */ 2726#endif /* CONFIG_NFS_V4_1 */
2651 2727
@@ -5063,6 +5139,9 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5063 __be32 *p; 5139 __be32 *p;
5064 int status; 5140 int status;
5065 u32 layout_count; 5141 u32 layout_count;
5142 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
5143 struct kvec *iov = rcvbuf->head;
5144 u32 hdrlen, recvd;
5066 5145
5067 status = decode_op_hdr(xdr, OP_LAYOUTGET); 5146 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5068 if (status) 5147 if (status)
@@ -5079,17 +5158,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5079 return -EINVAL; 5158 return -EINVAL;
5080 } 5159 }
5081 5160
5082 p = xdr_inline_decode(xdr, 24); 5161 p = xdr_inline_decode(xdr, 28);
5083 if (unlikely(!p)) 5162 if (unlikely(!p))
5084 goto out_overflow; 5163 goto out_overflow;
5085 p = xdr_decode_hyper(p, &res->range.offset); 5164 p = xdr_decode_hyper(p, &res->range.offset);
5086 p = xdr_decode_hyper(p, &res->range.length); 5165 p = xdr_decode_hyper(p, &res->range.length);
5087 res->range.iomode = be32_to_cpup(p++); 5166 res->range.iomode = be32_to_cpup(p++);
5088 res->type = be32_to_cpup(p++); 5167 res->type = be32_to_cpup(p++);
5089 5168 res->layoutp->len = be32_to_cpup(p);
5090 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
5091 if (unlikely(status))
5092 return status;
5093 5169
5094 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", 5170 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
5095 __func__, 5171 __func__,
@@ -5097,12 +5173,18 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5097 (unsigned long)res->range.length, 5173 (unsigned long)res->range.length,
5098 res->range.iomode, 5174 res->range.iomode,
5099 res->type, 5175 res->type,
5100 res->layout.len); 5176 res->layoutp->len);
5177
5178 hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
5179 recvd = req->rq_rcv_buf.len - hdrlen;
5180 if (res->layoutp->len > recvd) {
5181 dprintk("NFS: server cheating in layoutget reply: "
5182 "layout len %u > recvd %u\n",
5183 res->layoutp->len, recvd);
5184 return -EINVAL;
5185 }
5101 5186
5102 /* nfs4_proc_layoutget allocated a single page */ 5187 xdr_read_pages(xdr, res->layoutp->len);
5103 if (res->layout.len > PAGE_SIZE)
5104 return -ENOMEM;
5105 memcpy(res->layout.buf, p, res->layout.len);
5106 5188
5107 if (layout_count > 1) { 5189 if (layout_count > 1) {
5108 /* We only handle a length one array at the moment. Any 5190 /* We only handle a length one array at the moment. Any
@@ -5119,6 +5201,35 @@ out_overflow:
5119 print_overflow_msg(__func__, xdr); 5201 print_overflow_msg(__func__, xdr);
5120 return -EIO; 5202 return -EIO;
5121} 5203}
5204
5205static int decode_layoutcommit(struct xdr_stream *xdr,
5206 struct rpc_rqst *req,
5207 struct nfs4_layoutcommit_res *res)
5208{
5209 __be32 *p;
5210 __u32 sizechanged;
5211 int status;
5212
5213 status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
5214 if (status)
5215 return status;
5216
5217 p = xdr_inline_decode(xdr, 4);
5218 if (unlikely(!p))
5219 goto out_overflow;
5220 sizechanged = be32_to_cpup(p);
5221
5222 if (sizechanged) {
5223 /* throw away new size */
5224 p = xdr_inline_decode(xdr, 8);
5225 if (unlikely(!p))
5226 goto out_overflow;
5227 }
5228 return 0;
5229out_overflow:
5230 print_overflow_msg(__func__, xdr);
5231 return -EIO;
5232}
5122#endif /* CONFIG_NFS_V4_1 */ 5233#endif /* CONFIG_NFS_V4_1 */
5123 5234
5124/* 5235/*
@@ -5836,8 +5947,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5836 status = decode_commit(xdr, res); 5947 status = decode_commit(xdr, res);
5837 if (status) 5948 if (status)
5838 goto out; 5949 goto out;
5839 decode_getfattr(xdr, res->fattr, res->server, 5950 if (res->fattr)
5840 !RPC_IS_ASYNC(rqstp->rq_task)); 5951 decode_getfattr(xdr, res->fattr, res->server,
5952 !RPC_IS_ASYNC(rqstp->rq_task));
5841out: 5953out:
5842 return status; 5954 return status;
5843} 5955}
@@ -6205,6 +6317,34 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
6205out: 6317out:
6206 return status; 6318 return status;
6207} 6319}
6320
6321/*
6322 * Decode LAYOUTCOMMIT response
6323 */
6324static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
6325 struct xdr_stream *xdr,
6326 struct nfs4_layoutcommit_res *res)
6327{
6328 struct compound_hdr hdr;
6329 int status;
6330
6331 status = decode_compound_hdr(xdr, &hdr);
6332 if (status)
6333 goto out;
6334 status = decode_sequence(xdr, &res->seq_res, rqstp);
6335 if (status)
6336 goto out;
6337 status = decode_putfh(xdr);
6338 if (status)
6339 goto out;
6340 status = decode_layoutcommit(xdr, rqstp, res);
6341 if (status)
6342 goto out;
6343 decode_getfattr(xdr, res->fattr, res->server,
6344 !RPC_IS_ASYNC(rqstp->rq_task));
6345out:
6346 return status;
6347}
6208#endif /* CONFIG_NFS_V4_1 */ 6348#endif /* CONFIG_NFS_V4_1 */
6209 6349
6210/** 6350/**
@@ -6403,6 +6543,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6403 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6543 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6404 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), 6544 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6405 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6545 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6546 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
6406#endif /* CONFIG_NFS_V4_1 */ 6547#endif /* CONFIG_NFS_V4_1 */
6407}; 6548};
6408 6549
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 23e794410669..87a593c2b055 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -223,6 +223,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
223 desc->pg_count = 0; 223 desc->pg_count = 0;
224 desc->pg_bsize = bsize; 224 desc->pg_bsize = bsize;
225 desc->pg_base = 0; 225 desc->pg_base = 0;
226 desc->pg_moreio = 0;
226 desc->pg_inode = inode; 227 desc->pg_inode = inode;
227 desc->pg_doio = doio; 228 desc->pg_doio = doio;
228 desc->pg_ioflags = io_flags; 229 desc->pg_ioflags = io_flags;
@@ -335,9 +336,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
335 struct nfs_page *req) 336 struct nfs_page *req)
336{ 337{
337 while (!nfs_pageio_do_add_request(desc, req)) { 338 while (!nfs_pageio_do_add_request(desc, req)) {
339 desc->pg_moreio = 1;
338 nfs_pageio_doio(desc); 340 nfs_pageio_doio(desc);
339 if (desc->pg_error < 0) 341 if (desc->pg_error < 0)
340 return 0; 342 return 0;
343 desc->pg_moreio = 0;
341 } 344 }
342 return 1; 345 return 1;
343} 346}
@@ -395,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi,
395 pgoff_t idx_end; 398 pgoff_t idx_end;
396 int found, i; 399 int found, i;
397 int res; 400 int res;
401 struct list_head *list;
398 402
399 res = 0; 403 res = 0;
400 if (npages == 0) 404 if (npages == 0)
@@ -415,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi,
415 idx_start = req->wb_index + 1; 419 idx_start = req->wb_index + 1;
416 if (nfs_set_page_tag_locked(req)) { 420 if (nfs_set_page_tag_locked(req)) {
417 kref_get(&req->wb_kref); 421 kref_get(&req->wb_kref);
418 nfs_list_remove_request(req);
419 radix_tree_tag_clear(&nfsi->nfs_page_tree, 422 radix_tree_tag_clear(&nfsi->nfs_page_tree,
420 req->wb_index, tag); 423 req->wb_index, tag);
421 nfs_list_add_request(req, dst); 424 list = pnfs_choose_commit_list(req, dst);
425 nfs_list_add_request(req, list);
422 res++; 426 res++;
423 if (res == INT_MAX) 427 if (res == INT_MAX)
424 goto out; 428 goto out;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f38813a0a295..d9ab97269ce6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -259,6 +259,7 @@ put_lseg(struct pnfs_layout_segment *lseg)
259 pnfs_free_lseg_list(&free_me); 259 pnfs_free_lseg_list(&free_me);
260 } 260 }
261} 261}
262EXPORT_SYMBOL_GPL(put_lseg);
262 263
263static bool 264static bool
264should_free_lseg(u32 lseg_iomode, u32 recall_iomode) 265should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
@@ -471,6 +472,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
471 struct nfs_server *server = NFS_SERVER(ino); 472 struct nfs_server *server = NFS_SERVER(ino);
472 struct nfs4_layoutget *lgp; 473 struct nfs4_layoutget *lgp;
473 struct pnfs_layout_segment *lseg = NULL; 474 struct pnfs_layout_segment *lseg = NULL;
475 struct page **pages = NULL;
476 int i;
477 u32 max_resp_sz, max_pages;
474 478
475 dprintk("--> %s\n", __func__); 479 dprintk("--> %s\n", __func__);
476 480
@@ -478,6 +482,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
478 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 482 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
479 if (lgp == NULL) 483 if (lgp == NULL)
480 return NULL; 484 return NULL;
485
486 /* allocate pages for xdr post processing */
487 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
488 max_pages = max_resp_sz >> PAGE_SHIFT;
489
490 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
491 if (!pages)
492 goto out_err_free;
493
494 for (i = 0; i < max_pages; i++) {
495 pages[i] = alloc_page(GFP_KERNEL);
496 if (!pages[i])
497 goto out_err_free;
498 }
499
481 lgp->args.minlength = NFS4_MAX_UINT64; 500 lgp->args.minlength = NFS4_MAX_UINT64;
482 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 501 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
483 lgp->args.range.iomode = iomode; 502 lgp->args.range.iomode = iomode;
@@ -486,6 +505,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
486 lgp->args.type = server->pnfs_curr_ld->id; 505 lgp->args.type = server->pnfs_curr_ld->id;
487 lgp->args.inode = ino; 506 lgp->args.inode = ino;
488 lgp->args.ctx = get_nfs_open_context(ctx); 507 lgp->args.ctx = get_nfs_open_context(ctx);
508 lgp->args.layout.pages = pages;
509 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
489 lgp->lsegpp = &lseg; 510 lgp->lsegpp = &lseg;
490 511
491 /* Synchronously retrieve layout information from server and 512 /* Synchronously retrieve layout information from server and
@@ -496,7 +517,26 @@ send_layoutget(struct pnfs_layout_hdr *lo,
496 /* remember that LAYOUTGET failed and suspend trying */ 517 /* remember that LAYOUTGET failed and suspend trying */
497 set_bit(lo_fail_bit(iomode), &lo->plh_flags); 518 set_bit(lo_fail_bit(iomode), &lo->plh_flags);
498 } 519 }
520
521 /* free xdr pages */
522 for (i = 0; i < max_pages; i++)
523 __free_page(pages[i]);
524 kfree(pages);
525
499 return lseg; 526 return lseg;
527
528out_err_free:
529 /* free any allocated xdr pages, lgp as it's not used */
530 if (pages) {
531 for (i = 0; i < max_pages; i++) {
532 if (!pages[i])
533 break;
534 __free_page(pages[i]);
535 }
536 kfree(pages);
537 }
538 kfree(lgp);
539 return NULL;
500} 540}
501 541
502bool pnfs_roc(struct inode *ino) 542bool pnfs_roc(struct inode *ino)
@@ -945,3 +985,105 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
945 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 985 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
946 return trypnfs; 986 return trypnfs;
947} 987}
988
989/*
990 * Currently there is only one (whole file) write lseg.
991 */
992static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
993{
994 struct pnfs_layout_segment *lseg, *rv = NULL;
995
996 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
997 if (lseg->pls_range.iomode == IOMODE_RW)
998 rv = lseg;
999 return rv;
1000}
1001
1002void
1003pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1004{
1005 struct nfs_inode *nfsi = NFS_I(wdata->inode);
1006 loff_t end_pos = wdata->args.offset + wdata->res.count;
1007
1008 spin_lock(&nfsi->vfs_inode.i_lock);
1009 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1010 /* references matched in nfs4_layoutcommit_release */
1011 get_lseg(wdata->lseg);
1012 wdata->lseg->pls_lc_cred =
1013 get_rpccred(wdata->args.context->state->owner->so_cred);
1014 mark_inode_dirty_sync(wdata->inode);
1015 dprintk("%s: Set layoutcommit for inode %lu ",
1016 __func__, wdata->inode->i_ino);
1017 }
1018 if (end_pos > wdata->lseg->pls_end_pos)
1019 wdata->lseg->pls_end_pos = end_pos;
1020 spin_unlock(&nfsi->vfs_inode.i_lock);
1021}
1022EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1023
1024/*
1025 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
1026 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
1027 * data to disk to allow the server to recover the data if it crashes.
1028 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
1029 * is off, and a COMMIT is sent to a data server, or
1030 * if WRITEs to a data server return NFS_DATA_SYNC.
1031 */
1032int
1033pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1034{
1035 struct nfs4_layoutcommit_data *data;
1036 struct nfs_inode *nfsi = NFS_I(inode);
1037 struct pnfs_layout_segment *lseg;
1038 struct rpc_cred *cred;
1039 loff_t end_pos;
1040 int status = 0;
1041
1042 dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
1043
1044 if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
1045 return 0;
1046
1047 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
1048 data = kzalloc(sizeof(*data), GFP_NOFS);
1049 if (!data) {
1050 mark_inode_dirty_sync(inode);
1051 status = -ENOMEM;
1052 goto out;
1053 }
1054
1055 spin_lock(&inode->i_lock);
1056 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1057 spin_unlock(&inode->i_lock);
1058 kfree(data);
1059 goto out;
1060 }
1061 /*
1062 * Currently only one (whole file) write lseg which is referenced
1063 * in pnfs_set_layoutcommit and will be found.
1064 */
1065 lseg = pnfs_list_write_lseg(inode);
1066
1067 end_pos = lseg->pls_end_pos;
1068 cred = lseg->pls_lc_cred;
1069 lseg->pls_end_pos = 0;
1070 lseg->pls_lc_cred = NULL;
1071
1072 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
1073 sizeof(nfsi->layout->plh_stateid.data));
1074 spin_unlock(&inode->i_lock);
1075
1076 data->args.inode = inode;
1077 data->lseg = lseg;
1078 data->cred = cred;
1079 nfs_fattr_init(&data->fattr);
1080 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
1081 data->res.fattr = &data->fattr;
1082 data->args.lastbytewritten = end_pos - 1;
1083 data->res.server = NFS_SERVER(inode);
1084
1085 status = nfs4_proc_layoutcommit(data, sync);
1086out:
1087 dprintk("<-- %s status %d\n", __func__, status);
1088 return status;
1089}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 6380b9405bcd..bc4827202e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -43,6 +43,8 @@ struct pnfs_layout_segment {
43 atomic_t pls_refcount; 43 atomic_t pls_refcount;
44 unsigned long pls_flags; 44 unsigned long pls_flags;
45 struct pnfs_layout_hdr *pls_layout; 45 struct pnfs_layout_hdr *pls_layout;
46 struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */
47 loff_t pls_end_pos; /* LAYOUTCOMMIT write end */
46}; 48};
47 49
48enum pnfs_try_status { 50enum pnfs_try_status {
@@ -74,6 +76,13 @@ struct pnfs_layoutdriver_type {
74 /* test for nfs page cache coalescing */ 76 /* test for nfs page cache coalescing */
75 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 77 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
76 78
79 /* Returns true if layoutdriver wants to divert this request to
80 * driver's commit routine.
81 */
82 bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg);
83 struct list_head * (*choose_commit_list) (struct nfs_page *req);
84 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
85
77 /* 86 /*
78 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 87 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
79 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 88 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -100,7 +109,6 @@ struct pnfs_device {
100 unsigned int layout_type; 109 unsigned int layout_type;
101 unsigned int mincount; 110 unsigned int mincount;
102 struct page **pages; 111 struct page **pages;
103 void *area;
104 unsigned int pgbase; 112 unsigned int pgbase;
105 unsigned int pglen; 113 unsigned int pglen;
106}; 114};
@@ -145,7 +153,8 @@ bool pnfs_roc(struct inode *ino);
145void pnfs_roc_release(struct inode *ino); 153void pnfs_roc_release(struct inode *ino);
146void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 154void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
147bool pnfs_roc_drain(struct inode *ino, u32 *barrier); 155bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
148 156void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
157int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
149 158
150static inline int lo_fail_bit(u32 iomode) 159static inline int lo_fail_bit(u32 iomode)
151{ 160{
@@ -169,6 +178,51 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
169 return nfss->pnfs_curr_ld != NULL; 178 return nfss->pnfs_curr_ld != NULL;
170} 179}
171 180
181static inline void
182pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
183{
184 if (lseg) {
185 struct pnfs_layoutdriver_type *ld;
186
187 ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
188 if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
189 set_bit(PG_PNFS_COMMIT, &req->wb_flags);
190 req->wb_commit_lseg = get_lseg(lseg);
191 }
192 }
193}
194
195static inline int
196pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
197{
198 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
199 return PNFS_NOT_ATTEMPTED;
200 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
201}
202
203static inline struct list_head *
204pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
205{
206 struct list_head *rv;
207
208 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) {
209 struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode;
210
211 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
212 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
213 /* matched by ref taken when PG_PNFS_COMMIT is set */
214 put_lseg(req->wb_commit_lseg);
215 } else
216 rv = mds;
217 return rv;
218}
219
220static inline void pnfs_clear_request_commit(struct nfs_page *req)
221{
222 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags))
223 put_lseg(req->wb_commit_lseg);
224}
225
172#else /* CONFIG_NFS_V4_1 */ 226#else /* CONFIG_NFS_V4_1 */
173 227
174static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 228static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -252,6 +306,31 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
252 pgio->pg_test = NULL; 306 pgio->pg_test = NULL;
253} 307}
254 308
309static inline void
310pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
311{
312}
313
314static inline int
315pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
316{
317 return PNFS_NOT_ATTEMPTED;
318}
319
320static inline struct list_head *
321pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
322{
323 return mds;
324}
325
326static inline void pnfs_clear_request_commit(struct nfs_page *req)
327{
328}
329
330static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
331{
332 return 0;
333}
255#endif /* CONFIG_NFS_V4_1 */ 334#endif /* CONFIG_NFS_V4_1 */
256 335
257#endif /* FS_NFS_PNFS_H */ 336#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 47a3ad63e0d5..85d75254328e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -59,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
59 } 59 }
60 return p; 60 return p;
61} 61}
62EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
62 63
63void nfs_commit_free(struct nfs_write_data *p) 64void nfs_commit_free(struct nfs_write_data *p)
64{ 65{
@@ -66,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p)
66 kfree(p->pagevec); 67 kfree(p->pagevec);
67 mempool_free(p, nfs_commit_mempool); 68 mempool_free(p, nfs_commit_mempool);
68} 69}
70EXPORT_SYMBOL_GPL(nfs_commit_free);
69 71
70struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 72struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
71{ 73{
@@ -179,8 +181,8 @@ static int wb_priority(struct writeback_control *wbc)
179 if (wbc->for_reclaim) 181 if (wbc->for_reclaim)
180 return FLUSH_HIGHPRI | FLUSH_STABLE; 182 return FLUSH_HIGHPRI | FLUSH_STABLE;
181 if (wbc->for_kupdate || wbc->for_background) 183 if (wbc->for_kupdate || wbc->for_background)
182 return FLUSH_LOWPRI; 184 return FLUSH_LOWPRI | FLUSH_COND_STABLE;
183 return 0; 185 return FLUSH_COND_STABLE;
184} 186}
185 187
186/* 188/*
@@ -441,7 +443,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
441 * Add a request to the inode's commit list. 443 * Add a request to the inode's commit list.
442 */ 444 */
443static void 445static void
444nfs_mark_request_commit(struct nfs_page *req) 446nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
445{ 447{
446 struct inode *inode = req->wb_context->path.dentry->d_inode; 448 struct inode *inode = req->wb_context->path.dentry->d_inode;
447 struct nfs_inode *nfsi = NFS_I(inode); 449 struct nfs_inode *nfsi = NFS_I(inode);
@@ -453,6 +455,7 @@ nfs_mark_request_commit(struct nfs_page *req)
453 NFS_PAGE_TAG_COMMIT); 455 NFS_PAGE_TAG_COMMIT);
454 nfsi->ncommit++; 456 nfsi->ncommit++;
455 spin_unlock(&inode->i_lock); 457 spin_unlock(&inode->i_lock);
458 pnfs_mark_request_commit(req, lseg);
456 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 459 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
457 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 460 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
458 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 461 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -474,14 +477,18 @@ nfs_clear_request_commit(struct nfs_page *req)
474static inline 477static inline
475int nfs_write_need_commit(struct nfs_write_data *data) 478int nfs_write_need_commit(struct nfs_write_data *data)
476{ 479{
477 return data->verf.committed != NFS_FILE_SYNC; 480 if (data->verf.committed == NFS_DATA_SYNC)
481 return data->lseg == NULL;
482 else
483 return data->verf.committed != NFS_FILE_SYNC;
478} 484}
479 485
480static inline 486static inline
481int nfs_reschedule_unstable_write(struct nfs_page *req) 487int nfs_reschedule_unstable_write(struct nfs_page *req,
488 struct nfs_write_data *data)
482{ 489{
483 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { 490 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
484 nfs_mark_request_commit(req); 491 nfs_mark_request_commit(req, data->lseg);
485 return 1; 492 return 1;
486 } 493 }
487 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { 494 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -492,7 +499,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
492} 499}
493#else 500#else
494static inline void 501static inline void
495nfs_mark_request_commit(struct nfs_page *req) 502nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
496{ 503{
497} 504}
498 505
@@ -509,7 +516,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
509} 516}
510 517
511static inline 518static inline
512int nfs_reschedule_unstable_write(struct nfs_page *req) 519int nfs_reschedule_unstable_write(struct nfs_page *req,
520 struct nfs_write_data *data)
513{ 521{
514 return 0; 522 return 0;
515} 523}
@@ -612,9 +620,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
612 } 620 }
613 621
614 if (nfs_clear_request_commit(req) && 622 if (nfs_clear_request_commit(req) &&
615 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, 623 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
616 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) 624 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
617 NFS_I(inode)->ncommit--; 625 NFS_I(inode)->ncommit--;
626 pnfs_clear_request_commit(req);
627 }
618 628
619 /* Okay, the request matches. Update the region */ 629 /* Okay, the request matches. Update the region */
620 if (offset < req->wb_offset) { 630 if (offset < req->wb_offset) {
@@ -762,11 +772,12 @@ int nfs_updatepage(struct file *file, struct page *page,
762 return status; 772 return status;
763} 773}
764 774
765static void nfs_writepage_release(struct nfs_page *req) 775static void nfs_writepage_release(struct nfs_page *req,
776 struct nfs_write_data *data)
766{ 777{
767 struct page *page = req->wb_page; 778 struct page *page = req->wb_page;
768 779
769 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) 780 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
770 nfs_inode_remove_request(req); 781 nfs_inode_remove_request(req);
771 nfs_clear_page_tag_locked(req); 782 nfs_clear_page_tag_locked(req);
772 nfs_end_page_writeback(page); 783 nfs_end_page_writeback(page);
@@ -863,7 +874,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
863 data->args.context = get_nfs_open_context(req->wb_context); 874 data->args.context = get_nfs_open_context(req->wb_context);
864 data->args.lock_context = req->wb_lock_context; 875 data->args.lock_context = req->wb_lock_context;
865 data->args.stable = NFS_UNSTABLE; 876 data->args.stable = NFS_UNSTABLE;
866 if (how & FLUSH_STABLE) { 877 if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
867 data->args.stable = NFS_DATA_SYNC; 878 data->args.stable = NFS_DATA_SYNC;
868 if (!nfs_need_commit(NFS_I(inode))) 879 if (!nfs_need_commit(NFS_I(inode)))
869 data->args.stable = NFS_FILE_SYNC; 880 data->args.stable = NFS_FILE_SYNC;
@@ -912,6 +923,12 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
912 923
913 nfs_list_remove_request(req); 924 nfs_list_remove_request(req);
914 925
926 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
927 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
928 desc->pg_count > wsize))
929 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
930
931
915 nbytes = desc->pg_count; 932 nbytes = desc->pg_count;
916 do { 933 do {
917 size_t len = min(nbytes, wsize); 934 size_t len = min(nbytes, wsize);
@@ -1002,6 +1019,10 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1002 if ((!lseg) && list_is_singular(&data->pages)) 1019 if ((!lseg) && list_is_singular(&data->pages))
1003 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); 1020 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
1004 1021
1022 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1023 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1024 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1025
1005 /* Set up the argument struct */ 1026 /* Set up the argument struct */
1006 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); 1027 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
1007out: 1028out:
@@ -1074,7 +1095,7 @@ static void nfs_writeback_release_partial(void *calldata)
1074 1095
1075out: 1096out:
1076 if (atomic_dec_and_test(&req->wb_complete)) 1097 if (atomic_dec_and_test(&req->wb_complete))
1077 nfs_writepage_release(req); 1098 nfs_writepage_release(req, data);
1078 nfs_writedata_release(calldata); 1099 nfs_writedata_release(calldata);
1079} 1100}
1080 1101
@@ -1141,7 +1162,7 @@ static void nfs_writeback_release_full(void *calldata)
1141 1162
1142 if (nfs_write_need_commit(data)) { 1163 if (nfs_write_need_commit(data)) {
1143 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1164 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1144 nfs_mark_request_commit(req); 1165 nfs_mark_request_commit(req, data->lseg);
1145 dprintk(" marked for commit\n"); 1166 dprintk(" marked for commit\n");
1146 goto next; 1167 goto next;
1147 } 1168 }
@@ -1251,57 +1272,82 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1251#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1272#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1252static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) 1273static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1253{ 1274{
1275 int ret;
1276
1254 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1277 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1255 return 1; 1278 return 1;
1256 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, 1279 if (!may_wait)
1257 NFS_INO_COMMIT, nfs_wait_bit_killable, 1280 return 0;
1258 TASK_KILLABLE)) 1281 ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1259 return 1; 1282 NFS_INO_COMMIT,
1260 return 0; 1283 nfs_wait_bit_killable,
1284 TASK_KILLABLE);
1285 return (ret < 0) ? ret : 1;
1261} 1286}
1262 1287
1263static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1288void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1264{ 1289{
1265 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1290 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1266 smp_mb__after_clear_bit(); 1291 smp_mb__after_clear_bit();
1267 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1292 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1268} 1293}
1294EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1269 1295
1270 1296void nfs_commitdata_release(void *data)
1271static void nfs_commitdata_release(void *data)
1272{ 1297{
1273 struct nfs_write_data *wdata = data; 1298 struct nfs_write_data *wdata = data;
1274 1299
1300 put_lseg(wdata->lseg);
1275 put_nfs_open_context(wdata->args.context); 1301 put_nfs_open_context(wdata->args.context);
1276 nfs_commit_free(wdata); 1302 nfs_commit_free(wdata);
1277} 1303}
1304EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1278 1305
1279/* 1306int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1280 * Set up the argument/result storage required for the RPC call. 1307 const struct rpc_call_ops *call_ops,
1281 */ 1308 int how)
1282static int nfs_commit_rpcsetup(struct list_head *head,
1283 struct nfs_write_data *data,
1284 int how)
1285{ 1309{
1286 struct nfs_page *first = nfs_list_entry(head->next);
1287 struct inode *inode = first->wb_context->path.dentry->d_inode;
1288 int priority = flush_task_priority(how);
1289 struct rpc_task *task; 1310 struct rpc_task *task;
1311 int priority = flush_task_priority(how);
1290 struct rpc_message msg = { 1312 struct rpc_message msg = {
1291 .rpc_argp = &data->args, 1313 .rpc_argp = &data->args,
1292 .rpc_resp = &data->res, 1314 .rpc_resp = &data->res,
1293 .rpc_cred = first->wb_context->cred, 1315 .rpc_cred = data->cred,
1294 }; 1316 };
1295 struct rpc_task_setup task_setup_data = { 1317 struct rpc_task_setup task_setup_data = {
1296 .task = &data->task, 1318 .task = &data->task,
1297 .rpc_client = NFS_CLIENT(inode), 1319 .rpc_client = clnt,
1298 .rpc_message = &msg, 1320 .rpc_message = &msg,
1299 .callback_ops = &nfs_commit_ops, 1321 .callback_ops = call_ops,
1300 .callback_data = data, 1322 .callback_data = data,
1301 .workqueue = nfsiod_workqueue, 1323 .workqueue = nfsiod_workqueue,
1302 .flags = RPC_TASK_ASYNC, 1324 .flags = RPC_TASK_ASYNC,
1303 .priority = priority, 1325 .priority = priority,
1304 }; 1326 };
1327 /* Set up the initial task struct. */
1328 NFS_PROTO(data->inode)->commit_setup(data, &msg);
1329
1330 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1331
1332 task = rpc_run_task(&task_setup_data);
1333 if (IS_ERR(task))
1334 return PTR_ERR(task);
1335 if (how & FLUSH_SYNC)
1336 rpc_wait_for_completion_task(task);
1337 rpc_put_task(task);
1338 return 0;
1339}
1340EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1341
1342/*
1343 * Set up the argument/result storage required for the RPC call.
1344 */
1345void nfs_init_commit(struct nfs_write_data *data,
1346 struct list_head *head,
1347 struct pnfs_layout_segment *lseg)
1348{
1349 struct nfs_page *first = nfs_list_entry(head->next);
1350 struct inode *inode = first->wb_context->path.dentry->d_inode;
1305 1351
1306 /* Set up the RPC argument and reply structs 1352 /* Set up the RPC argument and reply structs
1307 * NB: take care not to mess about with data->commit et al. */ 1353 * NB: take care not to mess about with data->commit et al. */
@@ -1309,7 +1355,9 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1309 list_splice_init(head, &data->pages); 1355 list_splice_init(head, &data->pages);
1310 1356
1311 data->inode = inode; 1357 data->inode = inode;
1312 data->cred = msg.rpc_cred; 1358 data->cred = first->wb_context->cred;
1359 data->lseg = lseg; /* reference transferred */
1360 data->mds_ops = &nfs_commit_ops;
1313 1361
1314 data->args.fh = NFS_FH(data->inode); 1362 data->args.fh = NFS_FH(data->inode);
1315 /* Note: we always request a commit of the entire inode */ 1363 /* Note: we always request a commit of the entire inode */
@@ -1320,20 +1368,25 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1320 data->res.fattr = &data->fattr; 1368 data->res.fattr = &data->fattr;
1321 data->res.verf = &data->verf; 1369 data->res.verf = &data->verf;
1322 nfs_fattr_init(&data->fattr); 1370 nfs_fattr_init(&data->fattr);
1371}
1372EXPORT_SYMBOL_GPL(nfs_init_commit);
1323 1373
1324 /* Set up the initial task struct. */ 1374void nfs_retry_commit(struct list_head *page_list,
1325 NFS_PROTO(inode)->commit_setup(data, &msg); 1375 struct pnfs_layout_segment *lseg)
1326 1376{
1327 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); 1377 struct nfs_page *req;
1328 1378
1329 task = rpc_run_task(&task_setup_data); 1379 while (!list_empty(page_list)) {
1330 if (IS_ERR(task)) 1380 req = nfs_list_entry(page_list->next);
1331 return PTR_ERR(task); 1381 nfs_list_remove_request(req);
1332 if (how & FLUSH_SYNC) 1382 nfs_mark_request_commit(req, lseg);
1333 rpc_wait_for_completion_task(task); 1383 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1334 rpc_put_task(task); 1384 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1335 return 0; 1385 BDI_RECLAIMABLE);
1386 nfs_clear_page_tag_locked(req);
1387 }
1336} 1388}
1389EXPORT_SYMBOL_GPL(nfs_retry_commit);
1337 1390
1338/* 1391/*
1339 * Commit dirty pages 1392 * Commit dirty pages
@@ -1342,7 +1395,6 @@ static int
1342nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1395nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1343{ 1396{
1344 struct nfs_write_data *data; 1397 struct nfs_write_data *data;
1345 struct nfs_page *req;
1346 1398
1347 data = nfs_commitdata_alloc(); 1399 data = nfs_commitdata_alloc();
1348 1400
@@ -1350,17 +1402,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1350 goto out_bad; 1402 goto out_bad;
1351 1403
1352 /* Set up the argument struct */ 1404 /* Set up the argument struct */
1353 return nfs_commit_rpcsetup(head, data, how); 1405 nfs_init_commit(data, head, NULL);
1406 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
1354 out_bad: 1407 out_bad:
1355 while (!list_empty(head)) { 1408 nfs_retry_commit(head, NULL);
1356 req = nfs_list_entry(head->next);
1357 nfs_list_remove_request(req);
1358 nfs_mark_request_commit(req);
1359 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1360 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1361 BDI_RECLAIMABLE);
1362 nfs_clear_page_tag_locked(req);
1363 }
1364 nfs_commit_clear_lock(NFS_I(inode)); 1409 nfs_commit_clear_lock(NFS_I(inode));
1365 return -ENOMEM; 1410 return -ENOMEM;
1366} 1411}
@@ -1380,10 +1425,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1380 return; 1425 return;
1381} 1426}
1382 1427
1383static void nfs_commit_release(void *calldata) 1428void nfs_commit_release_pages(struct nfs_write_data *data)
1384{ 1429{
1385 struct nfs_write_data *data = calldata; 1430 struct nfs_page *req;
1386 struct nfs_page *req;
1387 int status = data->task.tk_status; 1431 int status = data->task.tk_status;
1388 1432
1389 while (!list_empty(&data->pages)) { 1433 while (!list_empty(&data->pages)) {
@@ -1417,6 +1461,14 @@ static void nfs_commit_release(void *calldata)
1417 next: 1461 next:
1418 nfs_clear_page_tag_locked(req); 1462 nfs_clear_page_tag_locked(req);
1419 } 1463 }
1464}
1465EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1466
1467static void nfs_commit_release(void *calldata)
1468{
1469 struct nfs_write_data *data = calldata;
1470
1471 nfs_commit_release_pages(data);
1420 nfs_commit_clear_lock(NFS_I(data->inode)); 1472 nfs_commit_clear_lock(NFS_I(data->inode));
1421 nfs_commitdata_release(calldata); 1473 nfs_commitdata_release(calldata);
1422} 1474}
@@ -1433,23 +1485,30 @@ int nfs_commit_inode(struct inode *inode, int how)
1433{ 1485{
1434 LIST_HEAD(head); 1486 LIST_HEAD(head);
1435 int may_wait = how & FLUSH_SYNC; 1487 int may_wait = how & FLUSH_SYNC;
1436 int res = 0; 1488 int res;
1437 1489
1438 if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) 1490 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1491 if (res <= 0)
1439 goto out_mark_dirty; 1492 goto out_mark_dirty;
1440 spin_lock(&inode->i_lock); 1493 spin_lock(&inode->i_lock);
1441 res = nfs_scan_commit(inode, &head, 0, 0); 1494 res = nfs_scan_commit(inode, &head, 0, 0);
1442 spin_unlock(&inode->i_lock); 1495 spin_unlock(&inode->i_lock);
1443 if (res) { 1496 if (res) {
1444 int error = nfs_commit_list(inode, &head, how); 1497 int error;
1498
1499 error = pnfs_commit_list(inode, &head, how);
1500 if (error == PNFS_NOT_ATTEMPTED)
1501 error = nfs_commit_list(inode, &head, how);
1445 if (error < 0) 1502 if (error < 0)
1446 return error; 1503 return error;
1447 if (may_wait) 1504 if (!may_wait)
1448 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1449 nfs_wait_bit_killable,
1450 TASK_KILLABLE);
1451 else
1452 goto out_mark_dirty; 1505 goto out_mark_dirty;
1506 error = wait_on_bit(&NFS_I(inode)->flags,
1507 NFS_INO_COMMIT,
1508 nfs_wait_bit_killable,
1509 TASK_KILLABLE);
1510 if (error < 0)
1511 return error;
1453 } else 1512 } else
1454 nfs_commit_clear_lock(NFS_I(inode)); 1513 nfs_commit_clear_lock(NFS_I(inode));
1455 return res; 1514 return res;
@@ -1503,7 +1562,22 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1503 1562
1504int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1563int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1505{ 1564{
1506 return nfs_commit_unstable_pages(inode, wbc); 1565 int ret;
1566
1567 ret = nfs_commit_unstable_pages(inode, wbc);
1568 if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
1569 int status;
1570 bool sync = true;
1571
1572 if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
1573 wbc->for_background)
1574 sync = false;
1575
1576 status = pnfs_layoutcommit_inode(inode, sync);
1577 if (status < 0)
1578 return status;
1579 }
1580 return ret;
1507} 1581}
1508 1582
1509/* 1583/*
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 84c27d69d421..ec0f277be7f5 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -117,7 +117,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
117 * invoked in contexts where a memory allocation failure is 117 * invoked in contexts where a memory allocation failure is
118 * fatal. Fortunately this fake ACL is small enough to 118 * fatal. Fortunately this fake ACL is small enough to
119 * construct on the stack. */ 119 * construct on the stack. */
120 memset(acl2, 0, sizeof(acl2));
121 posix_acl_init(acl2, 4); 120 posix_acl_init(acl2, 4);
122 121
123 /* Insert entries in canonical order: other orders seem 122 /* Insert entries in canonical order: other orders seem
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 7e7f6b7e46b1..b528f6d4b860 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -561,6 +561,7 @@ enum {
561 NFSPROC4_CLNT_RECLAIM_COMPLETE, 561 NFSPROC4_CLNT_RECLAIM_COMPLETE,
562 NFSPROC4_CLNT_LAYOUTGET, 562 NFSPROC4_CLNT_LAYOUTGET,
563 NFSPROC4_CLNT_GETDEVICEINFO, 563 NFSPROC4_CLNT_GETDEVICEINFO,
564 NFSPROC4_CLNT_LAYOUTCOMMIT,
564}; 565};
565 566
566/* nfs41 types */ 567/* nfs41 types */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f88522b10a38..1b93b9c60e55 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,6 +33,8 @@
33#define FLUSH_STABLE 4 /* commit to stable storage */ 33#define FLUSH_STABLE 4 /* commit to stable storage */
34#define FLUSH_LOWPRI 8 /* low priority background flush */ 34#define FLUSH_LOWPRI 8 /* low priority background flush */
35#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ 35#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
36#define FLUSH_COND_STABLE 32 /* conditional stable write - only stable
37 * if everything fits in one RPC */
36 38
37#ifdef __KERNEL__ 39#ifdef __KERNEL__
38 40
@@ -93,8 +95,13 @@ struct nfs_open_context {
93 int error; 95 int error;
94 96
95 struct list_head list; 97 struct list_head list;
98};
96 99
100struct nfs_open_dir_context {
101 struct rpc_cred *cred;
97 __u64 dir_cookie; 102 __u64 dir_cookie;
103 __u64 dup_cookie;
104 int duped;
98}; 105};
99 106
100/* 107/*
@@ -191,6 +198,7 @@ struct nfs_inode {
191 198
192 /* pNFS layout information */ 199 /* pNFS layout information */
193 struct pnfs_layout_hdr *layout; 200 struct pnfs_layout_hdr *layout;
201 atomic_t commits_outstanding;
194#endif /* CONFIG_NFS_V4*/ 202#endif /* CONFIG_NFS_V4*/
195#ifdef CONFIG_NFS_FSCACHE 203#ifdef CONFIG_NFS_FSCACHE
196 struct fscache_cookie *fscache; 204 struct fscache_cookie *fscache;
@@ -219,6 +227,8 @@ struct nfs_inode {
219#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ 227#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
220#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ 228#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
221#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ 229#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
230#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */
231#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
222 232
223static inline struct nfs_inode *NFS_I(const struct inode *inode) 233static inline struct nfs_inode *NFS_I(const struct inode *inode)
224{ 234{
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 90907ada6d52..8023e4e25133 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -33,11 +33,15 @@ enum {
33 PG_CLEAN, 33 PG_CLEAN,
34 PG_NEED_COMMIT, 34 PG_NEED_COMMIT,
35 PG_NEED_RESCHED, 35 PG_NEED_RESCHED,
36 PG_PNFS_COMMIT,
36}; 37};
37 38
38struct nfs_inode; 39struct nfs_inode;
39struct nfs_page { 40struct nfs_page {
40 struct list_head wb_list; /* Defines state of page: */ 41 union {
42 struct list_head wb_list; /* Defines state of page: */
43 struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */
44 };
41 struct page *wb_page; /* page to read in/write out */ 45 struct page *wb_page; /* page to read in/write out */
42 struct nfs_open_context *wb_context; /* File state context info */ 46 struct nfs_open_context *wb_context; /* File state context info */
43 struct nfs_lock_context *wb_lock_context; /* lock context info */ 47 struct nfs_lock_context *wb_lock_context; /* lock context info */
@@ -57,6 +61,7 @@ struct nfs_pageio_descriptor {
57 size_t pg_count; 61 size_t pg_count;
58 size_t pg_bsize; 62 size_t pg_bsize;
59 unsigned int pg_base; 63 unsigned int pg_base;
64 char pg_moreio;
60 65
61 struct inode *pg_inode; 66 struct inode *pg_inode;
62 int (*pg_doio)(struct nfs_pageio_descriptor *); 67 int (*pg_doio)(struct nfs_pageio_descriptor *);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index fa1ba78ca2c8..78b101e487ea 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -195,8 +195,9 @@ struct nfs4_get_lease_time_res {
195#define PNFS_LAYOUT_MAXSIZE 4096 195#define PNFS_LAYOUT_MAXSIZE 4096
196 196
197struct nfs4_layoutdriver_data { 197struct nfs4_layoutdriver_data {
198 struct page **pages;
199 __u32 pglen;
198 __u32 len; 200 __u32 len;
199 void *buf;
200}; 201};
201 202
202struct pnfs_layout_range { 203struct pnfs_layout_range {
@@ -214,6 +215,7 @@ struct nfs4_layoutget_args {
214 struct nfs_open_context *ctx; 215 struct nfs_open_context *ctx;
215 struct nfs4_sequence_args seq_args; 216 struct nfs4_sequence_args seq_args;
216 nfs4_stateid stateid; 217 nfs4_stateid stateid;
218 struct nfs4_layoutdriver_data layout;
217}; 219};
218 220
219struct nfs4_layoutget_res { 221struct nfs4_layoutget_res {
@@ -221,8 +223,8 @@ struct nfs4_layoutget_res {
221 struct pnfs_layout_range range; 223 struct pnfs_layout_range range;
222 __u32 type; 224 __u32 type;
223 nfs4_stateid stateid; 225 nfs4_stateid stateid;
224 struct nfs4_layoutdriver_data layout;
225 struct nfs4_sequence_res seq_res; 226 struct nfs4_sequence_res seq_res;
227 struct nfs4_layoutdriver_data *layoutp;
226}; 228};
227 229
228struct nfs4_layoutget { 230struct nfs4_layoutget {
@@ -241,6 +243,29 @@ struct nfs4_getdeviceinfo_res {
241 struct nfs4_sequence_res seq_res; 243 struct nfs4_sequence_res seq_res;
242}; 244};
243 245
246struct nfs4_layoutcommit_args {
247 nfs4_stateid stateid;
248 __u64 lastbytewritten;
249 struct inode *inode;
250 const u32 *bitmask;
251 struct nfs4_sequence_args seq_args;
252};
253
254struct nfs4_layoutcommit_res {
255 struct nfs_fattr *fattr;
256 const struct nfs_server *server;
257 struct nfs4_sequence_res seq_res;
258};
259
260struct nfs4_layoutcommit_data {
261 struct rpc_task task;
262 struct nfs_fattr fattr;
263 struct pnfs_layout_segment *lseg;
264 struct rpc_cred *cred;
265 struct nfs4_layoutcommit_args args;
266 struct nfs4_layoutcommit_res res;
267};
268
244/* 269/*
245 * Arguments to the open call. 270 * Arguments to the open call.
246 */ 271 */
@@ -1077,6 +1102,7 @@ struct nfs_write_data {
1077 struct nfs_writeres res; /* result struct */ 1102 struct nfs_writeres res; /* result struct */
1078 struct pnfs_layout_segment *lseg; 1103 struct pnfs_layout_segment *lseg;
1079 struct nfs_client *ds_clp; /* pNFS data server */ 1104 struct nfs_client *ds_clp; /* pNFS data server */
1105 int ds_commit_index;
1080 const struct rpc_call_ops *mds_ops; 1106 const struct rpc_call_ops *mds_ops;
1081 int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); 1107 int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
1082#ifdef CONFIG_NFS_V4 1108#ifdef CONFIG_NFS_V4
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index be96d429b475..1e336a06d3e6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -710,6 +710,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
710 if (sk == NULL) 710 if (sk == NULL)
711 return; 711 return;
712 712
713 transport->srcport = 0;
714
713 write_lock_bh(&sk->sk_callback_lock); 715 write_lock_bh(&sk->sk_callback_lock);
714 transport->inet = NULL; 716 transport->inet = NULL;
715 transport->sock = NULL; 717 transport->sock = NULL;