about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-08-13 20:13:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-08-13 20:13:19 -0400
commit 06b8ab55289345ab191bf4bf0e4acc6d4bdf293d (patch)
tree 9af9215097e26c026f30a58c6ca3092ec15d1e1e /fs
parent dc1cc85133120e49c223f36aa77d398b8abac727 (diff)
parent 71a6ec8ac587418ceb6b420def1ca44b334c1ff7 (diff)
Merge tag 'nfs-for-3.17-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

   - stable fix for a bug in nfs3_list_one_acl()
   - speed up NFS path walks by supporting LOOKUP_RCU
   - more read/write code cleanups
   - pNFS fixes for layout return on close
   - fixes for the RCU handling in the rpcsec_gss code
   - more NFS/RDMA fixes"

* tag 'nfs-for-3.17-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
  nfs: reject changes to resvport and sharecache during remount
  NFS: Avoid infinite loop when RELEASE_LOCKOWNER getting expired error
  SUNRPC: remove all refcounting of groupinfo from rpcauth_lookupcred
  NFS: fix two problems in lookup_revalidate in RCU-walk
  NFS: allow lockless access to access_cache
  NFS: teach nfs_lookup_verify_inode to handle LOOKUP_RCU
  NFS: teach nfs_neg_need_reval to understand LOOKUP_RCU
  NFS: support RCU_WALK in nfs_permission()
  sunrpc/auth: allow lockless (rcu) lookup of credential cache.
  NFS: prepare for RCU-walk support but pushing tests later in code.
  NFS: nfs4_lookup_revalidate: only evaluate parent if it will be used.
  NFS: add checks for returned value of try_module_get()
  nfs: clear_request_commit while holding i_lock
  pnfs: add pnfs_put_lseg_async
  pnfs: find swapped pages on pnfs commit lists too
  nfs: fix comment and add warn_on for PG_INODE_REF
  nfs: check wait_on_bit_lock err in page_group_lock
  sunrpc: remove "ec" argument from encrypt_v2 operation
  sunrpc: clean up sparse endianness warnings in gss_krb5_wrap.c
  sunrpc: clean up sparse endianness warnings in gss_krb5_seal.c
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c 101
-rw-r--r-- fs/nfs/callback.c 12
-rw-r--r-- fs/nfs/client.c 18
-rw-r--r-- fs/nfs/delegation.c 34
-rw-r--r-- fs/nfs/delegation.h 1
-rw-r--r-- fs/nfs/dir.c 208
-rw-r--r-- fs/nfs/direct.c 33
-rw-r--r-- fs/nfs/filelayout/filelayout.c 298
-rw-r--r-- fs/nfs/filelayout/filelayoutdev.c 2
-rw-r--r-- fs/nfs/inode.c 9
-rw-r--r-- fs/nfs/internal.h 11
-rw-r--r-- fs/nfs/nfs3acl.c 2
-rw-r--r-- fs/nfs/nfs3proc.c 21
-rw-r--r-- fs/nfs/nfs4_fs.h 32
-rw-r--r-- fs/nfs/nfs4client.c 5
-rw-r--r-- fs/nfs/nfs4proc.c 248
-rw-r--r-- fs/nfs/nfs4state.c 69
-rw-r--r-- fs/nfs/nfs4trace.h 28
-rw-r--r-- fs/nfs/nfs4xdr.c 2
-rw-r--r-- fs/nfs/objlayout/objio_osd.c 24
-rw-r--r-- fs/nfs/objlayout/objlayout.c 81
-rw-r--r-- fs/nfs/objlayout/objlayout.h 8
-rw-r--r-- fs/nfs/pagelist.c 276
-rw-r--r-- fs/nfs/pnfs.c 178
-rw-r--r-- fs/nfs/pnfs.h 45
-rw-r--r-- fs/nfs/proc.c 27
-rw-r--r-- fs/nfs/read.c 54
-rw-r--r-- fs/nfs/super.c 12
-rw-r--r-- fs/nfs/write.c 150
-rw-r--r-- fs/nfs_common/nfsacl.c 5
30 files changed, 1124 insertions, 870 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9b431f44fad9..cbb1797149d5 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
210 SetPageUptodate(bvec->bv_page); 210 SetPageUptodate(bvec->bv_page);
211 211
212 if (err) { 212 if (err) {
213 struct nfs_pgio_data *rdata = par->data; 213 struct nfs_pgio_header *header = par->data;
214 struct nfs_pgio_header *header = rdata->header;
215 214
216 if (!header->pnfs_error) 215 if (!header->pnfs_error)
217 header->pnfs_error = -EIO; 216 header->pnfs_error = -EIO;
@@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err)
224static void bl_read_cleanup(struct work_struct *work) 223static void bl_read_cleanup(struct work_struct *work)
225{ 224{
226 struct rpc_task *task; 225 struct rpc_task *task;
227 struct nfs_pgio_data *rdata; 226 struct nfs_pgio_header *hdr;
228 dprintk("%s enter\n", __func__); 227 dprintk("%s enter\n", __func__);
229 task = container_of(work, struct rpc_task, u.tk_work); 228 task = container_of(work, struct rpc_task, u.tk_work);
230 rdata = container_of(task, struct nfs_pgio_data, task); 229 hdr = container_of(task, struct nfs_pgio_header, task);
231 pnfs_ld_read_done(rdata); 230 pnfs_ld_read_done(hdr);
232} 231}
233 232
234static void 233static void
235bl_end_par_io_read(void *data, int unused) 234bl_end_par_io_read(void *data, int unused)
236{ 235{
237 struct nfs_pgio_data *rdata = data; 236 struct nfs_pgio_header *hdr = data;
238 237
239 rdata->task.tk_status = rdata->header->pnfs_error; 238 hdr->task.tk_status = hdr->pnfs_error;
240 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 239 INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
241 schedule_work(&rdata->task.u.tk_work); 240 schedule_work(&hdr->task.u.tk_work);
242} 241}
243 242
244static enum pnfs_try_status 243static enum pnfs_try_status
245bl_read_pagelist(struct nfs_pgio_data *rdata) 244bl_read_pagelist(struct nfs_pgio_header *hdr)
246{ 245{
247 struct nfs_pgio_header *header = rdata->header; 246 struct nfs_pgio_header *header = hdr;
248 int i, hole; 247 int i, hole;
249 struct bio *bio = NULL; 248 struct bio *bio = NULL;
250 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 249 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
251 sector_t isect, extent_length = 0; 250 sector_t isect, extent_length = 0;
252 struct parallel_io *par; 251 struct parallel_io *par;
253 loff_t f_offset = rdata->args.offset; 252 loff_t f_offset = hdr->args.offset;
254 size_t bytes_left = rdata->args.count; 253 size_t bytes_left = hdr->args.count;
255 unsigned int pg_offset, pg_len; 254 unsigned int pg_offset, pg_len;
256 struct page **pages = rdata->args.pages; 255 struct page **pages = hdr->args.pages;
257 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 256 int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT;
258 const bool is_dio = (header->dreq != NULL); 257 const bool is_dio = (header->dreq != NULL);
259 258
260 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 259 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
261 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); 260 hdr->page_array.npages, f_offset,
261 (unsigned int)hdr->args.count);
262 262
263 par = alloc_parallel(rdata); 263 par = alloc_parallel(hdr);
264 if (!par) 264 if (!par)
265 goto use_mds; 265 goto use_mds;
266 par->pnfs_callback = bl_end_par_io_read; 266 par->pnfs_callback = bl_end_par_io_read;
@@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
268 268
269 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 269 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
270 /* Code assumes extents are page-aligned */ 270 /* Code assumes extents are page-aligned */
271 for (i = pg_index; i < rdata->pages.npages; i++) { 271 for (i = pg_index; i < hdr->page_array.npages; i++) {
272 if (!extent_length) { 272 if (!extent_length) {
273 /* We've used up the previous extent */ 273 /* We've used up the previous extent */
274 bl_put_extent(be); 274 bl_put_extent(be);
@@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
317 struct pnfs_block_extent *be_read; 317 struct pnfs_block_extent *be_read;
318 318
319 be_read = (hole && cow_read) ? cow_read : be; 319 be_read = (hole && cow_read) ? cow_read : be;
320 bio = do_add_page_to_bio(bio, rdata->pages.npages - i, 320 bio = do_add_page_to_bio(bio,
321 hdr->page_array.npages - i,
321 READ, 322 READ,
322 isect, pages[i], be_read, 323 isect, pages[i], be_read,
323 bl_end_io_read, par, 324 bl_end_io_read, par,
@@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
332 extent_length -= PAGE_CACHE_SECTORS; 333 extent_length -= PAGE_CACHE_SECTORS;
333 } 334 }
334 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 335 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
335 rdata->res.eof = 1; 336 hdr->res.eof = 1;
336 rdata->res.count = header->inode->i_size - rdata->args.offset; 337 hdr->res.count = header->inode->i_size - hdr->args.offset;
337 } else { 338 } else {
338 rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; 339 hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset;
339 } 340 }
340out: 341out:
341 bl_put_extent(be); 342 bl_put_extent(be);
@@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
390 } 391 }
391 392
392 if (unlikely(err)) { 393 if (unlikely(err)) {
393 struct nfs_pgio_data *data = par->data; 394 struct nfs_pgio_header *header = par->data;
394 struct nfs_pgio_header *header = data->header;
395 395
396 if (!header->pnfs_error) 396 if (!header->pnfs_error)
397 header->pnfs_error = -EIO; 397 header->pnfs_error = -EIO;
@@ -405,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
405{ 405{
406 struct parallel_io *par = bio->bi_private; 406 struct parallel_io *par = bio->bi_private;
407 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 407 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
408 struct nfs_pgio_data *data = par->data; 408 struct nfs_pgio_header *header = par->data;
409 struct nfs_pgio_header *header = data->header;
410 409
411 if (!uptodate) { 410 if (!uptodate) {
412 if (!header->pnfs_error) 411 if (!header->pnfs_error)
@@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err)
423static void bl_write_cleanup(struct work_struct *work) 422static void bl_write_cleanup(struct work_struct *work)
424{ 423{
425 struct rpc_task *task; 424 struct rpc_task *task;
426 struct nfs_pgio_data *wdata; 425 struct nfs_pgio_header *hdr;
427 dprintk("%s enter\n", __func__); 426 dprintk("%s enter\n", __func__);
428 task = container_of(work, struct rpc_task, u.tk_work); 427 task = container_of(work, struct rpc_task, u.tk_work);
429 wdata = container_of(task, struct nfs_pgio_data, task); 428 hdr = container_of(task, struct nfs_pgio_header, task);
430 if (likely(!wdata->header->pnfs_error)) { 429 if (likely(!hdr->pnfs_error)) {
431 /* Marks for LAYOUTCOMMIT */ 430 /* Marks for LAYOUTCOMMIT */
432 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), 431 mark_extents_written(BLK_LSEG2EXT(hdr->lseg),
433 wdata->args.offset, wdata->args.count); 432 hdr->args.offset, hdr->args.count);
434 } 433 }
435 pnfs_ld_write_done(wdata); 434 pnfs_ld_write_done(hdr);
436} 435}
437 436
438/* Called when last of bios associated with a bl_write_pagelist call finishes */ 437/* Called when last of bios associated with a bl_write_pagelist call finishes */
439static void bl_end_par_io_write(void *data, int num_se) 438static void bl_end_par_io_write(void *data, int num_se)
440{ 439{
441 struct nfs_pgio_data *wdata = data; 440 struct nfs_pgio_header *hdr = data;
442 441
443 if (unlikely(wdata->header->pnfs_error)) { 442 if (unlikely(hdr->pnfs_error)) {
444 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, 443 bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval,
445 num_se); 444 num_se);
446 } 445 }
447 446
448 wdata->task.tk_status = wdata->header->pnfs_error; 447 hdr->task.tk_status = hdr->pnfs_error;
449 wdata->verf.committed = NFS_FILE_SYNC; 448 hdr->verf.committed = NFS_FILE_SYNC;
450 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 449 INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
451 schedule_work(&wdata->task.u.tk_work); 450 schedule_work(&hdr->task.u.tk_work);
452} 451}
453 452
454/* FIXME STUB - mark intersection of layout and page as bad, so is not 453/* FIXME STUB - mark intersection of layout and page as bad, so is not
@@ -673,18 +672,17 @@ check_page:
673} 672}
674 673
675static enum pnfs_try_status 674static enum pnfs_try_status
676bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) 675bl_write_pagelist(struct nfs_pgio_header *header, int sync)
677{ 676{
678 struct nfs_pgio_header *header = wdata->header;
679 int i, ret, npg_zero, pg_index, last = 0; 677 int i, ret, npg_zero, pg_index, last = 0;
680 struct bio *bio = NULL; 678 struct bio *bio = NULL;
681 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 679 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
682 sector_t isect, last_isect = 0, extent_length = 0; 680 sector_t isect, last_isect = 0, extent_length = 0;
683 struct parallel_io *par = NULL; 681 struct parallel_io *par = NULL;
684 loff_t offset = wdata->args.offset; 682 loff_t offset = header->args.offset;
685 size_t count = wdata->args.count; 683 size_t count = header->args.count;
686 unsigned int pg_offset, pg_len, saved_len; 684 unsigned int pg_offset, pg_len, saved_len;
687 struct page **pages = wdata->args.pages; 685 struct page **pages = header->args.pages;
688 struct page *page; 686 struct page *page;
689 pgoff_t index; 687 pgoff_t index;
690 u64 temp; 688 u64 temp;
@@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
699 dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); 697 dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
700 goto out_mds; 698 goto out_mds;
701 } 699 }
702 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 700 /* At this point, header->page_array is a (sequential) list of nfs_pages.
703 * We want to write each, and if there is an error set pnfs_error 701 * We want to write each, and if there is an error set pnfs_error
704 * to have it redone using nfs. 702 * to have it redone using nfs.
705 */ 703 */
706 par = alloc_parallel(wdata); 704 par = alloc_parallel(header);
707 if (!par) 705 if (!par)
708 goto out_mds; 706 goto out_mds;
709 par->pnfs_callback = bl_end_par_io_write; 707 par->pnfs_callback = bl_end_par_io_write;
@@ -790,8 +788,8 @@ next_page:
790 bio = bl_submit_bio(WRITE, bio); 788 bio = bl_submit_bio(WRITE, bio);
791 789
792 /* Middle pages */ 790 /* Middle pages */
793 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 791 pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
794 for (i = pg_index; i < wdata->pages.npages; i++) { 792 for (i = pg_index; i < header->page_array.npages; i++) {
795 if (!extent_length) { 793 if (!extent_length) {
796 /* We've used up the previous extent */ 794 /* We've used up the previous extent */
797 bl_put_extent(be); 795 bl_put_extent(be);
@@ -862,7 +860,8 @@ next_page:
862 } 860 }
863 861
864 862
865 bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, 863 bio = do_add_page_to_bio(bio, header->page_array.npages - i,
864 WRITE,
866 isect, pages[i], be, 865 isect, pages[i], be,
867 bl_end_io_write, par, 866 bl_end_io_write, par,
868 pg_offset, pg_len); 867 pg_offset, pg_len);
@@ -890,7 +889,7 @@ next_page:
890 } 889 }
891 890
892write_done: 891write_done:
893 wdata->res.count = wdata->args.count; 892 header->res.count = header->args.count;
894out: 893out:
895 bl_put_extent(be); 894 bl_put_extent(be);
896 bl_put_extent(cow_read); 895 bl_put_extent(cow_read);
@@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
1063 return ERR_PTR(-ENOMEM); 1062 return ERR_PTR(-ENOMEM);
1064 } 1063 }
1065 1064
1066 pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); 1065 pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS);
1067 if (pages == NULL) { 1066 if (pages == NULL) {
1068 kfree(dev); 1067 kfree(dev);
1069 return ERR_PTR(-ENOMEM); 1068 return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 073b4cf67ed9..54de482143cc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
428 if (p == NULL) 428 if (p == NULL)
429 return 0; 429 return 0;
430 430
431 /*
432 * Did we get the acceptor from userland during the SETCLIENTID
433 * negotiation?
434 */
435 if (clp->cl_acceptor)
436 return !strcmp(p, clp->cl_acceptor);
437
438 /*
439 * Otherwise try to verify it using the cl_hostname. Note that this
440 * doesn't work if a non-canonical hostname was used in the devname.
441 */
442
431 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ 443 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
432 444
433 if (memcmp(p, "nfs@", 4) != 0) 445 if (memcmp(p, "nfs@", 4) != 0)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 180d1ec9c32e..1c5ff6d58385 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
110 mutex_unlock(&nfs_version_mutex); 110 mutex_unlock(&nfs_version_mutex);
111 } 111 }
112 112
113 if (!IS_ERR(nfs)) 113 if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
114 try_module_get(nfs->owner); 114 return ERR_PTR(-EAGAIN);
115 return nfs; 115 return nfs;
116} 116}
117 117
@@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
158 goto error_0; 158 goto error_0;
159 159
160 clp->cl_nfs_mod = cl_init->nfs_mod; 160 clp->cl_nfs_mod = cl_init->nfs_mod;
161 try_module_get(clp->cl_nfs_mod->owner); 161 if (!try_module_get(clp->cl_nfs_mod->owner))
162 goto error_dealloc;
162 163
163 clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; 164 clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
164 165
@@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
190 191
191error_cleanup: 192error_cleanup:
192 put_nfs_version(clp->cl_nfs_mod); 193 put_nfs_version(clp->cl_nfs_mod);
194error_dealloc:
193 kfree(clp); 195 kfree(clp);
194error_0: 196error_0:
195 return ERR_PTR(err); 197 return ERR_PTR(err);
@@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp)
252 put_net(clp->cl_net); 254 put_net(clp->cl_net);
253 put_nfs_version(clp->cl_nfs_mod); 255 put_nfs_version(clp->cl_nfs_mod);
254 kfree(clp->cl_hostname); 256 kfree(clp->cl_hostname);
257 kfree(clp->cl_acceptor);
255 kfree(clp); 258 kfree(clp);
256 259
257 dprintk("<-- nfs_free_client()\n"); 260 dprintk("<-- nfs_free_client()\n");
@@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
482 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); 485 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
483 const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; 486 const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
484 487
488 if (cl_init->hostname == NULL) {
489 WARN_ON(1);
490 return NULL;
491 }
492
485 dprintk("--> nfs_get_client(%s,v%u)\n", 493 dprintk("--> nfs_get_client(%s,v%u)\n",
486 cl_init->hostname ?: "", rpc_ops->version); 494 cl_init->hostname, rpc_ops->version);
487 495
488 /* see if the client already exists */ 496 /* see if the client already exists */
489 do { 497 do {
@@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
510 } while (!IS_ERR(new)); 518 } while (!IS_ERR(new));
511 519
512 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", 520 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
513 cl_init->hostname ?: "", PTR_ERR(new)); 521 cl_init->hostname, PTR_ERR(new));
514 return new; 522 return new;
515} 523}
516EXPORT_SYMBOL_GPL(nfs_get_client); 524EXPORT_SYMBOL_GPL(nfs_get_client);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5d8ccecf5f5c..5853f53db732 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
41 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); 41 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
42} 42}
43 43
44/** 44static int
45 * nfs_have_delegation - check if inode has a delegation 45nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
46 * @inode: inode to check
47 * @flags: delegation types to check for
48 *
49 * Returns one if inode has the indicated delegation, otherwise zero.
50 */
51int nfs4_have_delegation(struct inode *inode, fmode_t flags)
52{ 46{
53 struct nfs_delegation *delegation; 47 struct nfs_delegation *delegation;
54 int ret = 0; 48 int ret = 0;
@@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
58 delegation = rcu_dereference(NFS_I(inode)->delegation); 52 delegation = rcu_dereference(NFS_I(inode)->delegation);
59 if (delegation != NULL && (delegation->type & flags) == flags && 53 if (delegation != NULL && (delegation->type & flags) == flags &&
60 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { 54 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
61 nfs_mark_delegation_referenced(delegation); 55 if (mark)
56 nfs_mark_delegation_referenced(delegation);
62 ret = 1; 57 ret = 1;
63 } 58 }
64 rcu_read_unlock(); 59 rcu_read_unlock();
65 return ret; 60 return ret;
66} 61}
62/**
63 * nfs4_have_delegation - check if inode has a delegation, mark it
64 * NFS_DELEGATION_REFERENCED if there is one.
65 * @inode: inode to check
66 * @flags: delegation types to check for
67 *
68 * Returns one if inode has the indicated delegation, otherwise zero.
69 */
70int nfs4_have_delegation(struct inode *inode, fmode_t flags)
71{
72 return nfs4_do_check_delegation(inode, flags, true);
73}
74
75/*
76 * nfs4_check_delegation - check if inode has a delegation, do not mark
77 * NFS_DELEGATION_REFERENCED if it has one.
78 */
79int nfs4_check_delegation(struct inode *inode, fmode_t flags)
80{
81 return nfs4_do_check_delegation(inode, flags, false);
82}
67 83
68static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) 84static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
69{ 85{
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6d..5c1cce39297f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_
59 59
60void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); 60void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
61int nfs4_have_delegation(struct inode *inode, fmode_t flags); 61int nfs4_have_delegation(struct inode *inode, fmode_t flags);
62int nfs4_check_delegation(struct inode *inode, fmode_t flags);
62 63
63#endif 64#endif
64 65
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4a3d4ef76127..36d921f0c602 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
988 * A check for whether or not the parent directory has changed. 988 * A check for whether or not the parent directory has changed.
989 * In the case it has, we assume that the dentries are untrustworthy 989 * In the case it has, we assume that the dentries are untrustworthy
990 * and may need to be looked up again. 990 * and may need to be looked up again.
991 * If rcu_walk prevents us from performing a full check, return 0.
991 */ 992 */
992static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) 993static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
994 int rcu_walk)
993{ 995{
996 int ret;
997
994 if (IS_ROOT(dentry)) 998 if (IS_ROOT(dentry))
995 return 1; 999 return 1;
996 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) 1000 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
998 if (!nfs_verify_change_attribute(dir, dentry->d_time)) 1002 if (!nfs_verify_change_attribute(dir, dentry->d_time))
999 return 0; 1003 return 0;
1000 /* Revalidate nfsi->cache_change_attribute before we declare a match */ 1004 /* Revalidate nfsi->cache_change_attribute before we declare a match */
1001 if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) 1005 if (rcu_walk)
1006 ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
1007 else
1008 ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
1009 if (ret < 0)
1002 return 0; 1010 return 0;
1003 if (!nfs_verify_change_attribute(dir, dentry->d_time)) 1011 if (!nfs_verify_change_attribute(dir, dentry->d_time))
1004 return 0; 1012 return 0;
@@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1042out: 1050out:
1043 return (inode->i_nlink == 0) ? -ENOENT : 0; 1051 return (inode->i_nlink == 0) ? -ENOENT : 0;
1044out_force: 1052out_force:
1053 if (flags & LOOKUP_RCU)
1054 return -ECHILD;
1045 ret = __nfs_revalidate_inode(server, inode); 1055 ret = __nfs_revalidate_inode(server, inode);
1046 if (ret != 0) 1056 if (ret != 0)
1047 return ret; 1057 return ret;
@@ -1054,6 +1064,9 @@ out_force:
1054 * 1064 *
1055 * If parent mtime has changed, we revalidate, else we wait for a 1065 * If parent mtime has changed, we revalidate, else we wait for a
1056 * period corresponding to the parent's attribute cache timeout value. 1066 * period corresponding to the parent's attribute cache timeout value.
1067 *
1068 * If LOOKUP_RCU prevents us from performing a full check, return 1
1069 * suggesting a reval is needed.
1057 */ 1070 */
1058static inline 1071static inline
1059int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, 1072int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1064 return 0; 1077 return 0;
1065 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) 1078 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1066 return 1; 1079 return 1;
1067 return !nfs_check_verifier(dir, dentry); 1080 return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1068} 1081}
1069 1082
1070/* 1083/*
@@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1088 struct nfs4_label *label = NULL; 1101 struct nfs4_label *label = NULL;
1089 int error; 1102 int error;
1090 1103
1091 if (flags & LOOKUP_RCU) 1104 if (flags & LOOKUP_RCU) {
1092 return -ECHILD; 1105 parent = ACCESS_ONCE(dentry->d_parent);
1093 1106 dir = ACCESS_ONCE(parent->d_inode);
1094 parent = dget_parent(dentry); 1107 if (!dir)
1095 dir = parent->d_inode; 1108 return -ECHILD;
1109 } else {
1110 parent = dget_parent(dentry);
1111 dir = parent->d_inode;
1112 }
1096 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); 1113 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1097 inode = dentry->d_inode; 1114 inode = dentry->d_inode;
1098 1115
1099 if (!inode) { 1116 if (!inode) {
1100 if (nfs_neg_need_reval(dir, dentry, flags)) 1117 if (nfs_neg_need_reval(dir, dentry, flags)) {
1118 if (flags & LOOKUP_RCU)
1119 return -ECHILD;
1101 goto out_bad; 1120 goto out_bad;
1121 }
1102 goto out_valid_noent; 1122 goto out_valid_noent;
1103 } 1123 }
1104 1124
1105 if (is_bad_inode(inode)) { 1125 if (is_bad_inode(inode)) {
1126 if (flags & LOOKUP_RCU)
1127 return -ECHILD;
1106 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", 1128 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1107 __func__, dentry); 1129 __func__, dentry);
1108 goto out_bad; 1130 goto out_bad;
@@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1112 goto out_set_verifier; 1134 goto out_set_verifier;
1113 1135
1114 /* Force a full look up iff the parent directory has changed */ 1136 /* Force a full look up iff the parent directory has changed */
1115 if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { 1137 if (!nfs_is_exclusive_create(dir, flags) &&
1116 if (nfs_lookup_verify_inode(inode, flags)) 1138 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1139
1140 if (nfs_lookup_verify_inode(inode, flags)) {
1141 if (flags & LOOKUP_RCU)
1142 return -ECHILD;
1117 goto out_zap_parent; 1143 goto out_zap_parent;
1144 }
1118 goto out_valid; 1145 goto out_valid;
1119 } 1146 }
1120 1147
1148 if (flags & LOOKUP_RCU)
1149 return -ECHILD;
1150
1121 if (NFS_STALE(inode)) 1151 if (NFS_STALE(inode))
1122 goto out_bad; 1152 goto out_bad;
1123 1153
@@ -1153,13 +1183,18 @@ out_set_verifier:
1153 /* Success: notify readdir to use READDIRPLUS */ 1183 /* Success: notify readdir to use READDIRPLUS */
1154 nfs_advise_use_readdirplus(dir); 1184 nfs_advise_use_readdirplus(dir);
1155 out_valid_noent: 1185 out_valid_noent:
1156 dput(parent); 1186 if (flags & LOOKUP_RCU) {
1187 if (parent != ACCESS_ONCE(dentry->d_parent))
1188 return -ECHILD;
1189 } else
1190 dput(parent);
1157 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", 1191 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1158 __func__, dentry); 1192 __func__, dentry);
1159 return 1; 1193 return 1;
1160out_zap_parent: 1194out_zap_parent:
1161 nfs_zap_caches(dir); 1195 nfs_zap_caches(dir);
1162 out_bad: 1196 out_bad:
1197 WARN_ON(flags & LOOKUP_RCU);
1163 nfs_free_fattr(fattr); 1198 nfs_free_fattr(fattr);
1164 nfs_free_fhandle(fhandle); 1199 nfs_free_fhandle(fhandle);
1165 nfs4_label_free(label); 1200 nfs4_label_free(label);
@@ -1185,6 +1220,7 @@ out_zap_parent:
1185 __func__, dentry); 1220 __func__, dentry);
1186 return 0; 1221 return 0;
1187out_error: 1222out_error:
1223 WARN_ON(flags & LOOKUP_RCU);
1188 nfs_free_fattr(fattr); 1224 nfs_free_fattr(fattr);
1189 nfs_free_fhandle(fhandle); 1225 nfs_free_fhandle(fhandle);
1190 nfs4_label_free(label); 1226 nfs4_label_free(label);
@@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open);
1529 1565
1530static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) 1566static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1531{ 1567{
1532 struct dentry *parent = NULL;
1533 struct inode *inode; 1568 struct inode *inode;
1534 struct inode *dir;
1535 int ret = 0; 1569 int ret = 0;
1536 1570
1537 if (flags & LOOKUP_RCU)
1538 return -ECHILD;
1539
1540 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) 1571 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
1541 goto no_open; 1572 goto no_open;
1542 if (d_mountpoint(dentry)) 1573 if (d_mountpoint(dentry))
@@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1545 goto no_open; 1576 goto no_open;
1546 1577
1547 inode = dentry->d_inode; 1578 inode = dentry->d_inode;
1548 parent = dget_parent(dentry);
1549 dir = parent->d_inode;
1550 1579
1551 /* We can't create new files in nfs_open_revalidate(), so we 1580 /* We can't create new files in nfs_open_revalidate(), so we
1552 * optimize away revalidation of negative dentries. 1581 * optimize away revalidation of negative dentries.
1553 */ 1582 */
1554 if (inode == NULL) { 1583 if (inode == NULL) {
1584 struct dentry *parent;
1585 struct inode *dir;
1586
1587 if (flags & LOOKUP_RCU) {
1588 parent = ACCESS_ONCE(dentry->d_parent);
1589 dir = ACCESS_ONCE(parent->d_inode);
1590 if (!dir)
1591 return -ECHILD;
1592 } else {
1593 parent = dget_parent(dentry);
1594 dir = parent->d_inode;
1595 }
1555 if (!nfs_neg_need_reval(dir, dentry, flags)) 1596 if (!nfs_neg_need_reval(dir, dentry, flags))
1556 ret = 1; 1597 ret = 1;
1598 else if (flags & LOOKUP_RCU)
1599 ret = -ECHILD;
1600 if (!(flags & LOOKUP_RCU))
1601 dput(parent);
1602 else if (parent != ACCESS_ONCE(dentry->d_parent))
1603 return -ECHILD;
1557 goto out; 1604 goto out;
1558 } 1605 }
1559 1606
1560 /* NFS only supports OPEN on regular files */ 1607 /* NFS only supports OPEN on regular files */
1561 if (!S_ISREG(inode->i_mode)) 1608 if (!S_ISREG(inode->i_mode))
1562 goto no_open_dput; 1609 goto no_open;
1563 /* We cannot do exclusive creation on a positive dentry */ 1610 /* We cannot do exclusive creation on a positive dentry */
1564 if (flags & LOOKUP_EXCL) 1611 if (flags & LOOKUP_EXCL)
1565 goto no_open_dput; 1612 goto no_open;
1566 1613
1567 /* Let f_op->open() actually open (and revalidate) the file */ 1614 /* Let f_op->open() actually open (and revalidate) the file */
1568 ret = 1; 1615 ret = 1;
1569 1616
1570out: 1617out:
1571 dput(parent);
1572 return ret; 1618 return ret;
1573 1619
1574no_open_dput:
1575 dput(parent);
1576no_open: 1620no_open:
1577 return nfs_lookup_revalidate(dentry, flags); 1621 return nfs_lookup_revalidate(dentry, flags);
1578} 1622}
@@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock);
2028static LIST_HEAD(nfs_access_lru_list); 2072static LIST_HEAD(nfs_access_lru_list);
2029static atomic_long_t nfs_access_nr_entries; 2073static atomic_long_t nfs_access_nr_entries;
2030 2074
2075static unsigned long nfs_access_max_cachesize = ULONG_MAX;
2076module_param(nfs_access_max_cachesize, ulong, 0644);
2077MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2078
2031static void nfs_access_free_entry(struct nfs_access_entry *entry) 2079static void nfs_access_free_entry(struct nfs_access_entry *entry)
2032{ 2080{
2033 put_rpccred(entry->cred); 2081 put_rpccred(entry->cred);
2034 kfree(entry); 2082 kfree_rcu(entry, rcu_head);
2035 smp_mb__before_atomic(); 2083 smp_mb__before_atomic();
2036 atomic_long_dec(&nfs_access_nr_entries); 2084 atomic_long_dec(&nfs_access_nr_entries);
2037 smp_mb__after_atomic(); 2085 smp_mb__after_atomic();
@@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head)
2048 } 2096 }
2049} 2097}
2050 2098
2051unsigned long 2099static unsigned long
2052nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) 2100nfs_do_access_cache_scan(unsigned int nr_to_scan)
2053{ 2101{
2054 LIST_HEAD(head); 2102 LIST_HEAD(head);
2055 struct nfs_inode *nfsi, *next; 2103 struct nfs_inode *nfsi, *next;
2056 struct nfs_access_entry *cache; 2104 struct nfs_access_entry *cache;
2057 int nr_to_scan = sc->nr_to_scan;
2058 gfp_t gfp_mask = sc->gfp_mask;
2059 long freed = 0; 2105 long freed = 0;
2060 2106
2061 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2062 return SHRINK_STOP;
2063
2064 spin_lock(&nfs_access_lru_lock); 2107 spin_lock(&nfs_access_lru_lock);
2065 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { 2108 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
2066 struct inode *inode; 2109 struct inode *inode;
@@ -2094,11 +2137,39 @@ remove_lru_entry:
2094} 2137}
2095 2138
2096unsigned long 2139unsigned long
2140nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2141{
2142 int nr_to_scan = sc->nr_to_scan;
2143 gfp_t gfp_mask = sc->gfp_mask;
2144
2145 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2146 return SHRINK_STOP;
2147 return nfs_do_access_cache_scan(nr_to_scan);
2148}
2149
2150
2151unsigned long
2097nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) 2152nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2098{ 2153{
2099 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); 2154 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2100} 2155}
2101 2156
2157static void
2158nfs_access_cache_enforce_limit(void)
2159{
2160 long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2161 unsigned long diff;
2162 unsigned int nr_to_scan;
2163
2164 if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2165 return;
2166 nr_to_scan = 100;
2167 diff = nr_entries - nfs_access_max_cachesize;
2168 if (diff < nr_to_scan)
2169 nr_to_scan = diff;
2170 nfs_do_access_cache_scan(nr_to_scan);
2171}
2172
2102static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) 2173static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
2103{ 2174{
2104 struct rb_root *root_node = &nfsi->access_cache; 2175 struct rb_root *root_node = &nfsi->access_cache;
@@ -2186,6 +2257,38 @@ out_zap:
2186 return -ENOENT; 2257 return -ENOENT;
2187} 2258}
2188 2259
2260static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
2261{
2262 /* Only check the most recently returned cache entry,
2263 * but do it without locking.
2264 */
2265 struct nfs_inode *nfsi = NFS_I(inode);
2266 struct nfs_access_entry *cache;
2267 int err = -ECHILD;
2268 struct list_head *lh;
2269
2270 rcu_read_lock();
2271 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2272 goto out;
2273 lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
2274 cache = list_entry(lh, struct nfs_access_entry, lru);
2275 if (lh == &nfsi->access_cache_entry_lru ||
2276 cred != cache->cred)
2277 cache = NULL;
2278 if (cache == NULL)
2279 goto out;
2280 if (!nfs_have_delegated_attributes(inode) &&
2281 !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
2282 goto out;
2283 res->jiffies = cache->jiffies;
2284 res->cred = cache->cred;
2285 res->mask = cache->mask;
2286 err = 0;
2287out:
2288 rcu_read_unlock();
2289 return err;
2290}
2291
2189static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) 2292static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
2190{ 2293{
2191 struct nfs_inode *nfsi = NFS_I(inode); 2294 struct nfs_inode *nfsi = NFS_I(inode);
@@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2229 cache->cred = get_rpccred(set->cred); 2332 cache->cred = get_rpccred(set->cred);
2230 cache->mask = set->mask; 2333 cache->mask = set->mask;
2231 2334
2335 /* The above field assignments must be visible
2336 * before this item appears on the lru. We cannot easily
2337 * use rcu_assign_pointer, so just force the memory barrier.
2338 */
2339 smp_wmb();
2232 nfs_access_add_rbtree(inode, cache); 2340 nfs_access_add_rbtree(inode, cache);
2233 2341
2234 /* Update accounting */ 2342 /* Update accounting */
@@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2244 &nfs_access_lru_list); 2352 &nfs_access_lru_list);
2245 spin_unlock(&nfs_access_lru_lock); 2353 spin_unlock(&nfs_access_lru_lock);
2246 } 2354 }
2355 nfs_access_cache_enforce_limit();
2247} 2356}
2248EXPORT_SYMBOL_GPL(nfs_access_add_cache); 2357EXPORT_SYMBOL_GPL(nfs_access_add_cache);
2249 2358
@@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
2267 2376
2268 trace_nfs_access_enter(inode); 2377 trace_nfs_access_enter(inode);
2269 2378
2270 status = nfs_access_get_cached(inode, cred, &cache); 2379 status = nfs_access_get_cached_rcu(inode, cred, &cache);
2380 if (status != 0)
2381 status = nfs_access_get_cached(inode, cred, &cache);
2271 if (status == 0) 2382 if (status == 0)
2272 goto out_cached; 2383 goto out_cached;
2273 2384
2385 status = -ECHILD;
2386 if (mask & MAY_NOT_BLOCK)
2387 goto out;
2388
2274 /* Be clever: ask server to check for all possible rights */ 2389 /* Be clever: ask server to check for all possible rights */
2275 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; 2390 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
2276 cache.cred = cred; 2391 cache.cred = cred;
@@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask)
2321 struct rpc_cred *cred; 2436 struct rpc_cred *cred;
2322 int res = 0; 2437 int res = 0;
2323 2438
2324 if (mask & MAY_NOT_BLOCK)
2325 return -ECHILD;
2326
2327 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 2439 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2328 2440
2329 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 2441 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2350,12 +2462,23 @@ force_lookup:
2350 if (!NFS_PROTO(inode)->access) 2462 if (!NFS_PROTO(inode)->access)
2351 goto out_notsup; 2463 goto out_notsup;
2352 2464
2353 cred = rpc_lookup_cred(); 2465 /* Always try fast lookups first */
2354 if (!IS_ERR(cred)) { 2466 rcu_read_lock();
2355 res = nfs_do_access(inode, cred, mask); 2467 cred = rpc_lookup_cred_nonblock();
2356 put_rpccred(cred); 2468 if (!IS_ERR(cred))
2357 } else 2469 res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
2470 else
2358 res = PTR_ERR(cred); 2471 res = PTR_ERR(cred);
2472 rcu_read_unlock();
2473 if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
2474 /* Fast lookup failed, try the slow way */
2475 cred = rpc_lookup_cred();
2476 if (!IS_ERR(cred)) {
2477 res = nfs_do_access(inode, cred, mask);
2478 put_rpccred(cred);
2479 } else
2480 res = PTR_ERR(cred);
2481 }
2359out: 2482out:
2360 if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) 2483 if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
2361 res = -EACCES; 2484 res = -EACCES;
@@ -2364,6 +2487,9 @@ out:
2364 inode->i_sb->s_id, inode->i_ino, mask, res); 2487 inode->i_sb->s_id, inode->i_ino, mask, res);
2365 return res; 2488 return res;
2366out_notsup: 2489out_notsup:
2490 if (mask & MAY_NOT_BLOCK)
2491 return -ECHILD;
2492
2367 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 2493 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2368 if (res == 0) 2494 if (res == 0)
2369 res = generic_permission(inode, mask); 2495 res = generic_permission(inode, mask);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f11b9eed0de1..65ef6e00deee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
148{ 148{
149 struct nfs_writeverf *verfp; 149 struct nfs_writeverf *verfp;
150 150
151 verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, 151 verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
152 hdr->data->ds_idx); 152 hdr->ds_idx);
153 WARN_ON_ONCE(verfp->committed >= 0); 153 WARN_ON_ONCE(verfp->committed >= 0);
154 memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); 154 memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
155 WARN_ON_ONCE(verfp->committed < 0); 155 WARN_ON_ONCE(verfp->committed < 0);
@@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
169{ 169{
170 struct nfs_writeverf *verfp; 170 struct nfs_writeverf *verfp;
171 171
172 verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, 172 verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
173 hdr->data->ds_idx); 173 hdr->ds_idx);
174 if (verfp->committed < 0) { 174 if (verfp->committed < 0) {
175 nfs_direct_set_hdr_verf(dreq, hdr); 175 nfs_direct_set_hdr_verf(dreq, hdr);
176 return 0; 176 return 0;
@@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
715{ 715{
716 struct nfs_direct_req *dreq = hdr->dreq; 716 struct nfs_direct_req *dreq = hdr->dreq;
717 struct nfs_commit_info cinfo; 717 struct nfs_commit_info cinfo;
718 int bit = -1; 718 bool request_commit = false;
719 struct nfs_page *req = nfs_list_entry(hdr->pages.next); 719 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
720 720
721 if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) 721 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
729 dreq->flags = 0; 729 dreq->flags = 0;
730 dreq->error = hdr->error; 730 dreq->error = hdr->error;
731 } 731 }
732 if (dreq->error != 0) 732 if (dreq->error == 0) {
733 bit = NFS_IOHDR_ERROR;
734 else {
735 dreq->count += hdr->good_bytes; 733 dreq->count += hdr->good_bytes;
736 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { 734 if (nfs_write_need_commit(hdr)) {
737 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
738 bit = NFS_IOHDR_NEED_RESCHED;
739 } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
740 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) 735 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
741 bit = NFS_IOHDR_NEED_RESCHED; 736 request_commit = true;
742 else if (dreq->flags == 0) { 737 else if (dreq->flags == 0) {
743 nfs_direct_set_hdr_verf(dreq, hdr); 738 nfs_direct_set_hdr_verf(dreq, hdr);
744 bit = NFS_IOHDR_NEED_COMMIT; 739 request_commit = true;
745 dreq->flags = NFS_ODIRECT_DO_COMMIT; 740 dreq->flags = NFS_ODIRECT_DO_COMMIT;
746 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { 741 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
747 if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { 742 request_commit = true;
743 if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
748 dreq->flags = 744 dreq->flags =
749 NFS_ODIRECT_RESCHED_WRITES; 745 NFS_ODIRECT_RESCHED_WRITES;
750 bit = NFS_IOHDR_NEED_RESCHED;
751 } else
752 bit = NFS_IOHDR_NEED_COMMIT;
753 } 746 }
754 } 747 }
755 } 748 }
@@ -759,9 +752,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
759 752
760 req = nfs_list_entry(hdr->pages.next); 753 req = nfs_list_entry(hdr->pages.next);
761 nfs_list_remove_request(req); 754 nfs_list_remove_request(req);
762 switch (bit) { 755 if (request_commit) {
763 case NFS_IOHDR_NEED_RESCHED:
764 case NFS_IOHDR_NEED_COMMIT:
765 kref_get(&req->wb_kref); 756 kref_get(&req->wb_kref);
766 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 757 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
767 } 758 }
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d2eba1c13b7e..1359c4a27393 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -84,45 +84,37 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
84 BUG(); 84 BUG();
85} 85}
86 86
87static void filelayout_reset_write(struct nfs_pgio_data *data) 87static void filelayout_reset_write(struct nfs_pgio_header *hdr)
88{ 88{
89 struct nfs_pgio_header *hdr = data->header; 89 struct rpc_task *task = &hdr->task;
90 struct rpc_task *task = &data->task;
91 90
92 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 91 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
93 dprintk("%s Reset task %5u for i/o through MDS " 92 dprintk("%s Reset task %5u for i/o through MDS "
94 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 93 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
95 data->task.tk_pid, 94 hdr->task.tk_pid,
96 hdr->inode->i_sb->s_id, 95 hdr->inode->i_sb->s_id,
97 (unsigned long long)NFS_FILEID(hdr->inode), 96 (unsigned long long)NFS_FILEID(hdr->inode),
98 data->args.count, 97 hdr->args.count,
99 (unsigned long long)data->args.offset); 98 (unsigned long long)hdr->args.offset);
100 99
101 task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, 100 task->tk_status = pnfs_write_done_resend_to_mds(hdr);
102 &hdr->pages,
103 hdr->completion_ops,
104 hdr->dreq);
105 } 101 }
106} 102}
107 103
108static void filelayout_reset_read(struct nfs_pgio_data *data) 104static void filelayout_reset_read(struct nfs_pgio_header *hdr)
109{ 105{
110 struct nfs_pgio_header *hdr = data->header; 106 struct rpc_task *task = &hdr->task;
111 struct rpc_task *task = &data->task;
112 107
113 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 108 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
114 dprintk("%s Reset task %5u for i/o through MDS " 109 dprintk("%s Reset task %5u for i/o through MDS "
115 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 110 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
116 data->task.tk_pid, 111 hdr->task.tk_pid,
117 hdr->inode->i_sb->s_id, 112 hdr->inode->i_sb->s_id,
118 (unsigned long long)NFS_FILEID(hdr->inode), 113 (unsigned long long)NFS_FILEID(hdr->inode),
119 data->args.count, 114 hdr->args.count,
120 (unsigned long long)data->args.offset); 115 (unsigned long long)hdr->args.offset);
121 116
122 task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, 117 task->tk_status = pnfs_read_done_resend_to_mds(hdr);
123 &hdr->pages,
124 hdr->completion_ops,
125 hdr->dreq);
126 } 118 }
127} 119}
128 120
@@ -243,18 +235,17 @@ wait_on_recovery:
243/* NFS_PROTO call done callback routines */ 235/* NFS_PROTO call done callback routines */
244 236
245static int filelayout_read_done_cb(struct rpc_task *task, 237static int filelayout_read_done_cb(struct rpc_task *task,
246 struct nfs_pgio_data *data) 238 struct nfs_pgio_header *hdr)
247{ 239{
248 struct nfs_pgio_header *hdr = data->header;
249 int err; 240 int err;
250 241
251 trace_nfs4_pnfs_read(data, task->tk_status); 242 trace_nfs4_pnfs_read(hdr, task->tk_status);
252 err = filelayout_async_handle_error(task, data->args.context->state, 243 err = filelayout_async_handle_error(task, hdr->args.context->state,
253 data->ds_clp, hdr->lseg); 244 hdr->ds_clp, hdr->lseg);
254 245
255 switch (err) { 246 switch (err) {
256 case -NFS4ERR_RESET_TO_MDS: 247 case -NFS4ERR_RESET_TO_MDS:
257 filelayout_reset_read(data); 248 filelayout_reset_read(hdr);
258 return task->tk_status; 249 return task->tk_status;
259 case -EAGAIN: 250 case -EAGAIN:
260 rpc_restart_call_prepare(task); 251 rpc_restart_call_prepare(task);
@@ -270,15 +261,14 @@ static int filelayout_read_done_cb(struct rpc_task *task,
270 * rfc5661 is not clear about which credential should be used. 261 * rfc5661 is not clear about which credential should be used.
271 */ 262 */
272static void 263static void
273filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) 264filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
274{ 265{
275 struct nfs_pgio_header *hdr = wdata->header;
276 266
277 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || 267 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
278 wdata->res.verf->committed == NFS_FILE_SYNC) 268 hdr->res.verf->committed == NFS_FILE_SYNC)
279 return; 269 return;
280 270
281 pnfs_set_layoutcommit(wdata); 271 pnfs_set_layoutcommit(hdr);
282 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, 272 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
283 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); 273 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
284} 274}
@@ -305,83 +295,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
305 */ 295 */
306static void filelayout_read_prepare(struct rpc_task *task, void *data) 296static void filelayout_read_prepare(struct rpc_task *task, void *data)
307{ 297{
308 struct nfs_pgio_data *rdata = data; 298 struct nfs_pgio_header *hdr = data;
309 299
310 if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { 300 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
311 rpc_exit(task, -EIO); 301 rpc_exit(task, -EIO);
312 return; 302 return;
313 } 303 }
314 if (filelayout_reset_to_mds(rdata->header->lseg)) { 304 if (filelayout_reset_to_mds(hdr->lseg)) {
315 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); 305 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
316 filelayout_reset_read(rdata); 306 filelayout_reset_read(hdr);
317 rpc_exit(task, 0); 307 rpc_exit(task, 0);
318 return; 308 return;
319 } 309 }
320 rdata->pgio_done_cb = filelayout_read_done_cb; 310 hdr->pgio_done_cb = filelayout_read_done_cb;
321 311
322 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 312 if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
323 &rdata->args.seq_args, 313 &hdr->args.seq_args,
324 &rdata->res.seq_res, 314 &hdr->res.seq_res,
325 task)) 315 task))
326 return; 316 return;
327 if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, 317 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
328 rdata->args.lock_context, FMODE_READ) == -EIO) 318 hdr->args.lock_context, FMODE_READ) == -EIO)
329 rpc_exit(task, -EIO); /* lost lock, terminate I/O */ 319 rpc_exit(task, -EIO); /* lost lock, terminate I/O */
330} 320}
331 321
332static void filelayout_read_call_done(struct rpc_task *task, void *data) 322static void filelayout_read_call_done(struct rpc_task *task, void *data)
333{ 323{
334 struct nfs_pgio_data *rdata = data; 324 struct nfs_pgio_header *hdr = data;
335 325
336 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 326 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
337 327
338 if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && 328 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
339 task->tk_status == 0) { 329 task->tk_status == 0) {
340 nfs41_sequence_done(task, &rdata->res.seq_res); 330 nfs41_sequence_done(task, &hdr->res.seq_res);
341 return; 331 return;
342 } 332 }
343 333
344 /* Note this may cause RPC to be resent */ 334 /* Note this may cause RPC to be resent */
345 rdata->header->mds_ops->rpc_call_done(task, data); 335 hdr->mds_ops->rpc_call_done(task, data);
346} 336}
347 337
348static void filelayout_read_count_stats(struct rpc_task *task, void *data) 338static void filelayout_read_count_stats(struct rpc_task *task, void *data)
349{ 339{
350 struct nfs_pgio_data *rdata = data; 340 struct nfs_pgio_header *hdr = data;
351 341
352 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); 342 rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
353} 343}
354 344
355static void filelayout_read_release(void *data) 345static void filelayout_read_release(void *data)
356{ 346{
357 struct nfs_pgio_data *rdata = data; 347 struct nfs_pgio_header *hdr = data;
358 struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; 348 struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
359 349
360 filelayout_fenceme(lo->plh_inode, lo); 350 filelayout_fenceme(lo->plh_inode, lo);
361 nfs_put_client(rdata->ds_clp); 351 nfs_put_client(hdr->ds_clp);
362 rdata->header->mds_ops->rpc_release(data); 352 hdr->mds_ops->rpc_release(data);
363} 353}
364 354
365static int filelayout_write_done_cb(struct rpc_task *task, 355static int filelayout_write_done_cb(struct rpc_task *task,
366 struct nfs_pgio_data *data) 356 struct nfs_pgio_header *hdr)
367{ 357{
368 struct nfs_pgio_header *hdr = data->header;
369 int err; 358 int err;
370 359
371 trace_nfs4_pnfs_write(data, task->tk_status); 360 trace_nfs4_pnfs_write(hdr, task->tk_status);
372 err = filelayout_async_handle_error(task, data->args.context->state, 361 err = filelayout_async_handle_error(task, hdr->args.context->state,
373 data->ds_clp, hdr->lseg); 362 hdr->ds_clp, hdr->lseg);
374 363
375 switch (err) { 364 switch (err) {
376 case -NFS4ERR_RESET_TO_MDS: 365 case -NFS4ERR_RESET_TO_MDS:
377 filelayout_reset_write(data); 366 filelayout_reset_write(hdr);
378 return task->tk_status; 367 return task->tk_status;
379 case -EAGAIN: 368 case -EAGAIN:
380 rpc_restart_call_prepare(task); 369 rpc_restart_call_prepare(task);
381 return -EAGAIN; 370 return -EAGAIN;
382 } 371 }
383 372
384 filelayout_set_layoutcommit(data); 373 filelayout_set_layoutcommit(hdr);
385 return 0; 374 return 0;
386} 375}
387 376
@@ -419,57 +408,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
419 408
420static void filelayout_write_prepare(struct rpc_task *task, void *data) 409static void filelayout_write_prepare(struct rpc_task *task, void *data)
421{ 410{
422 struct nfs_pgio_data *wdata = data; 411 struct nfs_pgio_header *hdr = data;
423 412
424 if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { 413 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
425 rpc_exit(task, -EIO); 414 rpc_exit(task, -EIO);
426 return; 415 return;
427 } 416 }
428 if (filelayout_reset_to_mds(wdata->header->lseg)) { 417 if (filelayout_reset_to_mds(hdr->lseg)) {
429 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); 418 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
430 filelayout_reset_write(wdata); 419 filelayout_reset_write(hdr);
431 rpc_exit(task, 0); 420 rpc_exit(task, 0);
432 return; 421 return;
433 } 422 }
434 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 423 if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
435 &wdata->args.seq_args, 424 &hdr->args.seq_args,
436 &wdata->res.seq_res, 425 &hdr->res.seq_res,
437 task)) 426 task))
438 return; 427 return;
439 if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, 428 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
440 wdata->args.lock_context, FMODE_WRITE) == -EIO) 429 hdr->args.lock_context, FMODE_WRITE) == -EIO)
441 rpc_exit(task, -EIO); /* lost lock, terminate I/O */ 430 rpc_exit(task, -EIO); /* lost lock, terminate I/O */
442} 431}
443 432
444static void filelayout_write_call_done(struct rpc_task *task, void *data) 433static void filelayout_write_call_done(struct rpc_task *task, void *data)
445{ 434{
446 struct nfs_pgio_data *wdata = data; 435 struct nfs_pgio_header *hdr = data;
447 436
448 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && 437 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
449 task->tk_status == 0) { 438 task->tk_status == 0) {
450 nfs41_sequence_done(task, &wdata->res.seq_res); 439 nfs41_sequence_done(task, &hdr->res.seq_res);
451 return; 440 return;
452 } 441 }
453 442
454 /* Note this may cause RPC to be resent */ 443 /* Note this may cause RPC to be resent */
455 wdata->header->mds_ops->rpc_call_done(task, data); 444 hdr->mds_ops->rpc_call_done(task, data);
456} 445}
457 446
458static void filelayout_write_count_stats(struct rpc_task *task, void *data) 447static void filelayout_write_count_stats(struct rpc_task *task, void *data)
459{ 448{
460 struct nfs_pgio_data *wdata = data; 449 struct nfs_pgio_header *hdr = data;
461 450
462 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); 451 rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
463} 452}
464 453
465static void filelayout_write_release(void *data) 454static void filelayout_write_release(void *data)
466{ 455{
467 struct nfs_pgio_data *wdata = data; 456 struct nfs_pgio_header *hdr = data;
468 struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; 457 struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
469 458
470 filelayout_fenceme(lo->plh_inode, lo); 459 filelayout_fenceme(lo->plh_inode, lo);
471 nfs_put_client(wdata->ds_clp); 460 nfs_put_client(hdr->ds_clp);
472 wdata->header->mds_ops->rpc_release(data); 461 hdr->mds_ops->rpc_release(data);
473} 462}
474 463
475static void filelayout_commit_prepare(struct rpc_task *task, void *data) 464static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -529,19 +518,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
529}; 518};
530 519
531static enum pnfs_try_status 520static enum pnfs_try_status
532filelayout_read_pagelist(struct nfs_pgio_data *data) 521filelayout_read_pagelist(struct nfs_pgio_header *hdr)
533{ 522{
534 struct nfs_pgio_header *hdr = data->header;
535 struct pnfs_layout_segment *lseg = hdr->lseg; 523 struct pnfs_layout_segment *lseg = hdr->lseg;
536 struct nfs4_pnfs_ds *ds; 524 struct nfs4_pnfs_ds *ds;
537 struct rpc_clnt *ds_clnt; 525 struct rpc_clnt *ds_clnt;
538 loff_t offset = data->args.offset; 526 loff_t offset = hdr->args.offset;
539 u32 j, idx; 527 u32 j, idx;
540 struct nfs_fh *fh; 528 struct nfs_fh *fh;
541 529
542 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 530 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
543 __func__, hdr->inode->i_ino, 531 __func__, hdr->inode->i_ino,
544 data->args.pgbase, (size_t)data->args.count, offset); 532 hdr->args.pgbase, (size_t)hdr->args.count, offset);
545 533
546 /* Retrieve the correct rpc_client for the byte range */ 534 /* Retrieve the correct rpc_client for the byte range */
547 j = nfs4_fl_calc_j_index(lseg, offset); 535 j = nfs4_fl_calc_j_index(lseg, offset);
@@ -559,30 +547,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
559 547
560 /* No multipath support. Use first DS */ 548 /* No multipath support. Use first DS */
561 atomic_inc(&ds->ds_clp->cl_count); 549 atomic_inc(&ds->ds_clp->cl_count);
562 data->ds_clp = ds->ds_clp; 550 hdr->ds_clp = ds->ds_clp;
563 data->ds_idx = idx; 551 hdr->ds_idx = idx;
564 fh = nfs4_fl_select_ds_fh(lseg, j); 552 fh = nfs4_fl_select_ds_fh(lseg, j);
565 if (fh) 553 if (fh)
566 data->args.fh = fh; 554 hdr->args.fh = fh;
567 555
568 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 556 hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
569 data->mds_offset = offset; 557 hdr->mds_offset = offset;
570 558
571 /* Perform an asynchronous read to ds */ 559 /* Perform an asynchronous read to ds */
572 nfs_initiate_pgio(ds_clnt, data, 560 nfs_initiate_pgio(ds_clnt, hdr,
573 &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); 561 &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
574 return PNFS_ATTEMPTED; 562 return PNFS_ATTEMPTED;
575} 563}
576 564
577/* Perform async writes. */ 565/* Perform async writes. */
578static enum pnfs_try_status 566static enum pnfs_try_status
579filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) 567filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
580{ 568{
581 struct nfs_pgio_header *hdr = data->header;
582 struct pnfs_layout_segment *lseg = hdr->lseg; 569 struct pnfs_layout_segment *lseg = hdr->lseg;
583 struct nfs4_pnfs_ds *ds; 570 struct nfs4_pnfs_ds *ds;
584 struct rpc_clnt *ds_clnt; 571 struct rpc_clnt *ds_clnt;
585 loff_t offset = data->args.offset; 572 loff_t offset = hdr->args.offset;
586 u32 j, idx; 573 u32 j, idx;
587 struct nfs_fh *fh; 574 struct nfs_fh *fh;
588 575
@@ -598,21 +585,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
598 return PNFS_NOT_ATTEMPTED; 585 return PNFS_NOT_ATTEMPTED;
599 586
600 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", 587 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
601 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, 588 __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
602 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); 589 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
603 590
604 data->pgio_done_cb = filelayout_write_done_cb; 591 hdr->pgio_done_cb = filelayout_write_done_cb;
605 atomic_inc(&ds->ds_clp->cl_count); 592 atomic_inc(&ds->ds_clp->cl_count);
606 data->ds_clp = ds->ds_clp; 593 hdr->ds_clp = ds->ds_clp;
607 data->ds_idx = idx; 594 hdr->ds_idx = idx;
608 fh = nfs4_fl_select_ds_fh(lseg, j); 595 fh = nfs4_fl_select_ds_fh(lseg, j);
609 if (fh) 596 if (fh)
610 data->args.fh = fh; 597 hdr->args.fh = fh;
611 598 hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
612 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
613 599
614 /* Perform an asynchronous write */ 600 /* Perform an asynchronous write */
615 nfs_initiate_pgio(ds_clnt, data, 601 nfs_initiate_pgio(ds_clnt, hdr,
616 &filelayout_write_call_ops, sync, 602 &filelayout_write_call_ops, sync,
617 RPC_TASK_SOFTCONN); 603 RPC_TASK_SOFTCONN);
618 return PNFS_ATTEMPTED; 604 return PNFS_ATTEMPTED;
@@ -1023,6 +1009,7 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
1023 1009
1024/* The generic layer is about to remove the req from the commit list. 1010/* The generic layer is about to remove the req from the commit list.
1025 * If this will make the bucket empty, it will need to put the lseg reference. 1011 * If this will make the bucket empty, it will need to put the lseg reference.
1012 * Note this must be called holding the inode (/cinfo) lock
1026 */ 1013 */
1027static void 1014static void
1028filelayout_clear_request_commit(struct nfs_page *req, 1015filelayout_clear_request_commit(struct nfs_page *req,
@@ -1030,7 +1017,6 @@ filelayout_clear_request_commit(struct nfs_page *req,
1030{ 1017{
1031 struct pnfs_layout_segment *freeme = NULL; 1018 struct pnfs_layout_segment *freeme = NULL;
1032 1019
1033 spin_lock(cinfo->lock);
1034 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 1020 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
1035 goto out; 1021 goto out;
1036 cinfo->ds->nwritten--; 1022 cinfo->ds->nwritten--;
@@ -1045,22 +1031,25 @@ filelayout_clear_request_commit(struct nfs_page *req,
1045 } 1031 }
1046out: 1032out:
1047 nfs_request_remove_commit_list(req, cinfo); 1033 nfs_request_remove_commit_list(req, cinfo);
1048 spin_unlock(cinfo->lock); 1034 pnfs_put_lseg_async(freeme);
1049 pnfs_put_lseg(freeme);
1050} 1035}
1051 1036
1052static struct list_head * 1037static void
1053filelayout_choose_commit_list(struct nfs_page *req, 1038filelayout_mark_request_commit(struct nfs_page *req,
1054 struct pnfs_layout_segment *lseg, 1039 struct pnfs_layout_segment *lseg,
1055 struct nfs_commit_info *cinfo) 1040 struct nfs_commit_info *cinfo)
1041
1056{ 1042{
1057 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 1043 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
1058 u32 i, j; 1044 u32 i, j;
1059 struct list_head *list; 1045 struct list_head *list;
1060 struct pnfs_commit_bucket *buckets; 1046 struct pnfs_commit_bucket *buckets;
1061 1047
1062 if (fl->commit_through_mds) 1048 if (fl->commit_through_mds) {
1063 return &cinfo->mds->list; 1049 list = &cinfo->mds->list;
1050 spin_lock(cinfo->lock);
1051 goto mds_commit;
1052 }
1064 1053
1065 /* Note that we are calling nfs4_fl_calc_j_index on each page 1054 /* Note that we are calling nfs4_fl_calc_j_index on each page
1066 * that ends up being committed to a data server. An attractive 1055 * that ends up being committed to a data server. An attractive
@@ -1084,19 +1073,22 @@ filelayout_choose_commit_list(struct nfs_page *req,
1084 } 1073 }
1085 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1074 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1086 cinfo->ds->nwritten++; 1075 cinfo->ds->nwritten++;
1087 spin_unlock(cinfo->lock);
1088 return list;
1089}
1090 1076
1091static void 1077mds_commit:
1092filelayout_mark_request_commit(struct nfs_page *req, 1078 /* nfs_request_add_commit_list(). We need to add req to list without
1093 struct pnfs_layout_segment *lseg, 1079 * dropping cinfo lock.
1094 struct nfs_commit_info *cinfo) 1080 */
1095{ 1081 set_bit(PG_CLEAN, &(req)->wb_flags);
1096 struct list_head *list; 1082 nfs_list_add_request(req, list);
1097 1083 cinfo->mds->ncommit++;
1098 list = filelayout_choose_commit_list(req, lseg, cinfo); 1084 spin_unlock(cinfo->lock);
1099 nfs_request_add_commit_list(req, list, cinfo); 1085 if (!cinfo->dreq) {
1086 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1087 inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
1088 BDI_RECLAIMABLE);
1089 __mark_inode_dirty(req->wb_context->dentry->d_inode,
1090 I_DIRTY_DATASYNC);
1091 }
1100} 1092}
1101 1093
1102static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1094static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -1244,15 +1236,63 @@ restart:
1244 spin_unlock(cinfo->lock); 1236 spin_unlock(cinfo->lock);
1245} 1237}
1246 1238
1239/* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest
1240 * for @page
1241 * @cinfo - commit info for current inode
1242 * @page - page to search for matching head request
1243 *
1244 * Returns a the head request if one is found, otherwise returns NULL.
1245 */
1246static struct nfs_page *
1247filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
1248{
1249 struct nfs_page *freq, *t;
1250 struct pnfs_commit_bucket *b;
1251 int i;
1252
1253 /* Linearly search the commit lists for each bucket until a matching
1254 * request is found */
1255 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1256 list_for_each_entry_safe(freq, t, &b->written, wb_list) {
1257 if (freq->wb_page == page)
1258 return freq->wb_head;
1259 }
1260 list_for_each_entry_safe(freq, t, &b->committing, wb_list) {
1261 if (freq->wb_page == page)
1262 return freq->wb_head;
1263 }
1264 }
1265
1266 return NULL;
1267}
1268
1269static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx)
1270{
1271 struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
1272 struct pnfs_commit_bucket *bucket = fl_cinfo->buckets;
1273 struct pnfs_layout_segment *freeme;
1274 int i;
1275
1276 for (i = idx; i < fl_cinfo->nbuckets; i++, bucket++) {
1277 if (list_empty(&bucket->committing))
1278 continue;
1279 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1280 spin_lock(cinfo->lock);
1281 freeme = bucket->clseg;
1282 bucket->clseg = NULL;
1283 spin_unlock(cinfo->lock);
1284 pnfs_put_lseg(freeme);
1285 }
1286}
1287
1247static unsigned int 1288static unsigned int
1248alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) 1289alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1249{ 1290{
1250 struct pnfs_ds_commit_info *fl_cinfo; 1291 struct pnfs_ds_commit_info *fl_cinfo;
1251 struct pnfs_commit_bucket *bucket; 1292 struct pnfs_commit_bucket *bucket;
1252 struct nfs_commit_data *data; 1293 struct nfs_commit_data *data;
1253 int i, j; 1294 int i;
1254 unsigned int nreq = 0; 1295 unsigned int nreq = 0;
1255 struct pnfs_layout_segment *freeme;
1256 1296
1257 fl_cinfo = cinfo->ds; 1297 fl_cinfo = cinfo->ds;
1258 bucket = fl_cinfo->buckets; 1298 bucket = fl_cinfo->buckets;
@@ -1272,16 +1312,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1272 } 1312 }
1273 1313
1274 /* Clean up on error */ 1314 /* Clean up on error */
1275 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { 1315 filelayout_retry_commit(cinfo, i);
1276 if (list_empty(&bucket->committing))
1277 continue;
1278 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1279 spin_lock(cinfo->lock);
1280 freeme = bucket->clseg;
1281 bucket->clseg = NULL;
1282 spin_unlock(cinfo->lock);
1283 pnfs_put_lseg(freeme);
1284 }
1285 /* Caller will clean up entries put on list */ 1316 /* Caller will clean up entries put on list */
1286 return nreq; 1317 return nreq;
1287} 1318}
@@ -1301,8 +1332,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1301 data->lseg = NULL; 1332 data->lseg = NULL;
1302 list_add(&data->pages, &list); 1333 list_add(&data->pages, &list);
1303 nreq++; 1334 nreq++;
1304 } else 1335 } else {
1305 nfs_retry_commit(mds_pages, NULL, cinfo); 1336 nfs_retry_commit(mds_pages, NULL, cinfo);
1337 filelayout_retry_commit(cinfo, 0);
1338 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1339 return -ENOMEM;
1340 }
1306 } 1341 }
1307 1342
1308 nreq += alloc_ds_commits(cinfo, &list); 1343 nreq += alloc_ds_commits(cinfo, &list);
@@ -1380,6 +1415,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
1380 .clear_request_commit = filelayout_clear_request_commit, 1415 .clear_request_commit = filelayout_clear_request_commit,
1381 .scan_commit_lists = filelayout_scan_commit_lists, 1416 .scan_commit_lists = filelayout_scan_commit_lists,
1382 .recover_commit_reqs = filelayout_recover_commit_reqs, 1417 .recover_commit_reqs = filelayout_recover_commit_reqs,
1418 .search_commit_reqs = filelayout_search_commit_reqs,
1383 .commit_pagelist = filelayout_commit_pagelist, 1419 .commit_pagelist = filelayout_commit_pagelist,
1384 .read_pagelist = filelayout_read_pagelist, 1420 .read_pagelist = filelayout_read_pagelist,
1385 .write_pagelist = filelayout_write_pagelist, 1421 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index e2a0361e24c6..8540516f4d71 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -695,7 +695,7 @@ filelayout_get_device_info(struct inode *inode,
695 if (pdev == NULL) 695 if (pdev == NULL)
696 return NULL; 696 return NULL;
697 697
698 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); 698 pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
699 if (pages == NULL) { 699 if (pages == NULL) {
700 kfree(pdev); 700 kfree(pdev);
701 return NULL; 701 return NULL;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 68921b01b792..577a36f0a510 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1002} 1002}
1003EXPORT_SYMBOL_GPL(nfs_revalidate_inode); 1003EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
1004 1004
1005int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
1006{
1007 if (!(NFS_I(inode)->cache_validity &
1008 (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
1009 && !nfs_attribute_cache_expired(inode))
1010 return NFS_STALE(inode) ? -ESTALE : 0;
1011 return -ECHILD;
1012}
1013
1005static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) 1014static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
1006{ 1015{
1007 struct nfs_inode *nfsi = NFS_I(inode); 1016 struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e2a45ae5014e..9056622d2230 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -247,11 +247,11 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
247int nfs_iocounter_wait(struct nfs_io_counter *c); 247int nfs_iocounter_wait(struct nfs_io_counter *c);
248 248
249extern const struct nfs_pageio_ops nfs_pgio_rw_ops; 249extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
250struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); 250struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
251void nfs_rw_header_free(struct nfs_pgio_header *); 251void nfs_pgio_header_free(struct nfs_pgio_header *);
252void nfs_pgio_data_release(struct nfs_pgio_data *); 252void nfs_pgio_data_destroy(struct nfs_pgio_header *);
253int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); 253int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
254int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, 254int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *,
255 const struct rpc_call_ops *, int, int); 255 const struct rpc_call_ops *, int, int);
256void nfs_free_request(struct nfs_page *req); 256void nfs_free_request(struct nfs_page *req);
257 257
@@ -451,6 +451,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst,
451void nfs_mark_request_commit(struct nfs_page *req, 451void nfs_mark_request_commit(struct nfs_page *req,
452 struct pnfs_layout_segment *lseg, 452 struct pnfs_layout_segment *lseg,
453 struct nfs_commit_info *cinfo); 453 struct nfs_commit_info *cinfo);
454int nfs_write_need_commit(struct nfs_pgio_header *);
454int nfs_generic_commit_list(struct inode *inode, struct list_head *head, 455int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
455 int how, struct nfs_commit_info *cinfo); 456 int how, struct nfs_commit_info *cinfo);
456void nfs_retry_commit(struct list_head *page_list, 457void nfs_retry_commit(struct list_head *page_list,
@@ -491,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
491extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); 492extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
492 493
493/* nfs4proc.c */ 494/* nfs4proc.c */
494extern void __nfs4_read_done_cb(struct nfs_pgio_data *); 495extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
495extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, 496extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
496 const struct rpc_timeout *timeparms, 497 const struct rpc_timeout *timeparms,
497 const char *ip_addr); 498 const char *ip_addr);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 8f854dde4150..d0fec260132a 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -256,7 +256,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
256 char *p = data + *result; 256 char *p = data + *result;
257 257
258 acl = get_acl(inode, type); 258 acl = get_acl(inode, type);
259 if (!acl) 259 if (IS_ERR_OR_NULL(acl))
260 return 0; 260 return 0;
261 261
262 posix_acl_release(acl); 262 posix_acl_release(acl);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index f0afa291fd58..809670eba52a 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
795 return status; 795 return status;
796} 796}
797 797
798static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 798static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
799{ 799{
800 struct inode *inode = data->header->inode; 800 struct inode *inode = hdr->inode;
801 801
802 if (nfs3_async_handle_jukebox(task, inode)) 802 if (nfs3_async_handle_jukebox(task, inode))
803 return -EAGAIN; 803 return -EAGAIN;
804 804
805 nfs_invalidate_atime(inode); 805 nfs_invalidate_atime(inode);
806 nfs_refresh_inode(inode, &data->fattr); 806 nfs_refresh_inode(inode, &hdr->fattr);
807 return 0; 807 return 0;
808} 808}
809 809
810static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 810static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
811 struct rpc_message *msg)
811{ 812{
812 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; 813 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
813} 814}
814 815
815static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 816static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
817 struct nfs_pgio_header *hdr)
816{ 818{
817 rpc_call_start(task); 819 rpc_call_start(task);
818 return 0; 820 return 0;
819} 821}
820 822
821static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 823static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
822{ 824{
823 struct inode *inode = data->header->inode; 825 struct inode *inode = hdr->inode;
824 826
825 if (nfs3_async_handle_jukebox(task, inode)) 827 if (nfs3_async_handle_jukebox(task, inode))
826 return -EAGAIN; 828 return -EAGAIN;
827 if (task->tk_status >= 0) 829 if (task->tk_status >= 0)
828 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 830 nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
829 return 0; 831 return 0;
830} 832}
831 833
832static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 834static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
835 struct rpc_message *msg)
833{ 836{
834 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; 837 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
835} 838}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ba2affa51941..92193eddb41d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -54,7 +54,7 @@ struct nfs4_minor_version_ops {
54 const nfs4_stateid *); 54 const nfs4_stateid *);
55 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, 55 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
56 struct nfs_fsinfo *); 56 struct nfs_fsinfo *);
57 int (*free_lock_state)(struct nfs_server *, 57 void (*free_lock_state)(struct nfs_server *,
58 struct nfs4_lock_state *); 58 struct nfs4_lock_state *);
59 const struct rpc_call_ops *call_sync_ops; 59 const struct rpc_call_ops *call_sync_ops;
60 const struct nfs4_state_recovery_ops *reboot_recovery_ops; 60 const struct nfs4_state_recovery_ops *reboot_recovery_ops;
@@ -129,27 +129,17 @@ enum {
129 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) 129 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
130 */ 130 */
131 131
132struct nfs4_lock_owner {
133 unsigned int lo_type;
134#define NFS4_ANY_LOCK_TYPE (0U)
135#define NFS4_FLOCK_LOCK_TYPE (1U << 0)
136#define NFS4_POSIX_LOCK_TYPE (1U << 1)
137 union {
138 fl_owner_t posix_owner;
139 pid_t flock_owner;
140 } lo_u;
141};
142
143struct nfs4_lock_state { 132struct nfs4_lock_state {
144 struct list_head ls_locks; /* Other lock stateids */ 133 struct list_head ls_locks; /* Other lock stateids */
145 struct nfs4_state * ls_state; /* Pointer to open state */ 134 struct nfs4_state * ls_state; /* Pointer to open state */
146#define NFS_LOCK_INITIALIZED 0 135#define NFS_LOCK_INITIALIZED 0
147#define NFS_LOCK_LOST 1 136#define NFS_LOCK_LOST 1
148 unsigned long ls_flags; 137 unsigned long ls_flags;
149 struct nfs_seqid_counter ls_seqid; 138 struct nfs_seqid_counter ls_seqid;
150 nfs4_stateid ls_stateid; 139 nfs4_stateid ls_stateid;
151 atomic_t ls_count; 140 atomic_t ls_count;
152 struct nfs4_lock_owner ls_owner; 141 fl_owner_t ls_owner;
142 struct work_struct ls_release;
153}; 143};
154 144
155/* bits for nfs4_state->flags */ 145/* bits for nfs4_state->flags */
@@ -337,11 +327,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
337 */ 327 */
338static inline void 328static inline void
339nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 329nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
340 struct rpc_message *msg, struct nfs_pgio_data *wdata) 330 struct rpc_message *msg, struct nfs_pgio_header *hdr)
341{ 331{
342 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && 332 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
343 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) 333 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
344 wdata->args.stable = NFS_FILE_SYNC; 334 hdr->args.stable = NFS_FILE_SYNC;
345} 335}
346#else /* CONFIG_NFS_v4_1 */ 336#else /* CONFIG_NFS_v4_1 */
347static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 337static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -369,7 +359,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
369 359
370static inline void 360static inline void
371nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 361nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
372 struct rpc_message *msg, struct nfs_pgio_data *wdata) 362 struct rpc_message *msg, struct nfs_pgio_header *hdr)
373{ 363{
374} 364}
375#endif /* CONFIG_NFS_V4_1 */ 365#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index aa9ef4876046..53e435a95260 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -855,6 +855,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
855 }; 855 };
856 struct rpc_timeout ds_timeout; 856 struct rpc_timeout ds_timeout;
857 struct nfs_client *clp; 857 struct nfs_client *clp;
858 char buf[INET6_ADDRSTRLEN + 1];
859
860 if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
861 return ERR_PTR(-EINVAL);
862 cl_init.hostname = buf;
858 863
859 /* 864 /*
860 * Set an authflavor equual to the MDS value. Use the MDS nfs_client 865 * Set an authflavor equual to the MDS value. Use the MDS nfs_client
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bf3d97cc5a0..75ae8d22f067 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1952,6 +1952,14 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1952 return status; 1952 return status;
1953} 1953}
1954 1954
1955/*
1956 * Additional permission checks in order to distinguish between an
1957 * open for read, and an open for execute. This works around the
1958 * fact that NFSv4 OPEN treats read and execute permissions as being
1959 * the same.
1960 * Note that in the non-execute case, we want to turn off permission
1961 * checking if we just created a new file (POSIX open() semantics).
1962 */
1955static int nfs4_opendata_access(struct rpc_cred *cred, 1963static int nfs4_opendata_access(struct rpc_cred *cred,
1956 struct nfs4_opendata *opendata, 1964 struct nfs4_opendata *opendata,
1957 struct nfs4_state *state, fmode_t fmode, 1965 struct nfs4_state *state, fmode_t fmode,
@@ -1966,14 +1974,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
1966 return 0; 1974 return 0;
1967 1975
1968 mask = 0; 1976 mask = 0;
1969 /* don't check MAY_WRITE - a newly created file may not have 1977 /*
1970 * write mode bits, but POSIX allows the creating process to write. 1978 * Use openflags to check for exec, because fmode won't
1971 * use openflags to check for exec, because fmode won't 1979 * always have FMODE_EXEC set when file open for exec.
1972 * always have FMODE_EXEC set when file open for exec. */ 1980 */
1973 if (openflags & __FMODE_EXEC) { 1981 if (openflags & __FMODE_EXEC) {
1974 /* ONLY check for exec rights */ 1982 /* ONLY check for exec rights */
1975 mask = MAY_EXEC; 1983 mask = MAY_EXEC;
1976 } else if (fmode & FMODE_READ) 1984 } else if ((fmode & FMODE_READ) && !opendata->file_created)
1977 mask = MAY_READ; 1985 mask = MAY_READ;
1978 1986
1979 cache.cred = cred; 1987 cache.cred = cred;
@@ -2216,8 +2224,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2216 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); 2224 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
2217 2225
2218 ret = _nfs4_proc_open(opendata); 2226 ret = _nfs4_proc_open(opendata);
2219 if (ret != 0) 2227 if (ret != 0) {
2228 if (ret == -ENOENT) {
2229 d_drop(opendata->dentry);
2230 d_add(opendata->dentry, NULL);
2231 nfs_set_verifier(opendata->dentry,
2232 nfs_save_change_attribute(opendata->dir->d_inode));
2233 }
2220 goto out; 2234 goto out;
2235 }
2221 2236
2222 state = nfs4_opendata_to_nfs4_state(opendata); 2237 state = nfs4_opendata_to_nfs4_state(opendata);
2223 ret = PTR_ERR(state); 2238 ret = PTR_ERR(state);
@@ -2647,6 +2662,48 @@ static const struct rpc_call_ops nfs4_close_ops = {
2647 .rpc_release = nfs4_free_closedata, 2662 .rpc_release = nfs4_free_closedata,
2648}; 2663};
2649 2664
2665static bool nfs4_state_has_opener(struct nfs4_state *state)
2666{
2667 /* first check existing openers */
2668 if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 &&
2669 state->n_rdonly != 0)
2670 return true;
2671
2672 if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 &&
2673 state->n_wronly != 0)
2674 return true;
2675
2676 if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 &&
2677 state->n_rdwr != 0)
2678 return true;
2679
2680 return false;
2681}
2682
2683static bool nfs4_roc(struct inode *inode)
2684{
2685 struct nfs_inode *nfsi = NFS_I(inode);
2686 struct nfs_open_context *ctx;
2687 struct nfs4_state *state;
2688
2689 spin_lock(&inode->i_lock);
2690 list_for_each_entry(ctx, &nfsi->open_files, list) {
2691 state = ctx->state;
2692 if (state == NULL)
2693 continue;
2694 if (nfs4_state_has_opener(state)) {
2695 spin_unlock(&inode->i_lock);
2696 return false;
2697 }
2698 }
2699 spin_unlock(&inode->i_lock);
2700
2701 if (nfs4_check_delegation(inode, FMODE_READ))
2702 return false;
2703
2704 return pnfs_roc(inode);
2705}
2706
2650/* 2707/*
2651 * It is possible for data to be read/written from a mem-mapped file 2708 * It is possible for data to be read/written from a mem-mapped file
2652 * after the sys_close call (which hits the vfs layer as a flush). 2709 * after the sys_close call (which hits the vfs layer as a flush).
@@ -2697,7 +2754,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
2697 calldata->res.fattr = &calldata->fattr; 2754 calldata->res.fattr = &calldata->fattr;
2698 calldata->res.seqid = calldata->arg.seqid; 2755 calldata->res.seqid = calldata->arg.seqid;
2699 calldata->res.server = server; 2756 calldata->res.server = server;
2700 calldata->roc = pnfs_roc(state->inode); 2757 calldata->roc = nfs4_roc(state->inode);
2701 nfs_sb_active(calldata->inode->i_sb); 2758 nfs_sb_active(calldata->inode->i_sb);
2702 2759
2703 msg.rpc_argp = &calldata->arg; 2760 msg.rpc_argp = &calldata->arg;
@@ -4033,24 +4090,25 @@ static bool nfs4_error_stateid_expired(int err)
4033 return false; 4090 return false;
4034} 4091}
4035 4092
4036void __nfs4_read_done_cb(struct nfs_pgio_data *data) 4093void __nfs4_read_done_cb(struct nfs_pgio_header *hdr)
4037{ 4094{
4038 nfs_invalidate_atime(data->header->inode); 4095 nfs_invalidate_atime(hdr->inode);
4039} 4096}
4040 4097
4041static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) 4098static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
4042{ 4099{
4043 struct nfs_server *server = NFS_SERVER(data->header->inode); 4100 struct nfs_server *server = NFS_SERVER(hdr->inode);
4044 4101
4045 trace_nfs4_read(data, task->tk_status); 4102 trace_nfs4_read(hdr, task->tk_status);
4046 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 4103 if (nfs4_async_handle_error(task, server,
4104 hdr->args.context->state) == -EAGAIN) {
4047 rpc_restart_call_prepare(task); 4105 rpc_restart_call_prepare(task);
4048 return -EAGAIN; 4106 return -EAGAIN;
4049 } 4107 }
4050 4108
4051 __nfs4_read_done_cb(data); 4109 __nfs4_read_done_cb(hdr);
4052 if (task->tk_status > 0) 4110 if (task->tk_status > 0)
4053 renew_lease(server, data->timestamp); 4111 renew_lease(server, hdr->timestamp);
4054 return 0; 4112 return 0;
4055} 4113}
4056 4114
@@ -4068,54 +4126,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
4068 return true; 4126 return true;
4069} 4127}
4070 4128
4071static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 4129static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
4072{ 4130{
4073 4131
4074 dprintk("--> %s\n", __func__); 4132 dprintk("--> %s\n", __func__);
4075 4133
4076 if (!nfs4_sequence_done(task, &data->res.seq_res)) 4134 if (!nfs4_sequence_done(task, &hdr->res.seq_res))
4077 return -EAGAIN; 4135 return -EAGAIN;
4078 if (nfs4_read_stateid_changed(task, &data->args)) 4136 if (nfs4_read_stateid_changed(task, &hdr->args))
4079 return -EAGAIN; 4137 return -EAGAIN;
4080 return data->pgio_done_cb ? data->pgio_done_cb(task, data) : 4138 return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
4081 nfs4_read_done_cb(task, data); 4139 nfs4_read_done_cb(task, hdr);
4082} 4140}
4083 4141
4084static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 4142static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
4143 struct rpc_message *msg)
4085{ 4144{
4086 data->timestamp = jiffies; 4145 hdr->timestamp = jiffies;
4087 data->pgio_done_cb = nfs4_read_done_cb; 4146 hdr->pgio_done_cb = nfs4_read_done_cb;
4088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 4147 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
4089 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 4148 nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
4090} 4149}
4091 4150
4092static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 4151static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
4152 struct nfs_pgio_header *hdr)
4093{ 4153{
4094 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), 4154 if (nfs4_setup_sequence(NFS_SERVER(hdr->inode),
4095 &data->args.seq_args, 4155 &hdr->args.seq_args,
4096 &data->res.seq_res, 4156 &hdr->res.seq_res,
4097 task)) 4157 task))
4098 return 0; 4158 return 0;
4099 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, 4159 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
4100 data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) 4160 hdr->args.lock_context,
4161 hdr->rw_ops->rw_mode) == -EIO)
4101 return -EIO; 4162 return -EIO;
4102 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) 4163 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
4103 return -EIO; 4164 return -EIO;
4104 return 0; 4165 return 0;
4105} 4166}
4106 4167
4107static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) 4168static int nfs4_write_done_cb(struct rpc_task *task,
4169 struct nfs_pgio_header *hdr)
4108{ 4170{
4109 struct inode *inode = data->header->inode; 4171 struct inode *inode = hdr->inode;
4110 4172
4111 trace_nfs4_write(data, task->tk_status); 4173 trace_nfs4_write(hdr, task->tk_status);
4112 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 4174 if (nfs4_async_handle_error(task, NFS_SERVER(inode),
4175 hdr->args.context->state) == -EAGAIN) {
4113 rpc_restart_call_prepare(task); 4176 rpc_restart_call_prepare(task);
4114 return -EAGAIN; 4177 return -EAGAIN;
4115 } 4178 }
4116 if (task->tk_status >= 0) { 4179 if (task->tk_status >= 0) {
4117 renew_lease(NFS_SERVER(inode), data->timestamp); 4180 renew_lease(NFS_SERVER(inode), hdr->timestamp);
4118 nfs_post_op_update_inode_force_wcc(inode, &data->fattr); 4181 nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
4119 } 4182 }
4120 return 0; 4183 return 0;
4121} 4184}
@@ -4134,23 +4197,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
4134 return true; 4197 return true;
4135} 4198}
4136 4199
4137static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 4200static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
4138{ 4201{
4139 if (!nfs4_sequence_done(task, &data->res.seq_res)) 4202 if (!nfs4_sequence_done(task, &hdr->res.seq_res))
4140 return -EAGAIN; 4203 return -EAGAIN;
4141 if (nfs4_write_stateid_changed(task, &data->args)) 4204 if (nfs4_write_stateid_changed(task, &hdr->args))
4142 return -EAGAIN; 4205 return -EAGAIN;
4143 return data->pgio_done_cb ? data->pgio_done_cb(task, data) : 4206 return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
4144 nfs4_write_done_cb(task, data); 4207 nfs4_write_done_cb(task, hdr);
4145} 4208}
4146 4209
4147static 4210static
4148bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) 4211bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
4149{ 4212{
4150 const struct nfs_pgio_header *hdr = data->header;
4151
4152 /* Don't request attributes for pNFS or O_DIRECT writes */ 4213 /* Don't request attributes for pNFS or O_DIRECT writes */
4153 if (data->ds_clp != NULL || hdr->dreq != NULL) 4214 if (hdr->ds_clp != NULL || hdr->dreq != NULL)
4154 return false; 4215 return false;
4155 /* Otherwise, request attributes if and only if we don't hold 4216 /* Otherwise, request attributes if and only if we don't hold
4156 * a delegation 4217 * a delegation
@@ -4158,23 +4219,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
4158 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; 4219 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
4159} 4220}
4160 4221
4161static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 4222static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
4223 struct rpc_message *msg)
4162{ 4224{
4163 struct nfs_server *server = NFS_SERVER(data->header->inode); 4225 struct nfs_server *server = NFS_SERVER(hdr->inode);
4164 4226
4165 if (!nfs4_write_need_cache_consistency_data(data)) { 4227 if (!nfs4_write_need_cache_consistency_data(hdr)) {
4166 data->args.bitmask = NULL; 4228 hdr->args.bitmask = NULL;
4167 data->res.fattr = NULL; 4229 hdr->res.fattr = NULL;
4168 } else 4230 } else
4169 data->args.bitmask = server->cache_consistency_bitmask; 4231 hdr->args.bitmask = server->cache_consistency_bitmask;
4170 4232
4171 if (!data->pgio_done_cb) 4233 if (!hdr->pgio_done_cb)
4172 data->pgio_done_cb = nfs4_write_done_cb; 4234 hdr->pgio_done_cb = nfs4_write_done_cb;
4173 data->res.server = server; 4235 hdr->res.server = server;
4174 data->timestamp = jiffies; 4236 hdr->timestamp = jiffies;
4175 4237
4176 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 4238 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
4177 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4239 nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1);
4178} 4240}
4179 4241
4180static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 4242static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -4881,6 +4943,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
4881 return scnprintf(buf, len, "tcp"); 4943 return scnprintf(buf, len, "tcp");
4882} 4944}
4883 4945
4946static void nfs4_setclientid_done(struct rpc_task *task, void *calldata)
4947{
4948 struct nfs4_setclientid *sc = calldata;
4949
4950 if (task->tk_status == 0)
4951 sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred);
4952}
4953
4954static const struct rpc_call_ops nfs4_setclientid_ops = {
4955 .rpc_call_done = nfs4_setclientid_done,
4956};
4957
4884/** 4958/**
4885 * nfs4_proc_setclientid - Negotiate client ID 4959 * nfs4_proc_setclientid - Negotiate client ID
4886 * @clp: state data structure 4960 * @clp: state data structure
@@ -4907,6 +4981,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4907 .rpc_resp = res, 4981 .rpc_resp = res,
4908 .rpc_cred = cred, 4982 .rpc_cred = cred,
4909 }; 4983 };
4984 struct rpc_task *task;
4985 struct rpc_task_setup task_setup_data = {
4986 .rpc_client = clp->cl_rpcclient,
4987 .rpc_message = &msg,
4988 .callback_ops = &nfs4_setclientid_ops,
4989 .callback_data = &setclientid,
4990 .flags = RPC_TASK_TIMEOUT,
4991 };
4910 int status; 4992 int status;
4911 4993
4912 /* nfs_client_id4 */ 4994 /* nfs_client_id4 */
@@ -4933,7 +5015,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4933 dprintk("NFS call setclientid auth=%s, '%.*s'\n", 5015 dprintk("NFS call setclientid auth=%s, '%.*s'\n",
4934 clp->cl_rpcclient->cl_auth->au_ops->au_name, 5016 clp->cl_rpcclient->cl_auth->au_ops->au_name,
4935 setclientid.sc_name_len, setclientid.sc_name); 5017 setclientid.sc_name_len, setclientid.sc_name);
4936 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5018 task = rpc_run_task(&task_setup_data);
5019 if (IS_ERR(task)) {
5020 status = PTR_ERR(task);
5021 goto out;
5022 }
5023 status = task->tk_status;
5024 if (setclientid.sc_cred) {
5025 clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
5026 put_rpccred(setclientid.sc_cred);
5027 }
5028 rpc_put_task(task);
5029out:
4937 trace_nfs4_setclientid(clp, status); 5030 trace_nfs4_setclientid(clp, status);
4938 dprintk("NFS reply setclientid: %d\n", status); 5031 dprintk("NFS reply setclientid: %d\n", status);
4939 return status; 5032 return status;
@@ -4975,6 +5068,9 @@ struct nfs4_delegreturndata {
4975 unsigned long timestamp; 5068 unsigned long timestamp;
4976 struct nfs_fattr fattr; 5069 struct nfs_fattr fattr;
4977 int rpc_status; 5070 int rpc_status;
5071 struct inode *inode;
5072 bool roc;
5073 u32 roc_barrier;
4978}; 5074};
4979 5075
4980static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) 5076static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
@@ -4988,7 +5084,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
4988 switch (task->tk_status) { 5084 switch (task->tk_status) {
4989 case 0: 5085 case 0:
4990 renew_lease(data->res.server, data->timestamp); 5086 renew_lease(data->res.server, data->timestamp);
4991 break;
4992 case -NFS4ERR_ADMIN_REVOKED: 5087 case -NFS4ERR_ADMIN_REVOKED:
4993 case -NFS4ERR_DELEG_REVOKED: 5088 case -NFS4ERR_DELEG_REVOKED:
4994 case -NFS4ERR_BAD_STATEID: 5089 case -NFS4ERR_BAD_STATEID:
@@ -4996,6 +5091,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
4996 case -NFS4ERR_STALE_STATEID: 5091 case -NFS4ERR_STALE_STATEID:
4997 case -NFS4ERR_EXPIRED: 5092 case -NFS4ERR_EXPIRED:
4998 task->tk_status = 0; 5093 task->tk_status = 0;
5094 if (data->roc)
5095 pnfs_roc_set_barrier(data->inode, data->roc_barrier);
4999 break; 5096 break;
5000 default: 5097 default:
5001 if (nfs4_async_handle_error(task, data->res.server, NULL) == 5098 if (nfs4_async_handle_error(task, data->res.server, NULL) ==
@@ -5009,6 +5106,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
5009 5106
5010static void nfs4_delegreturn_release(void *calldata) 5107static void nfs4_delegreturn_release(void *calldata)
5011{ 5108{
5109 struct nfs4_delegreturndata *data = calldata;
5110
5111 if (data->roc)
5112 pnfs_roc_release(data->inode);
5012 kfree(calldata); 5113 kfree(calldata);
5013} 5114}
5014 5115
@@ -5018,6 +5119,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
5018 5119
5019 d_data = (struct nfs4_delegreturndata *)data; 5120 d_data = (struct nfs4_delegreturndata *)data;
5020 5121
5122 if (d_data->roc &&
5123 pnfs_roc_drain(d_data->inode, &d_data->roc_barrier, task))
5124 return;
5125
5021 nfs4_setup_sequence(d_data->res.server, 5126 nfs4_setup_sequence(d_data->res.server,
5022 &d_data->args.seq_args, 5127 &d_data->args.seq_args,
5023 &d_data->res.seq_res, 5128 &d_data->res.seq_res,
@@ -5061,6 +5166,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
5061 nfs_fattr_init(data->res.fattr); 5166 nfs_fattr_init(data->res.fattr);
5062 data->timestamp = jiffies; 5167 data->timestamp = jiffies;
5063 data->rpc_status = 0; 5168 data->rpc_status = 0;
5169 data->inode = inode;
5170 data->roc = list_empty(&NFS_I(inode)->open_files) ?
5171 pnfs_roc(inode) : false;
5064 5172
5065 task_setup_data.callback_data = data; 5173 task_setup_data.callback_data = data;
5066 msg.rpc_argp = &data->args; 5174 msg.rpc_argp = &data->args;
@@ -5834,8 +5942,10 @@ struct nfs_release_lockowner_data {
5834static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) 5942static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
5835{ 5943{
5836 struct nfs_release_lockowner_data *data = calldata; 5944 struct nfs_release_lockowner_data *data = calldata;
5837 nfs40_setup_sequence(data->server, 5945 struct nfs_server *server = data->server;
5838 &data->args.seq_args, &data->res.seq_res, task); 5946 nfs40_setup_sequence(server, &data->args.seq_args,
5947 &data->res.seq_res, task);
5948 data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
5839 data->timestamp = jiffies; 5949 data->timestamp = jiffies;
5840} 5950}
5841 5951
@@ -5852,6 +5962,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
5852 break; 5962 break;
5853 case -NFS4ERR_STALE_CLIENTID: 5963 case -NFS4ERR_STALE_CLIENTID:
5854 case -NFS4ERR_EXPIRED: 5964 case -NFS4ERR_EXPIRED:
5965 nfs4_schedule_lease_recovery(server->nfs_client);
5966 break;
5855 case -NFS4ERR_LEASE_MOVED: 5967 case -NFS4ERR_LEASE_MOVED:
5856 case -NFS4ERR_DELAY: 5968 case -NFS4ERR_DELAY:
5857 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) 5969 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
@@ -5872,7 +5984,8 @@ static const struct rpc_call_ops nfs4_release_lockowner_ops = {
5872 .rpc_release = nfs4_release_lockowner_release, 5984 .rpc_release = nfs4_release_lockowner_release,
5873}; 5985};
5874 5986
5875static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) 5987static void
5988nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
5876{ 5989{
5877 struct nfs_release_lockowner_data *data; 5990 struct nfs_release_lockowner_data *data;
5878 struct rpc_message msg = { 5991 struct rpc_message msg = {
@@ -5880,11 +5993,11 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
5880 }; 5993 };
5881 5994
5882 if (server->nfs_client->cl_mvops->minor_version != 0) 5995 if (server->nfs_client->cl_mvops->minor_version != 0)
5883 return -EINVAL; 5996 return;
5884 5997
5885 data = kmalloc(sizeof(*data), GFP_NOFS); 5998 data = kmalloc(sizeof(*data), GFP_NOFS);
5886 if (!data) 5999 if (!data)
5887 return -ENOMEM; 6000 return;
5888 data->lsp = lsp; 6001 data->lsp = lsp;
5889 data->server = server; 6002 data->server = server;
5890 data->args.lock_owner.clientid = server->nfs_client->cl_clientid; 6003 data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
@@ -5895,7 +6008,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
5895 msg.rpc_resp = &data->res; 6008 msg.rpc_resp = &data->res;
5896 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 6009 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
5897 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); 6010 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
5898 return 0;
5899} 6011}
5900 6012
5901#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 6013#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@@ -8182,7 +8294,8 @@ static int nfs41_free_stateid(struct nfs_server *server,
8182 return ret; 8294 return ret;
8183} 8295}
8184 8296
8185static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) 8297static void
8298nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
8186{ 8299{
8187 struct rpc_task *task; 8300 struct rpc_task *task;
8188 struct rpc_cred *cred = lsp->ls_state->owner->so_cred; 8301 struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
@@ -8190,9 +8303,8 @@ static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta
8190 task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); 8303 task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false);
8191 nfs4_free_lock_state(server, lsp); 8304 nfs4_free_lock_state(server, lsp);
8192 if (IS_ERR(task)) 8305 if (IS_ERR(task))
8193 return PTR_ERR(task); 8306 return;
8194 rpc_put_task(task); 8307 rpc_put_task(task);
8195 return 0;
8196} 8308}
8197 8309
8198static bool nfs41_match_stateid(const nfs4_stateid *s1, 8310static bool nfs41_match_stateid(const nfs4_stateid *s1,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 42f121182167..a043f618cd5a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -787,33 +787,36 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
787 * that is compatible with current->files 787 * that is compatible with current->files
788 */ 788 */
789static struct nfs4_lock_state * 789static struct nfs4_lock_state *
790__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) 790__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
791{ 791{
792 struct nfs4_lock_state *pos; 792 struct nfs4_lock_state *pos;
793 list_for_each_entry(pos, &state->lock_states, ls_locks) { 793 list_for_each_entry(pos, &state->lock_states, ls_locks) {
794 if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) 794 if (pos->ls_owner != fl_owner)
795 continue; 795 continue;
796 switch (pos->ls_owner.lo_type) {
797 case NFS4_POSIX_LOCK_TYPE:
798 if (pos->ls_owner.lo_u.posix_owner != fl_owner)
799 continue;
800 break;
801 case NFS4_FLOCK_LOCK_TYPE:
802 if (pos->ls_owner.lo_u.flock_owner != fl_pid)
803 continue;
804 }
805 atomic_inc(&pos->ls_count); 796 atomic_inc(&pos->ls_count);
806 return pos; 797 return pos;
807 } 798 }
808 return NULL; 799 return NULL;
809} 800}
810 801
802static void
803free_lock_state_work(struct work_struct *work)
804{
805 struct nfs4_lock_state *lsp = container_of(work,
806 struct nfs4_lock_state, ls_release);
807 struct nfs4_state *state = lsp->ls_state;
808 struct nfs_server *server = state->owner->so_server;
809 struct nfs_client *clp = server->nfs_client;
810
811 clp->cl_mvops->free_lock_state(server, lsp);
812}
813
811/* 814/*
812 * Return a compatible lock_state. If no initialized lock_state structure 815 * Return a compatible lock_state. If no initialized lock_state structure
813 * exists, return an uninitialized one. 816 * exists, return an uninitialized one.
814 * 817 *
815 */ 818 */
816static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) 819static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
817{ 820{
818 struct nfs4_lock_state *lsp; 821 struct nfs4_lock_state *lsp;
819 struct nfs_server *server = state->owner->so_server; 822 struct nfs_server *server = state->owner->so_server;
@@ -824,21 +827,12 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
824 nfs4_init_seqid_counter(&lsp->ls_seqid); 827 nfs4_init_seqid_counter(&lsp->ls_seqid);
825 atomic_set(&lsp->ls_count, 1); 828 atomic_set(&lsp->ls_count, 1);
826 lsp->ls_state = state; 829 lsp->ls_state = state;
827 lsp->ls_owner.lo_type = type; 830 lsp->ls_owner = fl_owner;
828 switch (lsp->ls_owner.lo_type) {
829 case NFS4_FLOCK_LOCK_TYPE:
830 lsp->ls_owner.lo_u.flock_owner = fl_pid;
831 break;
832 case NFS4_POSIX_LOCK_TYPE:
833 lsp->ls_owner.lo_u.posix_owner = fl_owner;
834 break;
835 default:
836 goto out_free;
837 }
838 lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); 831 lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
839 if (lsp->ls_seqid.owner_id < 0) 832 if (lsp->ls_seqid.owner_id < 0)
840 goto out_free; 833 goto out_free;
841 INIT_LIST_HEAD(&lsp->ls_locks); 834 INIT_LIST_HEAD(&lsp->ls_locks);
835 INIT_WORK(&lsp->ls_release, free_lock_state_work);
842 return lsp; 836 return lsp;
843out_free: 837out_free:
844 kfree(lsp); 838 kfree(lsp);
@@ -857,13 +851,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp
857 * exists, return an uninitialized one. 851 * exists, return an uninitialized one.
858 * 852 *
859 */ 853 */
860static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) 854static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
861{ 855{
862 struct nfs4_lock_state *lsp, *new = NULL; 856 struct nfs4_lock_state *lsp, *new = NULL;
863 857
864 for(;;) { 858 for(;;) {
865 spin_lock(&state->state_lock); 859 spin_lock(&state->state_lock);
866 lsp = __nfs4_find_lock_state(state, owner, pid, type); 860 lsp = __nfs4_find_lock_state(state, owner);
867 if (lsp != NULL) 861 if (lsp != NULL)
868 break; 862 break;
869 if (new != NULL) { 863 if (new != NULL) {
@@ -874,7 +868,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
874 break; 868 break;
875 } 869 }
876 spin_unlock(&state->state_lock); 870 spin_unlock(&state->state_lock);
877 new = nfs4_alloc_lock_state(state, owner, pid, type); 871 new = nfs4_alloc_lock_state(state, owner);
878 if (new == NULL) 872 if (new == NULL)
879 return NULL; 873 return NULL;
880 } 874 }
@@ -902,13 +896,12 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
902 if (list_empty(&state->lock_states)) 896 if (list_empty(&state->lock_states))
903 clear_bit(LK_STATE_IN_USE, &state->flags); 897 clear_bit(LK_STATE_IN_USE, &state->flags);
904 spin_unlock(&state->state_lock); 898 spin_unlock(&state->state_lock);
905 server = state->owner->so_server; 899 if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags))
906 if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { 900 queue_work(nfsiod_workqueue, &lsp->ls_release);
907 struct nfs_client *clp = server->nfs_client; 901 else {
908 902 server = state->owner->so_server;
909 clp->cl_mvops->free_lock_state(server, lsp);
910 } else
911 nfs4_free_lock_state(server, lsp); 903 nfs4_free_lock_state(server, lsp);
904 }
912} 905}
913 906
914static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) 907static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -935,13 +928,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
935 928
936 if (fl->fl_ops != NULL) 929 if (fl->fl_ops != NULL)
937 return 0; 930 return 0;
938 if (fl->fl_flags & FL_POSIX) 931 lsp = nfs4_get_lock_state(state, fl->fl_owner);
939 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
940 else if (fl->fl_flags & FL_FLOCK)
941 lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
942 NFS4_FLOCK_LOCK_TYPE);
943 else
944 return -EINVAL;
945 if (lsp == NULL) 932 if (lsp == NULL)
946 return -ENOMEM; 933 return -ENOMEM;
947 fl->fl_u.nfs4_fl.owner = lsp; 934 fl->fl_u.nfs4_fl.owner = lsp;
@@ -955,7 +942,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
955{ 942{
956 struct nfs4_lock_state *lsp; 943 struct nfs4_lock_state *lsp;
957 fl_owner_t fl_owner; 944 fl_owner_t fl_owner;
958 pid_t fl_pid;
959 int ret = -ENOENT; 945 int ret = -ENOENT;
960 946
961 947
@@ -966,9 +952,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
966 goto out; 952 goto out;
967 953
968 fl_owner = lockowner->l_owner; 954 fl_owner = lockowner->l_owner;
969 fl_pid = lockowner->l_pid;
970 spin_lock(&state->state_lock); 955 spin_lock(&state->state_lock);
971 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); 956 lsp = __nfs4_find_lock_state(state, fl_owner);
972 if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) 957 if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
973 ret = -EIO; 958 ret = -EIO;
974 else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { 959 else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 0a744f3a86f6..1c32adbe728d 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
932 932
933DECLARE_EVENT_CLASS(nfs4_read_event, 933DECLARE_EVENT_CLASS(nfs4_read_event,
934 TP_PROTO( 934 TP_PROTO(
935 const struct nfs_pgio_data *data, 935 const struct nfs_pgio_header *hdr,
936 int error 936 int error
937 ), 937 ),
938 938
939 TP_ARGS(data, error), 939 TP_ARGS(hdr, error),
940 940
941 TP_STRUCT__entry( 941 TP_STRUCT__entry(
942 __field(dev_t, dev) 942 __field(dev_t, dev)
@@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
948 ), 948 ),
949 949
950 TP_fast_assign( 950 TP_fast_assign(
951 const struct inode *inode = data->header->inode; 951 const struct inode *inode = hdr->inode;
952 __entry->dev = inode->i_sb->s_dev; 952 __entry->dev = inode->i_sb->s_dev;
953 __entry->fileid = NFS_FILEID(inode); 953 __entry->fileid = NFS_FILEID(inode);
954 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 954 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
955 __entry->offset = data->args.offset; 955 __entry->offset = hdr->args.offset;
956 __entry->count = data->args.count; 956 __entry->count = hdr->args.count;
957 __entry->error = error; 957 __entry->error = error;
958 ), 958 ),
959 959
@@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
972#define DEFINE_NFS4_READ_EVENT(name) \ 972#define DEFINE_NFS4_READ_EVENT(name) \
973 DEFINE_EVENT(nfs4_read_event, name, \ 973 DEFINE_EVENT(nfs4_read_event, name, \
974 TP_PROTO( \ 974 TP_PROTO( \
975 const struct nfs_pgio_data *data, \ 975 const struct nfs_pgio_header *hdr, \
976 int error \ 976 int error \
977 ), \ 977 ), \
978 TP_ARGS(data, error)) 978 TP_ARGS(hdr, error))
979DEFINE_NFS4_READ_EVENT(nfs4_read); 979DEFINE_NFS4_READ_EVENT(nfs4_read);
980#ifdef CONFIG_NFS_V4_1 980#ifdef CONFIG_NFS_V4_1
981DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); 981DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
@@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
983 983
984DECLARE_EVENT_CLASS(nfs4_write_event, 984DECLARE_EVENT_CLASS(nfs4_write_event,
985 TP_PROTO( 985 TP_PROTO(
986 const struct nfs_pgio_data *data, 986 const struct nfs_pgio_header *hdr,
987 int error 987 int error
988 ), 988 ),
989 989
990 TP_ARGS(data, error), 990 TP_ARGS(hdr, error),
991 991
992 TP_STRUCT__entry( 992 TP_STRUCT__entry(
993 __field(dev_t, dev) 993 __field(dev_t, dev)
@@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
999 ), 999 ),
1000 1000
1001 TP_fast_assign( 1001 TP_fast_assign(
1002 const struct inode *inode = data->header->inode; 1002 const struct inode *inode = hdr->inode;
1003 __entry->dev = inode->i_sb->s_dev; 1003 __entry->dev = inode->i_sb->s_dev;
1004 __entry->fileid = NFS_FILEID(inode); 1004 __entry->fileid = NFS_FILEID(inode);
1005 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 1005 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
1006 __entry->offset = data->args.offset; 1006 __entry->offset = hdr->args.offset;
1007 __entry->count = data->args.count; 1007 __entry->count = hdr->args.count;
1008 __entry->error = error; 1008 __entry->error = error;
1009 ), 1009 ),
1010 1010
@@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
1024#define DEFINE_NFS4_WRITE_EVENT(name) \ 1024#define DEFINE_NFS4_WRITE_EVENT(name) \
1025 DEFINE_EVENT(nfs4_write_event, name, \ 1025 DEFINE_EVENT(nfs4_write_event, name, \
1026 TP_PROTO( \ 1026 TP_PROTO( \
1027 const struct nfs_pgio_data *data, \ 1027 const struct nfs_pgio_header *hdr, \
1028 int error \ 1028 int error \
1029 ), \ 1029 ), \
1030 TP_ARGS(data, error)) 1030 TP_ARGS(hdr, error))
1031DEFINE_NFS4_WRITE_EVENT(nfs4_write); 1031DEFINE_NFS4_WRITE_EVENT(nfs4_write);
1032#ifdef CONFIG_NFS_V4_1 1032#ifdef CONFIG_NFS_V4_1
1033DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); 1033DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 939ae606cfa4..e13b59d8d9aa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7092,7 +7092,7 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
7092 if (!status) 7092 if (!status)
7093 status = decode_sequence(xdr, &res->seq_res, rqstp); 7093 status = decode_sequence(xdr, &res->seq_res, rqstp);
7094 if (!status) 7094 if (!status)
7095 status = decode_reclaim_complete(xdr, (void *)NULL); 7095 status = decode_reclaim_complete(xdr, NULL);
7096 return status; 7096 return status;
7097} 7097}
7098 7098
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 611320753db2..ae05278b3761 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private)
439 objlayout_read_done(&objios->oir, status, objios->sync); 439 objlayout_read_done(&objios->oir, status, objios->sync);
440} 440}
441 441
442int objio_read_pagelist(struct nfs_pgio_data *rdata) 442int objio_read_pagelist(struct nfs_pgio_header *hdr)
443{ 443{
444 struct nfs_pgio_header *hdr = rdata->header;
445 struct objio_state *objios; 444 struct objio_state *objios;
446 int ret; 445 int ret;
447 446
448 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, 447 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
449 hdr->lseg, rdata->args.pages, rdata->args.pgbase, 448 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
450 rdata->args.offset, rdata->args.count, rdata, 449 hdr->args.offset, hdr->args.count, hdr,
451 GFP_KERNEL, &objios); 450 GFP_KERNEL, &objios);
452 if (unlikely(ret)) 451 if (unlikely(ret))
453 return ret; 452 return ret;
454 453
455 objios->ios->done = _read_done; 454 objios->ios->done = _read_done;
456 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
457 rdata->args.offset, rdata->args.count); 456 hdr->args.offset, hdr->args.count);
458 ret = ore_read(objios->ios); 457 ret = ore_read(objios->ios);
459 if (unlikely(ret)) 458 if (unlikely(ret))
460 objio_free_result(&objios->oir); 459 objio_free_result(&objios->oir);
@@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private)
487static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) 486static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
488{ 487{
489 struct objio_state *objios = priv; 488 struct objio_state *objios = priv;
490 struct nfs_pgio_data *wdata = objios->oir.rpcdata; 489 struct nfs_pgio_header *hdr = objios->oir.rpcdata;
491 struct address_space *mapping = wdata->header->inode->i_mapping; 490 struct address_space *mapping = hdr->inode->i_mapping;
492 pgoff_t index = offset / PAGE_SIZE; 491 pgoff_t index = offset / PAGE_SIZE;
493 struct page *page; 492 struct page *page;
494 loff_t i_size = i_size_read(wdata->header->inode); 493 loff_t i_size = i_size_read(hdr->inode);
495 494
496 if (offset >= i_size) { 495 if (offset >= i_size) {
497 *uptodate = true; 496 *uptodate = true;
@@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = {
531 .put_page = &__r4w_put_page, 530 .put_page = &__r4w_put_page,
532}; 531};
533 532
534int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) 533int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
535{ 534{
536 struct nfs_pgio_header *hdr = wdata->header;
537 struct objio_state *objios; 535 struct objio_state *objios;
538 int ret; 536 int ret;
539 537
540 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, 538 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
541 hdr->lseg, wdata->args.pages, wdata->args.pgbase, 539 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
542 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, 540 hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
543 &objios); 541 &objios);
544 if (unlikely(ret)) 542 if (unlikely(ret))
545 return ret; 543 return ret;
@@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
551 objios->ios->done = _write_done; 549 objios->ios->done = _write_done;
552 550
553 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 551 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
554 wdata->args.offset, wdata->args.count); 552 hdr->args.offset, hdr->args.count);
555 ret = ore_write(objios->ios); 553 ret = ore_write(objios->ios);
556 if (unlikely(ret)) { 554 if (unlikely(ret)) {
557 objio_free_result(&objios->oir); 555 objio_free_result(&objios->oir);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 765d3f54e986..697a16d11fac 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
229static void _rpc_read_complete(struct work_struct *work) 229static void _rpc_read_complete(struct work_struct *work)
230{ 230{
231 struct rpc_task *task; 231 struct rpc_task *task;
232 struct nfs_pgio_data *rdata; 232 struct nfs_pgio_header *hdr;
233 233
234 dprintk("%s enter\n", __func__); 234 dprintk("%s enter\n", __func__);
235 task = container_of(work, struct rpc_task, u.tk_work); 235 task = container_of(work, struct rpc_task, u.tk_work);
236 rdata = container_of(task, struct nfs_pgio_data, task); 236 hdr = container_of(task, struct nfs_pgio_header, task);
237 237
238 pnfs_ld_read_done(rdata); 238 pnfs_ld_read_done(hdr);
239} 239}
240 240
241void 241void
242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
243{ 243{
244 struct nfs_pgio_data *rdata = oir->rpcdata; 244 struct nfs_pgio_header *hdr = oir->rpcdata;
245 245
246 oir->status = rdata->task.tk_status = status; 246 oir->status = hdr->task.tk_status = status;
247 if (status >= 0) 247 if (status >= 0)
248 rdata->res.count = status; 248 hdr->res.count = status;
249 else 249 else
250 rdata->header->pnfs_error = status; 250 hdr->pnfs_error = status;
251 objlayout_iodone(oir); 251 objlayout_iodone(oir);
252 /* must not use oir after this point */ 252 /* must not use oir after this point */
253 253
254 dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, 254 dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
255 status, rdata->res.eof, sync); 255 status, hdr->res.eof, sync);
256 256
257 if (sync) 257 if (sync)
258 pnfs_ld_read_done(rdata); 258 pnfs_ld_read_done(hdr);
259 else { 259 else {
260 INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); 260 INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
261 schedule_work(&rdata->task.u.tk_work); 261 schedule_work(&hdr->task.u.tk_work);
262 } 262 }
263} 263}
264 264
@@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
266 * Perform sync or async reads. 266 * Perform sync or async reads.
267 */ 267 */
268enum pnfs_try_status 268enum pnfs_try_status
269objlayout_read_pagelist(struct nfs_pgio_data *rdata) 269objlayout_read_pagelist(struct nfs_pgio_header *hdr)
270{ 270{
271 struct nfs_pgio_header *hdr = rdata->header;
272 struct inode *inode = hdr->inode; 271 struct inode *inode = hdr->inode;
273 loff_t offset = rdata->args.offset; 272 loff_t offset = hdr->args.offset;
274 size_t count = rdata->args.count; 273 size_t count = hdr->args.count;
275 int err; 274 int err;
276 loff_t eof; 275 loff_t eof;
277 276
@@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
279 if (unlikely(offset + count > eof)) { 278 if (unlikely(offset + count > eof)) {
280 if (offset >= eof) { 279 if (offset >= eof) {
281 err = 0; 280 err = 0;
282 rdata->res.count = 0; 281 hdr->res.count = 0;
283 rdata->res.eof = 1; 282 hdr->res.eof = 1;
284 /*FIXME: do we need to call pnfs_ld_read_done() */ 283 /*FIXME: do we need to call pnfs_ld_read_done() */
285 goto out; 284 goto out;
286 } 285 }
287 count = eof - offset; 286 count = eof - offset;
288 } 287 }
289 288
290 rdata->res.eof = (offset + count) >= eof; 289 hdr->res.eof = (offset + count) >= eof;
291 _fix_verify_io_params(hdr->lseg, &rdata->args.pages, 290 _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
292 &rdata->args.pgbase, 291 &hdr->args.pgbase,
293 rdata->args.offset, rdata->args.count); 292 hdr->args.offset, hdr->args.count);
294 293
295 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", 294 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
296 __func__, inode->i_ino, offset, count, rdata->res.eof); 295 __func__, inode->i_ino, offset, count, hdr->res.eof);
297 296
298 err = objio_read_pagelist(rdata); 297 err = objio_read_pagelist(hdr);
299 out: 298 out:
300 if (unlikely(err)) { 299 if (unlikely(err)) {
301 hdr->pnfs_error = err; 300 hdr->pnfs_error = err;
@@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
312static void _rpc_write_complete(struct work_struct *work) 311static void _rpc_write_complete(struct work_struct *work)
313{ 312{
314 struct rpc_task *task; 313 struct rpc_task *task;
315 struct nfs_pgio_data *wdata; 314 struct nfs_pgio_header *hdr;
316 315
317 dprintk("%s enter\n", __func__); 316 dprintk("%s enter\n", __func__);
318 task = container_of(work, struct rpc_task, u.tk_work); 317 task = container_of(work, struct rpc_task, u.tk_work);
319 wdata = container_of(task, struct nfs_pgio_data, task); 318 hdr = container_of(task, struct nfs_pgio_header, task);
320 319
321 pnfs_ld_write_done(wdata); 320 pnfs_ld_write_done(hdr);
322} 321}
323 322
324void 323void
325objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 324objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
326{ 325{
327 struct nfs_pgio_data *wdata = oir->rpcdata; 326 struct nfs_pgio_header *hdr = oir->rpcdata;
328 327
329 oir->status = wdata->task.tk_status = status; 328 oir->status = hdr->task.tk_status = status;
330 if (status >= 0) { 329 if (status >= 0) {
331 wdata->res.count = status; 330 hdr->res.count = status;
332 wdata->verf.committed = oir->committed; 331 hdr->verf.committed = oir->committed;
333 } else { 332 } else {
334 wdata->header->pnfs_error = status; 333 hdr->pnfs_error = status;
335 } 334 }
336 objlayout_iodone(oir); 335 objlayout_iodone(oir);
337 /* must not use oir after this point */ 336 /* must not use oir after this point */
338 337
339 dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, 338 dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
340 status, wdata->verf.committed, sync); 339 status, hdr->verf.committed, sync);
341 340
342 if (sync) 341 if (sync)
343 pnfs_ld_write_done(wdata); 342 pnfs_ld_write_done(hdr);
344 else { 343 else {
345 INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); 344 INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
346 schedule_work(&wdata->task.u.tk_work); 345 schedule_work(&hdr->task.u.tk_work);
347 } 346 }
348} 347}
349 348
@@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
351 * Perform sync or async writes. 350 * Perform sync or async writes.
352 */ 351 */
353enum pnfs_try_status 352enum pnfs_try_status
354objlayout_write_pagelist(struct nfs_pgio_data *wdata, 353objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
355 int how)
356{ 354{
357 struct nfs_pgio_header *hdr = wdata->header;
358 int err; 355 int err;
359 356
360 _fix_verify_io_params(hdr->lseg, &wdata->args.pages, 357 _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
361 &wdata->args.pgbase, 358 &hdr->args.pgbase,
362 wdata->args.offset, wdata->args.count); 359 hdr->args.offset, hdr->args.count);
363 360
364 err = objio_write_pagelist(wdata, how); 361 err = objio_write_pagelist(hdr, how);
365 if (unlikely(err)) { 362 if (unlikely(err)) {
366 hdr->pnfs_error = err; 363 hdr->pnfs_error = err;
367 dprintk("%s: Returned Error %d\n", __func__, err); 364 dprintk("%s: Returned Error %d\n", __func__, err);
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 01e041029a6c..fd13f1d2f136 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
119 */ 119 */
120extern void objio_free_result(struct objlayout_io_res *oir); 120extern void objio_free_result(struct objlayout_io_res *oir);
121 121
122extern int objio_read_pagelist(struct nfs_pgio_data *rdata); 122extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
123extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); 123extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
124 124
125/* 125/*
126 * callback API 126 * callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
168extern void objlayout_free_lseg(struct pnfs_layout_segment *); 168extern void objlayout_free_lseg(struct pnfs_layout_segment *);
169 169
170extern enum pnfs_try_status objlayout_read_pagelist( 170extern enum pnfs_try_status objlayout_read_pagelist(
171 struct nfs_pgio_data *); 171 struct nfs_pgio_header *);
172 172
173extern enum pnfs_try_status objlayout_write_pagelist( 173extern enum pnfs_try_status objlayout_write_pagelist(
174 struct nfs_pgio_data *, 174 struct nfs_pgio_header *,
175 int how); 175 int how);
176 176
177extern void objlayout_encode_layoutcommit( 177extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 0be5050638f7..ba491926df5f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -141,16 +141,24 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
141 * @req - request in group that is to be locked 141 * @req - request in group that is to be locked
142 * 142 *
143 * this lock must be held if modifying the page group list 143 * this lock must be held if modifying the page group list
144 *
145 * returns result from wait_on_bit_lock: 0 on success, < 0 on error
144 */ 146 */
145void 147int
146nfs_page_group_lock(struct nfs_page *req) 148nfs_page_group_lock(struct nfs_page *req, bool wait)
147{ 149{
148 struct nfs_page *head = req->wb_head; 150 struct nfs_page *head = req->wb_head;
151 int ret;
149 152
150 WARN_ON_ONCE(head != head->wb_head); 153 WARN_ON_ONCE(head != head->wb_head);
151 154
152 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 155 do {
156 ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
153 TASK_UNINTERRUPTIBLE); 157 TASK_UNINTERRUPTIBLE);
158 } while (wait && ret != 0);
159
160 WARN_ON_ONCE(ret > 0);
161 return ret;
154} 162}
155 163
156/* 164/*
@@ -211,7 +219,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
211{ 219{
212 bool ret; 220 bool ret;
213 221
214 nfs_page_group_lock(req); 222 nfs_page_group_lock(req, true);
215 ret = nfs_page_group_sync_on_bit_locked(req, bit); 223 ret = nfs_page_group_sync_on_bit_locked(req, bit);
216 nfs_page_group_unlock(req); 224 nfs_page_group_unlock(req);
217 225
@@ -454,123 +462,72 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
454} 462}
455EXPORT_SYMBOL_GPL(nfs_generic_pg_test); 463EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
456 464
457static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) 465struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
458{
459 return container_of(hdr, struct nfs_rw_header, header);
460}
461
462/**
463 * nfs_rw_header_alloc - Allocate a header for a read or write
464 * @ops: Read or write function vector
465 */
466struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
467{ 466{
468 struct nfs_rw_header *header = ops->rw_alloc_header(); 467 struct nfs_pgio_header *hdr = ops->rw_alloc_header();
469
470 if (header) {
471 struct nfs_pgio_header *hdr = &header->header;
472 468
469 if (hdr) {
473 INIT_LIST_HEAD(&hdr->pages); 470 INIT_LIST_HEAD(&hdr->pages);
474 spin_lock_init(&hdr->lock); 471 spin_lock_init(&hdr->lock);
475 atomic_set(&hdr->refcnt, 0);
476 hdr->rw_ops = ops; 472 hdr->rw_ops = ops;
477 } 473 }
478 return header; 474 return hdr;
479} 475}
480EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); 476EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
481 477
482/* 478/*
483 * nfs_rw_header_free - Free a read or write header 479 * nfs_pgio_header_free - Free a read or write header
484 * @hdr: The header to free 480 * @hdr: The header to free
485 */ 481 */
486void nfs_rw_header_free(struct nfs_pgio_header *hdr) 482void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
487{ 483{
488 hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); 484 hdr->rw_ops->rw_free_header(hdr);
489} 485}
490EXPORT_SYMBOL_GPL(nfs_rw_header_free); 486EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
491 487
492/** 488/**
493 * nfs_pgio_data_alloc - Allocate pageio data 489 * nfs_pgio_data_destroy - make @hdr suitable for reuse
494 * @hdr: The header making a request 490 *
495 * @pagecount: Number of pages to create 491 * Frees memory and releases refs from nfs_generic_pgio, so that it may
496 */ 492 * be called again.
497static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, 493 *
498 unsigned int pagecount) 494 * @hdr: A header that has had nfs_generic_pgio called
499{
500 struct nfs_pgio_data *data, *prealloc;
501
502 prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
503 if (prealloc->header == NULL)
504 data = prealloc;
505 else
506 data = kzalloc(sizeof(*data), GFP_KERNEL);
507 if (!data)
508 goto out;
509
510 if (nfs_pgarray_set(&data->pages, pagecount)) {
511 data->header = hdr;
512 atomic_inc(&hdr->refcnt);
513 } else {
514 if (data != prealloc)
515 kfree(data);
516 data = NULL;
517 }
518out:
519 return data;
520}
521
522/**
523 * nfs_pgio_data_release - Properly free pageio data
524 * @data: The data to release
525 */ 495 */
526void nfs_pgio_data_release(struct nfs_pgio_data *data) 496void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
527{ 497{
528 struct nfs_pgio_header *hdr = data->header; 498 put_nfs_open_context(hdr->args.context);
529 struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); 499 if (hdr->page_array.pagevec != hdr->page_array.page_array)
530 500 kfree(hdr->page_array.pagevec);
531 put_nfs_open_context(data->args.context);
532 if (data->pages.pagevec != data->pages.page_array)
533 kfree(data->pages.pagevec);
534 if (data == &pageio_header->rpc_data) {
535 data->header = NULL;
536 data = NULL;
537 }
538 if (atomic_dec_and_test(&hdr->refcnt))
539 hdr->completion_ops->completion(hdr);
540 /* Note: we only free the rpc_task after callbacks are done.
541 * See the comment in rpc_free_task() for why
542 */
543 kfree(data);
544} 501}
545EXPORT_SYMBOL_GPL(nfs_pgio_data_release); 502EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
546 503
547/** 504/**
548 * nfs_pgio_rpcsetup - Set up arguments for a pageio call 505 * nfs_pgio_rpcsetup - Set up arguments for a pageio call
549 * @data: The pageio data 506 * @hdr: The pageio hdr
550 * @count: Number of bytes to read 507 * @count: Number of bytes to read
551 * @offset: Initial offset 508 * @offset: Initial offset
552 * @how: How to commit data (writes only) 509 * @how: How to commit data (writes only)
553 * @cinfo: Commit information for the call (writes only) 510 * @cinfo: Commit information for the call (writes only)
554 */ 511 */
555static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, 512static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
556 unsigned int count, unsigned int offset, 513 unsigned int count, unsigned int offset,
557 int how, struct nfs_commit_info *cinfo) 514 int how, struct nfs_commit_info *cinfo)
558{ 515{
559 struct nfs_page *req = data->header->req; 516 struct nfs_page *req = hdr->req;
560 517
561 /* Set up the RPC argument and reply structs 518 /* Set up the RPC argument and reply structs
562 * NB: take care not to mess about with data->commit et al. */ 519 * NB: take care not to mess about with hdr->commit et al. */
563 520
564 data->args.fh = NFS_FH(data->header->inode); 521 hdr->args.fh = NFS_FH(hdr->inode);
565 data->args.offset = req_offset(req) + offset; 522 hdr->args.offset = req_offset(req) + offset;
566 /* pnfs_set_layoutcommit needs this */ 523 /* pnfs_set_layoutcommit needs this */
567 data->mds_offset = data->args.offset; 524 hdr->mds_offset = hdr->args.offset;
568 data->args.pgbase = req->wb_pgbase + offset; 525 hdr->args.pgbase = req->wb_pgbase + offset;
569 data->args.pages = data->pages.pagevec; 526 hdr->args.pages = hdr->page_array.pagevec;
570 data->args.count = count; 527 hdr->args.count = count;
571 data->args.context = get_nfs_open_context(req->wb_context); 528 hdr->args.context = get_nfs_open_context(req->wb_context);
572 data->args.lock_context = req->wb_lock_context; 529 hdr->args.lock_context = req->wb_lock_context;
573 data->args.stable = NFS_UNSTABLE; 530 hdr->args.stable = NFS_UNSTABLE;
574 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { 531 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
575 case 0: 532 case 0:
576 break; 533 break;
@@ -578,59 +535,59 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
578 if (nfs_reqs_to_commit(cinfo)) 535 if (nfs_reqs_to_commit(cinfo))
579 break; 536 break;
580 default: 537 default:
581 data->args.stable = NFS_FILE_SYNC; 538 hdr->args.stable = NFS_FILE_SYNC;
582 } 539 }
583 540
584 data->res.fattr = &data->fattr; 541 hdr->res.fattr = &hdr->fattr;
585 data->res.count = count; 542 hdr->res.count = count;
586 data->res.eof = 0; 543 hdr->res.eof = 0;
587 data->res.verf = &data->verf; 544 hdr->res.verf = &hdr->verf;
588 nfs_fattr_init(&data->fattr); 545 nfs_fattr_init(&hdr->fattr);
589} 546}
590 547
591/** 548/**
592 * nfs_pgio_prepare - Prepare pageio data to go over the wire 549 * nfs_pgio_prepare - Prepare pageio hdr to go over the wire
593 * @task: The current task 550 * @task: The current task
594 * @calldata: pageio data to prepare 551 * @calldata: pageio header to prepare
595 */ 552 */
596static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) 553static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
597{ 554{
598 struct nfs_pgio_data *data = calldata; 555 struct nfs_pgio_header *hdr = calldata;
599 int err; 556 int err;
600 err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); 557 err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr);
601 if (err) 558 if (err)
602 rpc_exit(task, err); 559 rpc_exit(task, err);
603} 560}
604 561
605int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, 562int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
606 const struct rpc_call_ops *call_ops, int how, int flags) 563 const struct rpc_call_ops *call_ops, int how, int flags)
607{ 564{
608 struct rpc_task *task; 565 struct rpc_task *task;
609 struct rpc_message msg = { 566 struct rpc_message msg = {
610 .rpc_argp = &data->args, 567 .rpc_argp = &hdr->args,
611 .rpc_resp = &data->res, 568 .rpc_resp = &hdr->res,
612 .rpc_cred = data->header->cred, 569 .rpc_cred = hdr->cred,
613 }; 570 };
614 struct rpc_task_setup task_setup_data = { 571 struct rpc_task_setup task_setup_data = {
615 .rpc_client = clnt, 572 .rpc_client = clnt,
616 .task = &data->task, 573 .task = &hdr->task,
617 .rpc_message = &msg, 574 .rpc_message = &msg,
618 .callback_ops = call_ops, 575 .callback_ops = call_ops,
619 .callback_data = data, 576 .callback_data = hdr,
620 .workqueue = nfsiod_workqueue, 577 .workqueue = nfsiod_workqueue,
621 .flags = RPC_TASK_ASYNC | flags, 578 .flags = RPC_TASK_ASYNC | flags,
622 }; 579 };
623 int ret = 0; 580 int ret = 0;
624 581
625 data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); 582 hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how);
626 583
627 dprintk("NFS: %5u initiated pgio call " 584 dprintk("NFS: %5u initiated pgio call "
628 "(req %s/%llu, %u bytes @ offset %llu)\n", 585 "(req %s/%llu, %u bytes @ offset %llu)\n",
629 data->task.tk_pid, 586 hdr->task.tk_pid,
630 data->header->inode->i_sb->s_id, 587 hdr->inode->i_sb->s_id,
631 (unsigned long long)NFS_FILEID(data->header->inode), 588 (unsigned long long)NFS_FILEID(hdr->inode),
632 data->args.count, 589 hdr->args.count,
633 (unsigned long long)data->args.offset); 590 (unsigned long long)hdr->args.offset);
634 591
635 task = rpc_run_task(&task_setup_data); 592 task = rpc_run_task(&task_setup_data);
636 if (IS_ERR(task)) { 593 if (IS_ERR(task)) {
@@ -657,22 +614,23 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
657 struct nfs_pgio_header *hdr) 614 struct nfs_pgio_header *hdr)
658{ 615{
659 set_bit(NFS_IOHDR_REDO, &hdr->flags); 616 set_bit(NFS_IOHDR_REDO, &hdr->flags);
660 nfs_pgio_data_release(hdr->data); 617 nfs_pgio_data_destroy(hdr);
661 hdr->data = NULL; 618 hdr->completion_ops->completion(hdr);
662 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 619 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
663 return -ENOMEM; 620 return -ENOMEM;
664} 621}
665 622
666/** 623/**
667 * nfs_pgio_release - Release pageio data 624 * nfs_pgio_release - Release pageio data
668 * @calldata: The pageio data to release 625 * @calldata: The pageio header to release
669 */ 626 */
670static void nfs_pgio_release(void *calldata) 627static void nfs_pgio_release(void *calldata)
671{ 628{
672 struct nfs_pgio_data *data = calldata; 629 struct nfs_pgio_header *hdr = calldata;
673 if (data->header->rw_ops->rw_release) 630 if (hdr->rw_ops->rw_release)
674 data->header->rw_ops->rw_release(data); 631 hdr->rw_ops->rw_release(hdr);
675 nfs_pgio_data_release(data); 632 nfs_pgio_data_destroy(hdr);
633 hdr->completion_ops->completion(hdr);
676} 634}
677 635
678/** 636/**
@@ -713,22 +671,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init);
713/** 671/**
714 * nfs_pgio_result - Basic pageio error handling 672 * nfs_pgio_result - Basic pageio error handling
715 * @task: The task that ran 673 * @task: The task that ran
716 * @calldata: Pageio data to check 674 * @calldata: Pageio header to check
717 */ 675 */
718static void nfs_pgio_result(struct rpc_task *task, void *calldata) 676static void nfs_pgio_result(struct rpc_task *task, void *calldata)
719{ 677{
720 struct nfs_pgio_data *data = calldata; 678 struct nfs_pgio_header *hdr = calldata;
721 struct inode *inode = data->header->inode; 679 struct inode *inode = hdr->inode;
722 680
723 dprintk("NFS: %s: %5u, (status %d)\n", __func__, 681 dprintk("NFS: %s: %5u, (status %d)\n", __func__,
724 task->tk_pid, task->tk_status); 682 task->tk_pid, task->tk_status);
725 683
726 if (data->header->rw_ops->rw_done(task, data, inode) != 0) 684 if (hdr->rw_ops->rw_done(task, hdr, inode) != 0)
727 return; 685 return;
728 if (task->tk_status < 0) 686 if (task->tk_status < 0)
729 nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); 687 nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset);
730 else 688 else
731 data->header->rw_ops->rw_result(task, data); 689 hdr->rw_ops->rw_result(task, hdr);
732} 690}
733 691
734/* 692/*
@@ -744,17 +702,16 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
744{ 702{
745 struct nfs_page *req; 703 struct nfs_page *req;
746 struct page **pages; 704 struct page **pages;
747 struct nfs_pgio_data *data;
748 struct list_head *head = &desc->pg_list; 705 struct list_head *head = &desc->pg_list;
749 struct nfs_commit_info cinfo; 706 struct nfs_commit_info cinfo;
707 unsigned int pagecount;
750 708
751 data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, 709 pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
752 desc->pg_count)); 710 if (!nfs_pgarray_set(&hdr->page_array, pagecount))
753 if (!data)
754 return nfs_pgio_error(desc, hdr); 711 return nfs_pgio_error(desc, hdr);
755 712
756 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); 713 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
757 pages = data->pages.pagevec; 714 pages = hdr->page_array.pagevec;
758 while (!list_empty(head)) { 715 while (!list_empty(head)) {
759 req = nfs_list_entry(head->next); 716 req = nfs_list_entry(head->next);
760 nfs_list_remove_request(req); 717 nfs_list_remove_request(req);
@@ -767,8 +724,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
767 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 724 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
768 725
769 /* Set up the argument struct */ 726 /* Set up the argument struct */
770 nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); 727 nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
771 hdr->data = data;
772 desc->pg_rpc_callops = &nfs_pgio_common_ops; 728 desc->pg_rpc_callops = &nfs_pgio_common_ops;
773 return 0; 729 return 0;
774} 730}
@@ -776,25 +732,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
776 732
777static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) 733static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
778{ 734{
779 struct nfs_rw_header *rw_hdr;
780 struct nfs_pgio_header *hdr; 735 struct nfs_pgio_header *hdr;
781 int ret; 736 int ret;
782 737
783 rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); 738 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
784 if (!rw_hdr) { 739 if (!hdr) {
785 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 740 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
786 return -ENOMEM; 741 return -ENOMEM;
787 } 742 }
788 hdr = &rw_hdr->header; 743 nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
789 nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
790 atomic_inc(&hdr->refcnt);
791 ret = nfs_generic_pgio(desc, hdr); 744 ret = nfs_generic_pgio(desc, hdr);
792 if (ret == 0) 745 if (ret == 0)
793 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), 746 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
794 hdr->data, desc->pg_rpc_callops, 747 hdr, desc->pg_rpc_callops,
795 desc->pg_ioflags, 0); 748 desc->pg_ioflags, 0);
796 if (atomic_dec_and_test(&hdr->refcnt))
797 hdr->completion_ops->completion(hdr);
798 return ret; 749 return ret;
799} 750}
800 751
@@ -907,8 +858,13 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
907 struct nfs_page *subreq; 858 struct nfs_page *subreq;
908 unsigned int bytes_left = 0; 859 unsigned int bytes_left = 0;
909 unsigned int offset, pgbase; 860 unsigned int offset, pgbase;
861 int ret;
910 862
911 nfs_page_group_lock(req); 863 ret = nfs_page_group_lock(req, false);
864 if (ret < 0) {
865 desc->pg_error = ret;
866 return 0;
867 }
912 868
913 subreq = req; 869 subreq = req;
914 bytes_left = subreq->wb_bytes; 870 bytes_left = subreq->wb_bytes;
@@ -930,7 +886,11 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
930 if (desc->pg_recoalesce) 886 if (desc->pg_recoalesce)
931 return 0; 887 return 0;
932 /* retry add_request for this subreq */ 888 /* retry add_request for this subreq */
933 nfs_page_group_lock(req); 889 ret = nfs_page_group_lock(req, false);
890 if (ret < 0) {
891 desc->pg_error = ret;
892 return 0;
893 }
934 continue; 894 continue;
935 } 895 }
936 896
@@ -1005,7 +965,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1005 } while (ret); 965 } while (ret);
1006 return ret; 966 return ret;
1007} 967}
1008EXPORT_SYMBOL_GPL(nfs_pageio_add_request); 968
969/*
970 * nfs_pageio_resend - Transfer requests to new descriptor and resend
971 * @hdr - the pgio header to move request from
972 * @desc - the pageio descriptor to add requests to
973 *
974 * Try to move each request (nfs_page) from @hdr to @desc then attempt
975 * to send them.
976 *
977 * Returns 0 on success and < 0 on error.
978 */
979int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
980 struct nfs_pgio_header *hdr)
981{
982 LIST_HEAD(failed);
983
984 desc->pg_dreq = hdr->dreq;
985 while (!list_empty(&hdr->pages)) {
986 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
987
988 nfs_list_remove_request(req);
989 if (!nfs_pageio_add_request(desc, req))
990 nfs_list_add_request(req, &failed);
991 }
992 nfs_pageio_complete(desc);
993 if (!list_empty(&failed)) {
994 list_move(&failed, &hdr->pages);
995 return -EIO;
996 }
997 return 0;
998}
999EXPORT_SYMBOL_GPL(nfs_pageio_resend);
1009 1000
1010/** 1001/**
1011 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor 1002 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
@@ -1021,7 +1012,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
1021 break; 1012 break;
1022 } 1013 }
1023} 1014}
1024EXPORT_SYMBOL_GPL(nfs_pageio_complete);
1025 1015
1026/** 1016/**
1027 * nfs_pageio_cond_complete - Conditional I/O completion 1017 * nfs_pageio_cond_complete - Conditional I/O completion
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a8914b335617..a3851debf8a2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,6 +361,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
361} 361}
362EXPORT_SYMBOL_GPL(pnfs_put_lseg); 362EXPORT_SYMBOL_GPL(pnfs_put_lseg);
363 363
364static void pnfs_put_lseg_async_work(struct work_struct *work)
365{
366 struct pnfs_layout_segment *lseg;
367
368 lseg = container_of(work, struct pnfs_layout_segment, pls_work);
369
370 pnfs_put_lseg(lseg);
371}
372
373void
374pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
375{
376 INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work);
377 schedule_work(&lseg->pls_work);
378}
379EXPORT_SYMBOL_GPL(pnfs_put_lseg_async);
380
364static u64 381static u64
365end_offset(u64 start, u64 len) 382end_offset(u64 start, u64 len)
366{ 383{
@@ -1470,41 +1487,19 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1470} 1487}
1471EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1488EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1472 1489
1473int pnfs_write_done_resend_to_mds(struct inode *inode, 1490int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
1474 struct list_head *head,
1475 const struct nfs_pgio_completion_ops *compl_ops,
1476 struct nfs_direct_req *dreq)
1477{ 1491{
1478 struct nfs_pageio_descriptor pgio; 1492 struct nfs_pageio_descriptor pgio;
1479 LIST_HEAD(failed);
1480 1493
1481 /* Resend all requests through the MDS */ 1494 /* Resend all requests through the MDS */
1482 nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); 1495 nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
1483 pgio.pg_dreq = dreq; 1496 hdr->completion_ops);
1484 while (!list_empty(head)) { 1497 return nfs_pageio_resend(&pgio, hdr);
1485 struct nfs_page *req = nfs_list_entry(head->next);
1486
1487 nfs_list_remove_request(req);
1488 if (!nfs_pageio_add_request(&pgio, req))
1489 nfs_list_add_request(req, &failed);
1490 }
1491 nfs_pageio_complete(&pgio);
1492
1493 if (!list_empty(&failed)) {
1494 /* For some reason our attempt to resend pages. Mark the
1495 * overall send request as having failed, and let
1496 * nfs_writeback_release_full deal with the error.
1497 */
1498 list_move(&failed, head);
1499 return -EIO;
1500 }
1501 return 0;
1502} 1498}
1503EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); 1499EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1504 1500
1505static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) 1501static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
1506{ 1502{
1507 struct nfs_pgio_header *hdr = data->header;
1508 1503
1509 dprintk("pnfs write error = %d\n", hdr->pnfs_error); 1504 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1510 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1505 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
@@ -1512,50 +1507,42 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
1512 pnfs_return_layout(hdr->inode); 1507 pnfs_return_layout(hdr->inode);
1513 } 1508 }
1514 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1509 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1515 data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, 1510 hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
1516 &hdr->pages,
1517 hdr->completion_ops,
1518 hdr->dreq);
1519} 1511}
1520 1512
1521/* 1513/*
1522 * Called by non rpc-based layout drivers 1514 * Called by non rpc-based layout drivers
1523 */ 1515 */
1524void pnfs_ld_write_done(struct nfs_pgio_data *data) 1516void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
1525{ 1517{
1526 struct nfs_pgio_header *hdr = data->header; 1518 trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
1527
1528 trace_nfs4_pnfs_write(data, hdr->pnfs_error);
1529 if (!hdr->pnfs_error) { 1519 if (!hdr->pnfs_error) {
1530 pnfs_set_layoutcommit(data); 1520 pnfs_set_layoutcommit(hdr);
1531 hdr->mds_ops->rpc_call_done(&data->task, data); 1521 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
1532 } else 1522 } else
1533 pnfs_ld_handle_write_error(data); 1523 pnfs_ld_handle_write_error(hdr);
1534 hdr->mds_ops->rpc_release(data); 1524 hdr->mds_ops->rpc_release(hdr);
1535} 1525}
1536EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1526EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1537 1527
1538static void 1528static void
1539pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1529pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1540 struct nfs_pgio_data *data) 1530 struct nfs_pgio_header *hdr)
1541{ 1531{
1542 struct nfs_pgio_header *hdr = data->header;
1543
1544 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1532 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1545 list_splice_tail_init(&hdr->pages, &desc->pg_list); 1533 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1546 nfs_pageio_reset_write_mds(desc); 1534 nfs_pageio_reset_write_mds(desc);
1547 desc->pg_recoalesce = 1; 1535 desc->pg_recoalesce = 1;
1548 } 1536 }
1549 nfs_pgio_data_release(data); 1537 nfs_pgio_data_destroy(hdr);
1550} 1538}
1551 1539
1552static enum pnfs_try_status 1540static enum pnfs_try_status
1553pnfs_try_to_write_data(struct nfs_pgio_data *wdata, 1541pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
1554 const struct rpc_call_ops *call_ops, 1542 const struct rpc_call_ops *call_ops,
1555 struct pnfs_layout_segment *lseg, 1543 struct pnfs_layout_segment *lseg,
1556 int how) 1544 int how)
1557{ 1545{
1558 struct nfs_pgio_header *hdr = wdata->header;
1559 struct inode *inode = hdr->inode; 1546 struct inode *inode = hdr->inode;
1560 enum pnfs_try_status trypnfs; 1547 enum pnfs_try_status trypnfs;
1561 struct nfs_server *nfss = NFS_SERVER(inode); 1548 struct nfs_server *nfss = NFS_SERVER(inode);
@@ -1563,8 +1550,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
1563 hdr->mds_ops = call_ops; 1550 hdr->mds_ops = call_ops;
1564 1551
1565 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1552 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1566 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1553 inode->i_ino, hdr->args.count, hdr->args.offset, how);
1567 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); 1554 trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
1568 if (trypnfs != PNFS_NOT_ATTEMPTED) 1555 if (trypnfs != PNFS_NOT_ATTEMPTED)
1569 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 1556 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1570 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1557 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1575,139 +1562,105 @@ static void
1575pnfs_do_write(struct nfs_pageio_descriptor *desc, 1562pnfs_do_write(struct nfs_pageio_descriptor *desc,
1576 struct nfs_pgio_header *hdr, int how) 1563 struct nfs_pgio_header *hdr, int how)
1577{ 1564{
1578 struct nfs_pgio_data *data = hdr->data;
1579 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1565 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1580 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1566 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1581 enum pnfs_try_status trypnfs; 1567 enum pnfs_try_status trypnfs;
1582 1568
1583 desc->pg_lseg = NULL; 1569 desc->pg_lseg = NULL;
1584 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1570 trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
1585 if (trypnfs == PNFS_NOT_ATTEMPTED) 1571 if (trypnfs == PNFS_NOT_ATTEMPTED)
1586 pnfs_write_through_mds(desc, data); 1572 pnfs_write_through_mds(desc, hdr);
1587 pnfs_put_lseg(lseg); 1573 pnfs_put_lseg(lseg);
1588} 1574}
1589 1575
1590static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 1576static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1591{ 1577{
1592 pnfs_put_lseg(hdr->lseg); 1578 pnfs_put_lseg(hdr->lseg);
1593 nfs_rw_header_free(hdr); 1579 nfs_pgio_header_free(hdr);
1594} 1580}
1595EXPORT_SYMBOL_GPL(pnfs_writehdr_free); 1581EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
1596 1582
1597int 1583int
1598pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1584pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1599{ 1585{
1600 struct nfs_rw_header *whdr;
1601 struct nfs_pgio_header *hdr; 1586 struct nfs_pgio_header *hdr;
1602 int ret; 1587 int ret;
1603 1588
1604 whdr = nfs_rw_header_alloc(desc->pg_rw_ops); 1589 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
1605 if (!whdr) { 1590 if (!hdr) {
1606 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1591 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1607 pnfs_put_lseg(desc->pg_lseg); 1592 pnfs_put_lseg(desc->pg_lseg);
1608 desc->pg_lseg = NULL; 1593 desc->pg_lseg = NULL;
1609 return -ENOMEM; 1594 return -ENOMEM;
1610 } 1595 }
1611 hdr = &whdr->header;
1612 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 1596 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1613 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1597 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1614 atomic_inc(&hdr->refcnt);
1615 ret = nfs_generic_pgio(desc, hdr); 1598 ret = nfs_generic_pgio(desc, hdr);
1616 if (ret != 0) { 1599 if (ret != 0) {
1617 pnfs_put_lseg(desc->pg_lseg); 1600 pnfs_put_lseg(desc->pg_lseg);
1618 desc->pg_lseg = NULL; 1601 desc->pg_lseg = NULL;
1619 } else 1602 } else
1620 pnfs_do_write(desc, hdr, desc->pg_ioflags); 1603 pnfs_do_write(desc, hdr, desc->pg_ioflags);
1621 if (atomic_dec_and_test(&hdr->refcnt))
1622 hdr->completion_ops->completion(hdr);
1623 return ret; 1604 return ret;
1624} 1605}
1625EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1606EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1626 1607
1627int pnfs_read_done_resend_to_mds(struct inode *inode, 1608int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
1628 struct list_head *head,
1629 const struct nfs_pgio_completion_ops *compl_ops,
1630 struct nfs_direct_req *dreq)
1631{ 1609{
1632 struct nfs_pageio_descriptor pgio; 1610 struct nfs_pageio_descriptor pgio;
1633 LIST_HEAD(failed);
1634 1611
1635 /* Resend all requests through the MDS */ 1612 /* Resend all requests through the MDS */
1636 nfs_pageio_init_read(&pgio, inode, true, compl_ops); 1613 nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
1637 pgio.pg_dreq = dreq; 1614 return nfs_pageio_resend(&pgio, hdr);
1638 while (!list_empty(head)) {
1639 struct nfs_page *req = nfs_list_entry(head->next);
1640
1641 nfs_list_remove_request(req);
1642 if (!nfs_pageio_add_request(&pgio, req))
1643 nfs_list_add_request(req, &failed);
1644 }
1645 nfs_pageio_complete(&pgio);
1646
1647 if (!list_empty(&failed)) {
1648 list_move(&failed, head);
1649 return -EIO;
1650 }
1651 return 0;
1652} 1615}
1653EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); 1616EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1654 1617
1655static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) 1618static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
1656{ 1619{
1657 struct nfs_pgio_header *hdr = data->header;
1658
1659 dprintk("pnfs read error = %d\n", hdr->pnfs_error); 1620 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1660 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1621 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1661 PNFS_LAYOUTRET_ON_ERROR) { 1622 PNFS_LAYOUTRET_ON_ERROR) {
1662 pnfs_return_layout(hdr->inode); 1623 pnfs_return_layout(hdr->inode);
1663 } 1624 }
1664 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1625 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1665 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, 1626 hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
1666 &hdr->pages,
1667 hdr->completion_ops,
1668 hdr->dreq);
1669} 1627}
1670 1628
1671/* 1629/*
1672 * Called by non rpc-based layout drivers 1630 * Called by non rpc-based layout drivers
1673 */ 1631 */
1674void pnfs_ld_read_done(struct nfs_pgio_data *data) 1632void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
1675{ 1633{
1676 struct nfs_pgio_header *hdr = data->header; 1634 trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
1677
1678 trace_nfs4_pnfs_read(data, hdr->pnfs_error);
1679 if (likely(!hdr->pnfs_error)) { 1635 if (likely(!hdr->pnfs_error)) {
1680 __nfs4_read_done_cb(data); 1636 __nfs4_read_done_cb(hdr);
1681 hdr->mds_ops->rpc_call_done(&data->task, data); 1637 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
1682 } else 1638 } else
1683 pnfs_ld_handle_read_error(data); 1639 pnfs_ld_handle_read_error(hdr);
1684 hdr->mds_ops->rpc_release(data); 1640 hdr->mds_ops->rpc_release(hdr);
1685} 1641}
1686EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1642EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1687 1643
1688static void 1644static void
1689pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1645pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1690 struct nfs_pgio_data *data) 1646 struct nfs_pgio_header *hdr)
1691{ 1647{
1692 struct nfs_pgio_header *hdr = data->header;
1693
1694 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1648 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1695 list_splice_tail_init(&hdr->pages, &desc->pg_list); 1649 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1696 nfs_pageio_reset_read_mds(desc); 1650 nfs_pageio_reset_read_mds(desc);
1697 desc->pg_recoalesce = 1; 1651 desc->pg_recoalesce = 1;
1698 } 1652 }
1699 nfs_pgio_data_release(data); 1653 nfs_pgio_data_destroy(hdr);
1700} 1654}
1701 1655
1702/* 1656/*
1703 * Call the appropriate parallel I/O subsystem read function. 1657 * Call the appropriate parallel I/O subsystem read function.
1704 */ 1658 */
1705static enum pnfs_try_status 1659static enum pnfs_try_status
1706pnfs_try_to_read_data(struct nfs_pgio_data *rdata, 1660pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
1707 const struct rpc_call_ops *call_ops, 1661 const struct rpc_call_ops *call_ops,
1708 struct pnfs_layout_segment *lseg) 1662 struct pnfs_layout_segment *lseg)
1709{ 1663{
1710 struct nfs_pgio_header *hdr = rdata->header;
1711 struct inode *inode = hdr->inode; 1664 struct inode *inode = hdr->inode;
1712 struct nfs_server *nfss = NFS_SERVER(inode); 1665 struct nfs_server *nfss = NFS_SERVER(inode);
1713 enum pnfs_try_status trypnfs; 1666 enum pnfs_try_status trypnfs;
@@ -1715,9 +1668,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
1715 hdr->mds_ops = call_ops; 1668 hdr->mds_ops = call_ops;
1716 1669
1717 dprintk("%s: Reading ino:%lu %u@%llu\n", 1670 dprintk("%s: Reading ino:%lu %u@%llu\n",
1718 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1671 __func__, inode->i_ino, hdr->args.count, hdr->args.offset);
1719 1672
1720 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1673 trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
1721 if (trypnfs != PNFS_NOT_ATTEMPTED) 1674 if (trypnfs != PNFS_NOT_ATTEMPTED)
1722 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1675 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1723 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1676 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1727,52 +1680,46 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
1727static void 1680static void
1728pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) 1681pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
1729{ 1682{
1730 struct nfs_pgio_data *data = hdr->data;
1731 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1683 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1732 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1684 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1733 enum pnfs_try_status trypnfs; 1685 enum pnfs_try_status trypnfs;
1734 1686
1735 desc->pg_lseg = NULL; 1687 desc->pg_lseg = NULL;
1736 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1688 trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
1737 if (trypnfs == PNFS_NOT_ATTEMPTED) 1689 if (trypnfs == PNFS_NOT_ATTEMPTED)
1738 pnfs_read_through_mds(desc, data); 1690 pnfs_read_through_mds(desc, hdr);
1739 pnfs_put_lseg(lseg); 1691 pnfs_put_lseg(lseg);
1740} 1692}
1741 1693
1742static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 1694static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1743{ 1695{
1744 pnfs_put_lseg(hdr->lseg); 1696 pnfs_put_lseg(hdr->lseg);
1745 nfs_rw_header_free(hdr); 1697 nfs_pgio_header_free(hdr);
1746} 1698}
1747EXPORT_SYMBOL_GPL(pnfs_readhdr_free); 1699EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
1748 1700
1749int 1701int
1750pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1702pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1751{ 1703{
1752 struct nfs_rw_header *rhdr;
1753 struct nfs_pgio_header *hdr; 1704 struct nfs_pgio_header *hdr;
1754 int ret; 1705 int ret;
1755 1706
1756 rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); 1707 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
1757 if (!rhdr) { 1708 if (!hdr) {
1758 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1709 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1759 ret = -ENOMEM; 1710 ret = -ENOMEM;
1760 pnfs_put_lseg(desc->pg_lseg); 1711 pnfs_put_lseg(desc->pg_lseg);
1761 desc->pg_lseg = NULL; 1712 desc->pg_lseg = NULL;
1762 return ret; 1713 return ret;
1763 } 1714 }
1764 hdr = &rhdr->header;
1765 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 1715 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1766 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1716 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1767 atomic_inc(&hdr->refcnt);
1768 ret = nfs_generic_pgio(desc, hdr); 1717 ret = nfs_generic_pgio(desc, hdr);
1769 if (ret != 0) { 1718 if (ret != 0) {
1770 pnfs_put_lseg(desc->pg_lseg); 1719 pnfs_put_lseg(desc->pg_lseg);
1771 desc->pg_lseg = NULL; 1720 desc->pg_lseg = NULL;
1772 } else 1721 } else
1773 pnfs_do_read(desc, hdr); 1722 pnfs_do_read(desc, hdr);
1774 if (atomic_dec_and_test(&hdr->refcnt))
1775 hdr->completion_ops->completion(hdr);
1776 return ret; 1723 return ret;
1777} 1724}
1778EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1725EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
@@ -1820,12 +1767,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1820EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 1767EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1821 1768
1822void 1769void
1823pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) 1770pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
1824{ 1771{
1825 struct nfs_pgio_header *hdr = wdata->header;
1826 struct inode *inode = hdr->inode; 1772 struct inode *inode = hdr->inode;
1827 struct nfs_inode *nfsi = NFS_I(inode); 1773 struct nfs_inode *nfsi = NFS_I(inode);
1828 loff_t end_pos = wdata->mds_offset + wdata->res.count; 1774 loff_t end_pos = hdr->mds_offset + hdr->res.count;
1829 bool mark_as_dirty = false; 1775 bool mark_as_dirty = false;
1830 1776
1831 spin_lock(&inode->i_lock); 1777 spin_lock(&inode->i_lock);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4fb309a2b4c4..aca3dff5dae6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,6 +32,7 @@
32 32
33#include <linux/nfs_fs.h> 33#include <linux/nfs_fs.h>
34#include <linux/nfs_page.h> 34#include <linux/nfs_page.h>
35#include <linux/workqueue.h>
35 36
36enum { 37enum {
37 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 38 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
@@ -46,6 +47,7 @@ struct pnfs_layout_segment {
46 atomic_t pls_refcount; 47 atomic_t pls_refcount;
47 unsigned long pls_flags; 48 unsigned long pls_flags;
48 struct pnfs_layout_hdr *pls_layout; 49 struct pnfs_layout_hdr *pls_layout;
50 struct work_struct pls_work;
49}; 51};
50 52
51enum pnfs_try_status { 53enum pnfs_try_status {
@@ -104,6 +106,8 @@ struct pnfs_layoutdriver_type {
104 int max); 106 int max);
105 void (*recover_commit_reqs) (struct list_head *list, 107 void (*recover_commit_reqs) (struct list_head *list,
106 struct nfs_commit_info *cinfo); 108 struct nfs_commit_info *cinfo);
109 struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
110 struct page *page);
107 int (*commit_pagelist)(struct inode *inode, 111 int (*commit_pagelist)(struct inode *inode,
108 struct list_head *mds_pages, 112 struct list_head *mds_pages,
109 int how, 113 int how,
@@ -113,8 +117,8 @@ struct pnfs_layoutdriver_type {
113 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 117 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
114 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 118 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
115 */ 119 */
116 enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); 120 enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *);
117 enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); 121 enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);
118 122
119 void (*free_deviceid_node) (struct nfs4_deviceid_node *); 123 void (*free_deviceid_node) (struct nfs4_deviceid_node *);
120 124
@@ -179,6 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
179/* pnfs.c */ 183/* pnfs.c */
180void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); 184void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
181void pnfs_put_lseg(struct pnfs_layout_segment *lseg); 185void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
186void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg);
182 187
183void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 188void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
184void unset_pnfs_layoutdriver(struct nfs_server *); 189void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -213,13 +218,13 @@ bool pnfs_roc(struct inode *ino);
213void pnfs_roc_release(struct inode *ino); 218void pnfs_roc_release(struct inode *ino);
214void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 219void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
215bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); 220bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
216void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); 221void pnfs_set_layoutcommit(struct nfs_pgio_header *);
217void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 222void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
218int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 223int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
219int _pnfs_return_layout(struct inode *); 224int _pnfs_return_layout(struct inode *);
220int pnfs_commit_and_return_layout(struct inode *); 225int pnfs_commit_and_return_layout(struct inode *);
221void pnfs_ld_write_done(struct nfs_pgio_data *); 226void pnfs_ld_write_done(struct nfs_pgio_header *);
222void pnfs_ld_read_done(struct nfs_pgio_data *); 227void pnfs_ld_read_done(struct nfs_pgio_header *);
223struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 228struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
224 struct nfs_open_context *ctx, 229 struct nfs_open_context *ctx,
225 loff_t pos, 230 loff_t pos,
@@ -228,12 +233,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
228 gfp_t gfp_flags); 233 gfp_t gfp_flags);
229 234
230void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 235void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
231int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, 236int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
232 const struct nfs_pgio_completion_ops *compl_ops, 237int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
233 struct nfs_direct_req *dreq);
234int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
235 const struct nfs_pgio_completion_ops *compl_ops,
236 struct nfs_direct_req *dreq);
237struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); 238struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
238 239
239/* nfs4_deviceid_flags */ 240/* nfs4_deviceid_flags */
@@ -345,6 +346,17 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
345 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); 346 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
346} 347}
347 348
349static inline struct nfs_page *
350pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
351 struct page *page)
352{
353 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
354
355 if (ld == NULL || ld->search_commit_reqs == NULL)
356 return NULL;
357 return ld->search_commit_reqs(cinfo, page);
358}
359
348/* Should the pNFS client commit and return the layout upon a setattr */ 360/* Should the pNFS client commit and return the layout upon a setattr */
349static inline bool 361static inline bool
350pnfs_ld_layoutret_on_setattr(struct inode *inode) 362pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -410,6 +422,10 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
410{ 422{
411} 423}
412 424
425static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
426{
427}
428
413static inline int pnfs_return_layout(struct inode *ino) 429static inline int pnfs_return_layout(struct inode *ino)
414{ 430{
415 return 0; 431 return 0;
@@ -496,6 +512,13 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
496{ 512{
497} 513}
498 514
515static inline struct nfs_page *
516pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
517 struct page *page)
518{
519 return NULL;
520}
521
499static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 522static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
500{ 523{
501 return 0; 524 return 0;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c171ce1a8a30..b09cc23d6f43 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
578 return 0; 578 return 0;
579} 579}
580 580
581static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 581static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
582{ 582{
583 struct inode *inode = data->header->inode; 583 struct inode *inode = hdr->inode;
584 584
585 nfs_invalidate_atime(inode); 585 nfs_invalidate_atime(inode);
586 if (task->tk_status >= 0) { 586 if (task->tk_status >= 0) {
587 nfs_refresh_inode(inode, data->res.fattr); 587 nfs_refresh_inode(inode, hdr->res.fattr);
588 /* Emulate the eof flag, which isn't normally needed in NFSv2 588 /* Emulate the eof flag, which isn't normally needed in NFSv2
589 * as it is guaranteed to always return the file attributes 589 * as it is guaranteed to always return the file attributes
590 */ 590 */
591 if (data->args.offset + data->res.count >= data->res.fattr->size) 591 if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
592 data->res.eof = 1; 592 hdr->res.eof = 1;
593 } 593 }
594 return 0; 594 return 0;
595} 595}
596 596
597static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 597static void nfs_proc_read_setup(struct nfs_pgio_header *hdr,
598 struct rpc_message *msg)
598{ 599{
599 msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; 600 msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
600} 601}
601 602
602static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 603static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
604 struct nfs_pgio_header *hdr)
603{ 605{
604 rpc_call_start(task); 606 rpc_call_start(task);
605 return 0; 607 return 0;
606} 608}
607 609
608static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 610static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
609{ 611{
610 struct inode *inode = data->header->inode; 612 struct inode *inode = hdr->inode;
611 613
612 if (task->tk_status >= 0) 614 if (task->tk_status >= 0)
613 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 615 nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
614 return 0; 616 return 0;
615} 617}
616 618
617static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 619static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
620 struct rpc_message *msg)
618{ 621{
619 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ 622 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
620 data->args.stable = NFS_FILE_SYNC; 623 hdr->args.stable = NFS_FILE_SYNC;
621 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; 624 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
622} 625}
623 626
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e818a475ca64..beff2769c5c5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops;
33 33
34static struct kmem_cache *nfs_rdata_cachep; 34static struct kmem_cache *nfs_rdata_cachep;
35 35
36static struct nfs_rw_header *nfs_readhdr_alloc(void) 36static struct nfs_pgio_header *nfs_readhdr_alloc(void)
37{ 37{
38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
39} 39}
40 40
41static void nfs_readhdr_free(struct nfs_rw_header *rhdr) 41static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
42{ 42{
43 kmem_cache_free(nfs_rdata_cachep, rhdr); 43 kmem_cache_free(nfs_rdata_cachep, rhdr);
44} 44}
@@ -115,12 +115,6 @@ static void nfs_readpage_release(struct nfs_page *req)
115 115
116 unlock_page(req->wb_page); 116 unlock_page(req->wb_page);
117 } 117 }
118
119 dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
120 req->wb_context->dentry->d_inode->i_sb->s_id,
121 (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
122 req->wb_bytes,
123 (long long)req_offset(req));
124 nfs_release_request(req); 118 nfs_release_request(req);
125} 119}
126 120
@@ -172,14 +166,15 @@ out:
172 hdr->release(hdr); 166 hdr->release(hdr);
173} 167}
174 168
175static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, 169static void nfs_initiate_read(struct nfs_pgio_header *hdr,
170 struct rpc_message *msg,
176 struct rpc_task_setup *task_setup_data, int how) 171 struct rpc_task_setup *task_setup_data, int how)
177{ 172{
178 struct inode *inode = data->header->inode; 173 struct inode *inode = hdr->inode;
179 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 174 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
180 175
181 task_setup_data->flags |= swap_flags; 176 task_setup_data->flags |= swap_flags;
182 NFS_PROTO(inode)->read_setup(data, msg); 177 NFS_PROTO(inode)->read_setup(hdr, msg);
183} 178}
184 179
185static void 180static void
@@ -203,14 +198,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
203 * This is the callback from RPC telling us whether a reply was 198 * This is the callback from RPC telling us whether a reply was
204 * received or some error occurred (timeout or socket shutdown). 199 * received or some error occurred (timeout or socket shutdown).
205 */ 200 */
206static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, 201static int nfs_readpage_done(struct rpc_task *task,
202 struct nfs_pgio_header *hdr,
207 struct inode *inode) 203 struct inode *inode)
208{ 204{
209 int status = NFS_PROTO(inode)->read_done(task, data); 205 int status = NFS_PROTO(inode)->read_done(task, hdr);
210 if (status != 0) 206 if (status != 0)
211 return status; 207 return status;
212 208
213 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); 209 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
214 210
215 if (task->tk_status == -ESTALE) { 211 if (task->tk_status == -ESTALE) {
216 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); 212 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@@ -219,34 +215,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
219 return 0; 215 return 0;
220} 216}
221 217
222static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) 218static void nfs_readpage_retry(struct rpc_task *task,
219 struct nfs_pgio_header *hdr)
223{ 220{
224 struct nfs_pgio_args *argp = &data->args; 221 struct nfs_pgio_args *argp = &hdr->args;
225 struct nfs_pgio_res *resp = &data->res; 222 struct nfs_pgio_res *resp = &hdr->res;
226 223
227 /* This is a short read! */ 224 /* This is a short read! */
228 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); 225 nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
229 /* Has the server at least made some progress? */ 226 /* Has the server at least made some progress? */
230 if (resp->count == 0) { 227 if (resp->count == 0) {
231 nfs_set_pgio_error(data->header, -EIO, argp->offset); 228 nfs_set_pgio_error(hdr, -EIO, argp->offset);
232 return; 229 return;
233 } 230 }
234 /* Yes, so retry the read at the end of the data */ 231 /* Yes, so retry the read at the end of the data */
235 data->mds_offset += resp->count; 232 hdr->mds_offset += resp->count;
236 argp->offset += resp->count; 233 argp->offset += resp->count;
237 argp->pgbase += resp->count; 234 argp->pgbase += resp->count;
238 argp->count -= resp->count; 235 argp->count -= resp->count;
239 rpc_restart_call_prepare(task); 236 rpc_restart_call_prepare(task);
240} 237}
241 238
242static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) 239static void nfs_readpage_result(struct rpc_task *task,
240 struct nfs_pgio_header *hdr)
243{ 241{
244 struct nfs_pgio_header *hdr = data->header; 242 if (hdr->res.eof) {
245
246 if (data->res.eof) {
247 loff_t bound; 243 loff_t bound;
248 244
249 bound = data->args.offset + data->res.count; 245 bound = hdr->args.offset + hdr->res.count;
250 spin_lock(&hdr->lock); 246 spin_lock(&hdr->lock);
251 if (bound < hdr->io_start + hdr->good_bytes) { 247 if (bound < hdr->io_start + hdr->good_bytes) {
252 set_bit(NFS_IOHDR_EOF, &hdr->flags); 248 set_bit(NFS_IOHDR_EOF, &hdr->flags);
@@ -254,8 +250,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat
254 hdr->good_bytes = bound - hdr->io_start; 250 hdr->good_bytes = bound - hdr->io_start;
255 } 251 }
256 spin_unlock(&hdr->lock); 252 spin_unlock(&hdr->lock);
257 } else if (data->res.count != data->args.count) 253 } else if (hdr->res.count != hdr->args.count)
258 nfs_readpage_retry(task, data); 254 nfs_readpage_retry(task, hdr);
259} 255}
260 256
261/* 257/*
@@ -404,7 +400,7 @@ out:
404int __init nfs_init_readpagecache(void) 400int __init nfs_init_readpagecache(void)
405{ 401{
406 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 402 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
407 sizeof(struct nfs_rw_header), 403 sizeof(struct nfs_pgio_header),
408 0, SLAB_HWCACHE_ALIGN, 404 0, SLAB_HWCACHE_ALIGN,
409 NULL); 405 NULL);
410 if (nfs_rdata_cachep == NULL) 406 if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 084af1060d79..e4499d5b51e8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1027,8 +1027,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
1027 rpc_authflavor_t flavor) 1027 rpc_authflavor_t flavor)
1028{ 1028{
1029 unsigned int i; 1029 unsigned int i;
1030 unsigned int max_flavor_len = (sizeof(auth_info->flavors) / 1030 unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
1031 sizeof(auth_info->flavors[0]));
1032 1031
1033 /* make sure this flavor isn't already in the list */ 1032 /* make sure this flavor isn't already in the list */
1034 for (i = 0; i < auth_info->flavor_len; i++) { 1033 for (i = 0; i < auth_info->flavor_len; i++) {
@@ -2180,7 +2179,7 @@ out_no_address:
2180 return -EINVAL; 2179 return -EINVAL;
2181} 2180}
2182 2181
2183#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ 2182#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
2184 | NFS_MOUNT_SECURE \ 2183 | NFS_MOUNT_SECURE \
2185 | NFS_MOUNT_TCP \ 2184 | NFS_MOUNT_TCP \
2186 | NFS_MOUNT_VER3 \ 2185 | NFS_MOUNT_VER3 \
@@ -2188,15 +2187,16 @@ out_no_address:
2188 | NFS_MOUNT_NONLM \ 2187 | NFS_MOUNT_NONLM \
2189 | NFS_MOUNT_BROKEN_SUID \ 2188 | NFS_MOUNT_BROKEN_SUID \
2190 | NFS_MOUNT_STRICTLOCK \ 2189 | NFS_MOUNT_STRICTLOCK \
2191 | NFS_MOUNT_UNSHARED \
2192 | NFS_MOUNT_NORESVPORT \
2193 | NFS_MOUNT_LEGACY_INTERFACE) 2190 | NFS_MOUNT_LEGACY_INTERFACE)
2194 2191
2192#define NFS_MOUNT_CMP_FLAGMASK (NFS_REMOUNT_CMP_FLAGMASK & \
2193 ~(NFS_MOUNT_UNSHARED | NFS_MOUNT_NORESVPORT))
2194
2195static int 2195static int
2196nfs_compare_remount_data(struct nfs_server *nfss, 2196nfs_compare_remount_data(struct nfs_server *nfss,
2197 struct nfs_parsed_mount_data *data) 2197 struct nfs_parsed_mount_data *data)
2198{ 2198{
2199 if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || 2199 if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
2200 data->rsize != nfss->rsize || 2200 data->rsize != nfss->rsize ||
2201 data->wsize != nfss->wsize || 2201 data->wsize != nfss->wsize ||
2202 data->version != nfss->nfs_client->rpc_ops->version || 2202 data->version != nfss->nfs_client->rpc_ops->version ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 962c9ee758be..e3b5cf28bdc5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -47,6 +47,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
47static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 47static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48static const struct nfs_rw_ops nfs_rw_write_ops; 48static const struct nfs_rw_ops nfs_rw_write_ops;
49static void nfs_clear_request_commit(struct nfs_page *req); 49static void nfs_clear_request_commit(struct nfs_page *req);
50static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
51 struct inode *inode);
50 52
51static struct kmem_cache *nfs_wdata_cachep; 53static struct kmem_cache *nfs_wdata_cachep;
52static mempool_t *nfs_wdata_mempool; 54static mempool_t *nfs_wdata_mempool;
@@ -71,18 +73,18 @@ void nfs_commit_free(struct nfs_commit_data *p)
71} 73}
72EXPORT_SYMBOL_GPL(nfs_commit_free); 74EXPORT_SYMBOL_GPL(nfs_commit_free);
73 75
74static struct nfs_rw_header *nfs_writehdr_alloc(void) 76static struct nfs_pgio_header *nfs_writehdr_alloc(void)
75{ 77{
76 struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); 78 struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
77 79
78 if (p) 80 if (p)
79 memset(p, 0, sizeof(*p)); 81 memset(p, 0, sizeof(*p));
80 return p; 82 return p;
81} 83}
82 84
83static void nfs_writehdr_free(struct nfs_rw_header *whdr) 85static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
84{ 86{
85 mempool_free(whdr, nfs_wdata_mempool); 87 mempool_free(hdr, nfs_wdata_mempool);
86} 88}
87 89
88static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 90static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -93,6 +95,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
93} 95}
94 96
95/* 97/*
98 * nfs_page_search_commits_for_head_request_locked
99 *
100 * Search through commit lists on @inode for the head request for @page.
101 * Must be called while holding the inode (which is cinfo) lock.
102 *
103 * Returns the head request if found, or NULL if not found.
104 */
105static struct nfs_page *
106nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
107 struct page *page)
108{
109 struct nfs_page *freq, *t;
110 struct nfs_commit_info cinfo;
111 struct inode *inode = &nfsi->vfs_inode;
112
113 nfs_init_cinfo_from_inode(&cinfo, inode);
114
115 /* search through pnfs commit lists */
116 freq = pnfs_search_commit_reqs(inode, &cinfo, page);
117 if (freq)
118 return freq->wb_head;
119
120 /* Linearly search the commit list for the correct request */
121 list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
122 if (freq->wb_page == page)
123 return freq->wb_head;
124 }
125
126 return NULL;
127}
128
129/*
96 * nfs_page_find_head_request_locked - find head request associated with @page 130 * nfs_page_find_head_request_locked - find head request associated with @page
97 * 131 *
98 * must be called while holding the inode lock. 132 * must be called while holding the inode lock.
@@ -106,21 +140,12 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
106 140
107 if (PagePrivate(page)) 141 if (PagePrivate(page))
108 req = (struct nfs_page *)page_private(page); 142 req = (struct nfs_page *)page_private(page);
109 else if (unlikely(PageSwapCache(page))) { 143 else if (unlikely(PageSwapCache(page)))
110 struct nfs_page *freq, *t; 144 req = nfs_page_search_commits_for_head_request_locked(nfsi,
111 145 page);
112 /* Linearly search the commit list for the correct req */
113 list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
114 if (freq->wb_page == page) {
115 req = freq->wb_head;
116 break;
117 }
118 }
119 }
120 146
121 if (req) { 147 if (req) {
122 WARN_ON_ONCE(req->wb_head != req); 148 WARN_ON_ONCE(req->wb_head != req);
123
124 kref_get(&req->wb_kref); 149 kref_get(&req->wb_kref);
125 } 150 }
126 151
@@ -216,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
216 unsigned int pos = 0; 241 unsigned int pos = 0;
217 unsigned int len = nfs_page_length(req->wb_page); 242 unsigned int len = nfs_page_length(req->wb_page);
218 243
219 nfs_page_group_lock(req); 244 nfs_page_group_lock(req, true);
220 245
221 do { 246 do {
222 tmp = nfs_page_group_search_locked(req->wb_head, pos); 247 tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -379,8 +404,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
379 subreq->wb_head = subreq; 404 subreq->wb_head = subreq;
380 subreq->wb_this_page = subreq; 405 subreq->wb_this_page = subreq;
381 406
382 nfs_clear_request_commit(subreq);
383
384 /* subreq is now totally disconnected from page group or any 407 /* subreq is now totally disconnected from page group or any
385 * write / commit lists. last chance to wake any waiters */ 408 * write / commit lists. last chance to wake any waiters */
386 nfs_unlock_request(subreq); 409 nfs_unlock_request(subreq);
@@ -456,7 +479,9 @@ try_again:
456 } 479 }
457 480
458 /* lock each request in the page group */ 481 /* lock each request in the page group */
459 nfs_page_group_lock(head); 482 ret = nfs_page_group_lock(head, false);
483 if (ret < 0)
484 return ERR_PTR(ret);
460 subreq = head; 485 subreq = head;
461 do { 486 do {
462 /* 487 /*
@@ -488,7 +513,7 @@ try_again:
488 * Commit list removal accounting is done after locks are dropped */ 513 * Commit list removal accounting is done after locks are dropped */
489 subreq = head; 514 subreq = head;
490 do { 515 do {
491 nfs_list_remove_request(subreq); 516 nfs_clear_request_commit(subreq);
492 subreq = subreq->wb_this_page; 517 subreq = subreq->wb_this_page;
493 } while (subreq != head); 518 } while (subreq != head);
494 519
@@ -518,15 +543,11 @@ try_again:
518 543
519 nfs_page_group_unlock(head); 544 nfs_page_group_unlock(head);
520 545
521 /* drop lock to clear_request_commit the head req and clean up 546 /* drop lock to clean up requests on destroy list */
522 * requests on destroy list */
523 spin_unlock(&inode->i_lock); 547 spin_unlock(&inode->i_lock);
524 548
525 nfs_destroy_unlinked_subrequests(destroy_list, head); 549 nfs_destroy_unlinked_subrequests(destroy_list, head);
526 550
527 /* clean up commit list state */
528 nfs_clear_request_commit(head);
529
530 /* still holds ref on head from nfs_page_find_head_request_locked 551 /* still holds ref on head from nfs_page_find_head_request_locked
531 * and still has lock on head from lock loop */ 552 * and still has lock on head from lock loop */
532 return head; 553 return head;
@@ -705,6 +726,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
705 726
706 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) 727 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
707 nfs_release_request(req); 728 nfs_release_request(req);
729 else
730 WARN_ON_ONCE(1);
708} 731}
709 732
710static void 733static void
@@ -808,6 +831,7 @@ nfs_clear_page_commit(struct page *page)
808 dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); 831 dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
809} 832}
810 833
834/* Called holding inode (/cinfo) lock */
811static void 835static void
812nfs_clear_request_commit(struct nfs_page *req) 836nfs_clear_request_commit(struct nfs_page *req)
813{ 837{
@@ -817,20 +841,17 @@ nfs_clear_request_commit(struct nfs_page *req)
817 841
818 nfs_init_cinfo_from_inode(&cinfo, inode); 842 nfs_init_cinfo_from_inode(&cinfo, inode);
819 if (!pnfs_clear_request_commit(req, &cinfo)) { 843 if (!pnfs_clear_request_commit(req, &cinfo)) {
820 spin_lock(cinfo.lock);
821 nfs_request_remove_commit_list(req, &cinfo); 844 nfs_request_remove_commit_list(req, &cinfo);
822 spin_unlock(cinfo.lock);
823 } 845 }
824 nfs_clear_page_commit(req->wb_page); 846 nfs_clear_page_commit(req->wb_page);
825 } 847 }
826} 848}
827 849
828static inline 850int nfs_write_need_commit(struct nfs_pgio_header *hdr)
829int nfs_write_need_commit(struct nfs_pgio_data *data)
830{ 851{
831 if (data->verf.committed == NFS_DATA_SYNC) 852 if (hdr->verf.committed == NFS_DATA_SYNC)
832 return data->header->lseg == NULL; 853 return hdr->lseg == NULL;
833 return data->verf.committed != NFS_FILE_SYNC; 854 return hdr->verf.committed != NFS_FILE_SYNC;
834} 855}
835 856
836#else 857#else
@@ -856,8 +877,7 @@ nfs_clear_request_commit(struct nfs_page *req)
856{ 877{
857} 878}
858 879
859static inline 880int nfs_write_need_commit(struct nfs_pgio_header *hdr)
860int nfs_write_need_commit(struct nfs_pgio_data *data)
861{ 881{
862 return 0; 882 return 0;
863} 883}
@@ -883,11 +903,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
883 nfs_context_set_write_error(req->wb_context, hdr->error); 903 nfs_context_set_write_error(req->wb_context, hdr->error);
884 goto remove_req; 904 goto remove_req;
885 } 905 }
886 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { 906 if (nfs_write_need_commit(hdr)) {
887 nfs_mark_request_dirty(req);
888 goto next;
889 }
890 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
891 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); 907 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
892 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 908 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
893 goto next; 909 goto next;
@@ -1038,9 +1054,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
1038 else 1054 else
1039 req->wb_bytes = rqend - req->wb_offset; 1055 req->wb_bytes = rqend - req->wb_offset;
1040out_unlock: 1056out_unlock:
1041 spin_unlock(&inode->i_lock);
1042 if (req) 1057 if (req)
1043 nfs_clear_request_commit(req); 1058 nfs_clear_request_commit(req);
1059 spin_unlock(&inode->i_lock);
1044 return req; 1060 return req;
1045out_flushme: 1061out_flushme:
1046 spin_unlock(&inode->i_lock); 1062 spin_unlock(&inode->i_lock);
@@ -1241,17 +1257,18 @@ static int flush_task_priority(int how)
1241 return RPC_PRIORITY_NORMAL; 1257 return RPC_PRIORITY_NORMAL;
1242} 1258}
1243 1259
1244static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, 1260static void nfs_initiate_write(struct nfs_pgio_header *hdr,
1261 struct rpc_message *msg,
1245 struct rpc_task_setup *task_setup_data, int how) 1262 struct rpc_task_setup *task_setup_data, int how)
1246{ 1263{
1247 struct inode *inode = data->header->inode; 1264 struct inode *inode = hdr->inode;
1248 int priority = flush_task_priority(how); 1265 int priority = flush_task_priority(how);
1249 1266
1250 task_setup_data->priority = priority; 1267 task_setup_data->priority = priority;
1251 NFS_PROTO(inode)->write_setup(data, msg); 1268 NFS_PROTO(inode)->write_setup(hdr, msg);
1252 1269
1253 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, 1270 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
1254 &task_setup_data->rpc_client, msg, data); 1271 &task_setup_data->rpc_client, msg, hdr);
1255} 1272}
1256 1273
1257/* If a nfs_flush_* function fails, it should remove reqs from @head and 1274/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1313,21 +1330,9 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1313 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); 1330 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1314} 1331}
1315 1332
1316static void nfs_writeback_release_common(struct nfs_pgio_data *data) 1333static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
1317{ 1334{
1318 struct nfs_pgio_header *hdr = data->header; 1335 /* do nothing! */
1319 int status = data->task.tk_status;
1320
1321 if ((status >= 0) && nfs_write_need_commit(data)) {
1322 spin_lock(&hdr->lock);
1323 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1324 ; /* Do nothing */
1325 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1326 memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
1327 else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
1328 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1329 spin_unlock(&hdr->lock);
1330 }
1331} 1336}
1332 1337
1333/* 1338/*
@@ -1358,7 +1363,8 @@ static int nfs_should_remove_suid(const struct inode *inode)
1358/* 1363/*
1359 * This function is called when the WRITE call is complete. 1364 * This function is called when the WRITE call is complete.
1360 */ 1365 */
1361static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, 1366static int nfs_writeback_done(struct rpc_task *task,
1367 struct nfs_pgio_header *hdr,
1362 struct inode *inode) 1368 struct inode *inode)
1363{ 1369{
1364 int status; 1370 int status;
@@ -1370,13 +1376,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1370 * another writer had changed the file, but some applications 1376 * another writer had changed the file, but some applications
1371 * depend on tighter cache coherency when writing. 1377 * depend on tighter cache coherency when writing.
1372 */ 1378 */
1373 status = NFS_PROTO(inode)->write_done(task, data); 1379 status = NFS_PROTO(inode)->write_done(task, hdr);
1374 if (status != 0) 1380 if (status != 0)
1375 return status; 1381 return status;
1376 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); 1382 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
1377 1383
1378#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) 1384#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1379 if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { 1385 if (hdr->res.verf->committed < hdr->args.stable &&
1386 task->tk_status >= 0) {
1380 /* We tried a write call, but the server did not 1387 /* We tried a write call, but the server did not
1381 * commit data to stable storage even though we 1388 * commit data to stable storage even though we
1382 * requested it. 1389 * requested it.
@@ -1392,7 +1399,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1392 dprintk("NFS: faulty NFS server %s:" 1399 dprintk("NFS: faulty NFS server %s:"
1393 " (committed = %d) != (stable = %d)\n", 1400 " (committed = %d) != (stable = %d)\n",
1394 NFS_SERVER(inode)->nfs_client->cl_hostname, 1401 NFS_SERVER(inode)->nfs_client->cl_hostname,
1395 data->res.verf->committed, data->args.stable); 1402 hdr->res.verf->committed, hdr->args.stable);
1396 complain = jiffies + 300 * HZ; 1403 complain = jiffies + 300 * HZ;
1397 } 1404 }
1398 } 1405 }
@@ -1407,16 +1414,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1407/* 1414/*
1408 * This function is called when the WRITE call is complete. 1415 * This function is called when the WRITE call is complete.
1409 */ 1416 */
1410static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) 1417static void nfs_writeback_result(struct rpc_task *task,
1418 struct nfs_pgio_header *hdr)
1411{ 1419{
1412 struct nfs_pgio_args *argp = &data->args; 1420 struct nfs_pgio_args *argp = &hdr->args;
1413 struct nfs_pgio_res *resp = &data->res; 1421 struct nfs_pgio_res *resp = &hdr->res;
1414 1422
1415 if (resp->count < argp->count) { 1423 if (resp->count < argp->count) {
1416 static unsigned long complain; 1424 static unsigned long complain;
1417 1425
1418 /* This a short write! */ 1426 /* This a short write! */
1419 nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); 1427 nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
1420 1428
1421 /* Has the server at least made some progress? */ 1429 /* Has the server at least made some progress? */
1422 if (resp->count == 0) { 1430 if (resp->count == 0) {
@@ -1426,14 +1434,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da
1426 argp->count); 1434 argp->count);
1427 complain = jiffies + 300 * HZ; 1435 complain = jiffies + 300 * HZ;
1428 } 1436 }
1429 nfs_set_pgio_error(data->header, -EIO, argp->offset); 1437 nfs_set_pgio_error(hdr, -EIO, argp->offset);
1430 task->tk_status = -EIO; 1438 task->tk_status = -EIO;
1431 return; 1439 return;
1432 } 1440 }
1433 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1441 /* Was this an NFSv2 write or an NFSv3 stable write? */
1434 if (resp->verf->committed != NFS_UNSTABLE) { 1442 if (resp->verf->committed != NFS_UNSTABLE) {
1435 /* Resend from where the server left off */ 1443 /* Resend from where the server left off */
1436 data->mds_offset += resp->count; 1444 hdr->mds_offset += resp->count;
1437 argp->offset += resp->count; 1445 argp->offset += resp->count;
1438 argp->pgbase += resp->count; 1446 argp->pgbase += resp->count;
1439 argp->count -= resp->count; 1447 argp->count -= resp->count;
@@ -1884,7 +1892,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1884int __init nfs_init_writepagecache(void) 1892int __init nfs_init_writepagecache(void)
1885{ 1893{
1886 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1894 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1887 sizeof(struct nfs_rw_header), 1895 sizeof(struct nfs_pgio_header),
1888 0, SLAB_HWCACHE_ALIGN, 1896 0, SLAB_HWCACHE_ALIGN,
1889 NULL); 1897 NULL);
1890 if (nfs_wdata_cachep == NULL) 1898 if (nfs_wdata_cachep == NULL)
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index ed628f71274c..538f142935ea 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -30,9 +30,6 @@
30 30
31MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
32 32
33EXPORT_SYMBOL_GPL(nfsacl_encode);
34EXPORT_SYMBOL_GPL(nfsacl_decode);
35
36struct nfsacl_encode_desc { 33struct nfsacl_encode_desc {
37 struct xdr_array2_desc desc; 34 struct xdr_array2_desc desc;
38 unsigned int count; 35 unsigned int count;
@@ -136,6 +133,7 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
136 nfsacl_desc.desc.array_len; 133 nfsacl_desc.desc.array_len;
137 return err; 134 return err;
138} 135}
136EXPORT_SYMBOL_GPL(nfsacl_encode);
139 137
140struct nfsacl_decode_desc { 138struct nfsacl_decode_desc {
141 struct xdr_array2_desc desc; 139 struct xdr_array2_desc desc;
@@ -295,3 +293,4 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
295 return 8 + nfsacl_desc.desc.elem_size * 293 return 8 + nfsacl_desc.desc.elem_size *
296 nfsacl_desc.desc.array_len; 294 nfsacl_desc.desc.array_len;
297} 295}
296EXPORT_SYMBOL_GPL(nfsacl_decode);